pcrd 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +24 -0
  3. data/LICENSE +21 -0
  4. data/README.md +614 -0
  5. data/bin/pcrd +7 -0
  6. data/lib/pcrd/advisory_lock.rb +50 -0
  7. data/lib/pcrd/apply/engine.rb +184 -0
  8. data/lib/pcrd/apply/worker.rb +97 -0
  9. data/lib/pcrd/backfill/batch.rb +158 -0
  10. data/lib/pcrd/backfill/engine.rb +153 -0
  11. data/lib/pcrd/checkpoint/store.rb +217 -0
  12. data/lib/pcrd/cli.rb +274 -0
  13. data/lib/pcrd/commands/analyze.rb +125 -0
  14. data/lib/pcrd/commands/cleanup.rb +112 -0
  15. data/lib/pcrd/commands/demo.rb +152 -0
  16. data/lib/pcrd/commands/readiness.rb +30 -0
  17. data/lib/pcrd/commands/status.rb +129 -0
  18. data/lib/pcrd/commands/verify.rb +172 -0
  19. data/lib/pcrd/config/add_column.rb +7 -0
  20. data/lib/pcrd/config/analyze_config.rb +8 -0
  21. data/lib/pcrd/config/column_spec.rb +10 -0
  22. data/lib/pcrd/config/connection.rb +7 -0
  23. data/lib/pcrd/config/cutover_config.rb +7 -0
  24. data/lib/pcrd/config/load_error.rb +7 -0
  25. data/lib/pcrd/config/loader.rb +158 -0
  26. data/lib/pcrd/config/migrate_config.rb +21 -0
  27. data/lib/pcrd/config/root.rb +9 -0
  28. data/lib/pcrd/config/schema.rb +62 -0
  29. data/lib/pcrd/config/table.rb +9 -0
  30. data/lib/pcrd/config/verify_config.rb +7 -0
  31. data/lib/pcrd/config.rb +7 -0
  32. data/lib/pcrd/connection/client.rb +129 -0
  33. data/lib/pcrd/connection/error.rb +7 -0
  34. data/lib/pcrd/connection/replication.rb +108 -0
  35. data/lib/pcrd/cutover/orchestrator.rb +108 -0
  36. data/lib/pcrd/cutover/sequences.rb +138 -0
  37. data/lib/pcrd/demo/generator.rb +214 -0
  38. data/lib/pcrd/demo/schema.rb +154 -0
  39. data/lib/pcrd/error.rb +12 -0
  40. data/lib/pcrd/migration/orchestrator.rb +272 -0
  41. data/lib/pcrd/monitor/lag.rb +107 -0
  42. data/lib/pcrd/options.rb +15 -0
  43. data/lib/pcrd/output/analyze_printer.rb +173 -0
  44. data/lib/pcrd/output/cutover_printer.rb +128 -0
  45. data/lib/pcrd/output/preflight_printer.rb +119 -0
  46. data/lib/pcrd/output/readiness_printer.rb +72 -0
  47. data/lib/pcrd/preflight.rb +331 -0
  48. data/lib/pcrd/readiness/manifest.rb +201 -0
  49. data/lib/pcrd/replication/consumer.rb +235 -0
  50. data/lib/pcrd/replication/error.rb +10 -0
  51. data/lib/pcrd/replication/pgoutput/messages.rb +68 -0
  52. data/lib/pcrd/replication/pgoutput/parser.rb +316 -0
  53. data/lib/pcrd/reporter/console.rb +46 -0
  54. data/lib/pcrd/reporter/null.rb +14 -0
  55. data/lib/pcrd/schema/column.rb +59 -0
  56. data/lib/pcrd/schema/ddl.rb +71 -0
  57. data/lib/pcrd/schema/diff_entry.rb +36 -0
  58. data/lib/pcrd/schema/differ.rb +175 -0
  59. data/lib/pcrd/schema/object_reader.rb +187 -0
  60. data/lib/pcrd/schema/packer.rb +90 -0
  61. data/lib/pcrd/schema/reader.rb +118 -0
  62. data/lib/pcrd/schema/setup.rb +143 -0
  63. data/lib/pcrd/schema/setup_error.rb +9 -0
  64. data/lib/pcrd/schema/table_not_found.rb +8 -0
  65. data/lib/pcrd/schema/type_registry.rb +116 -0
  66. data/lib/pcrd/sql.rb +55 -0
  67. data/lib/pcrd/transform/row_transformer.rb +69 -0
  68. data/lib/pcrd/transform/type_map.rb +209 -0
  69. data/lib/pcrd/transform/validator.rb +106 -0
  70. data/lib/pcrd/version.rb +5 -0
  71. data/lib/pcrd.rb +11 -0
  72. metadata +231 -0
@@ -0,0 +1,112 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "pastel"
4
+
5
+ module Pcrd
6
+ module Commands
7
+ # Drops the replication publication and slot on source, and deletes the
8
+ # local checkpoint database.
9
+ #
10
+ # Run this after the application has been successfully migrated to the target
11
+ # cluster and you're confident you won't need to roll back. The source tables
12
+ # themselves are NOT touched unless --drop-source is passed.
13
+ #
14
+ # Timeline recommendation:
15
+ # - Verify the app is healthy on the target cluster
16
+ # - Wait a few days (or a week) as a rollback window
17
+ # - Then run `pcrd cleanup`
18
+ # - Optionally run `pcrd cleanup --drop-source` weeks later
19
class Cleanup
  PASTEL = Pastel.new

  # @param config [Object] loaded pcrd configuration; this class reads
  #   +source+ and +migrate+ from it
  # @param options [Hash] command options, passed through Options.normalize
  def initialize(config, options = {})
    @config = config
    @options = Options.normalize(options)
  end

  # Runs the cleanup steps in order: drop the replication slot and
  # publication on source, delete the local checkpoint file, and (only when
  # the "drop-source" option is set) drop the source tables after an
  # interactive confirmation.
  #
  # @param output [IO] stream that progress lines are written to
  # @return [void]
  def run(output: $stdout)
    output.puts
    output.puts PASTEL.bold("Cleanup")
    output.puts PASTEL.dim("─" * 60)
    output.puts

    drop_slot_and_pub(output)
    drop_checkpoint(output)
    drop_source_tables(output) if @options[:"drop-source"]

    output.puts
    output.puts " #{PASTEL.green("✓")} Cleanup complete."
    output.puts
  end

  private

  # Opens a client for the source cluster, yields it, and always closes it,
  # including when the block raises, so a failed statement no longer leaks
  # the connection.
  def with_source_pool
    pool = Connection::Client.new(@config.source)
    begin
      yield pool
    ensure
      pool.close
    end
  end

  # Drops the configured replication slot (if it exists) and publication on
  # the source. A Connection::Error is reported as a warning together with
  # the statements to run manually; it is not re-raised.
  def drop_slot_and_pub(output)
    return unless @config.source && @config.migrate

    # Assigned before connecting so the rescue branch below can print them.
    slot = @config.migrate.replication_slot
    pub = @config.migrate.publication

    with_source_pool do |pool|
      # The WHERE EXISTS guard makes the statement return zero rows instead
      # of failing when the slot is already gone.
      result = pool.exec(
        "SELECT pg_drop_replication_slot($1) " \
        "WHERE EXISTS (SELECT 1 FROM pg_replication_slots WHERE slot_name = $1)",
        [slot]
      )
      if result.ntuples > 0
        output.puts " #{PASTEL.green("✓")} Dropped replication slot: #{slot}"
      else
        output.puts " #{PASTEL.dim("·")} Replication slot not found (already dropped): #{slot}"
      end

      pool.exec_sql("DROP PUBLICATION IF EXISTS #{pool.quote_ident(pub)}")
      output.puts " #{PASTEL.green("✓")} Dropped publication: #{pub}"
    end
  rescue Connection::Error => e
    output.puts " #{PASTEL.yellow("⚠")} Could not connect to source to drop slot/publication: #{e.message}"
    output.puts " Drop manually: SELECT pg_drop_replication_slot('#{slot}');"
    output.puts " DROP PUBLICATION IF EXISTS #{pub};"
  end

  # Deletes the checkpoint file, using the configured path or the default
  # "./pcrd_checkpoint.sqlite3" when none is configured.
  def drop_checkpoint(output)
    path = @config.migrate&.checkpoint_db || "./pcrd_checkpoint.sqlite3"

    if File.exist?(path)
      File.delete(path)
      output.puts " #{PASTEL.green("✓")} Deleted checkpoint: #{path}"
    else
      output.puts " #{PASTEL.dim("·")} Checkpoint not found (already deleted): #{path}"
    end
  end

  # Drops every configured table from the source after the operator types
  # the first table name on $stdin. Anything else (including EOF) aborts.
  def drop_source_tables(output)
    return unless @config.source && @config.migrate

    table_names = @config.migrate.tables.map(&:name)
    # Nothing to drop, so there is no table name to ask the operator for.
    return if table_names.empty?

    output.puts
    output.puts " #{PASTEL.yellow("⚠")} Dropping source tables: #{table_names.join(', ')}"
    output.puts " #{PASTEL.yellow("This is irreversible.")} Type the first table name to confirm:"

    input = $stdin.gets&.strip
    unless input == table_names.first
      output.puts " #{PASTEL.red("Aborted.")} No tables were dropped."
      return
    end

    with_source_pool do |pool|
      table_names.each do |name|
        # NOTE(review): the schema is hard-coded to "public" here; confirm
        # that matches how tables are addressed elsewhere in the tool.
        pool.exec_sql("DROP TABLE IF EXISTS public.#{pool.quote_ident(name)} CASCADE")
        output.puts " #{PASTEL.green("✓")} Dropped source table: #{name}"
      end
    end
  rescue Connection::Error => e
    output.puts " #{PASTEL.red("✗")} Failed to drop source tables: #{e.message}"
  end
end
111
+ end
112
+ end
@@ -0,0 +1,152 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "thor"
4
+
5
+ module Pcrd
6
+ module Commands
7
# Thor subcommands that create, seed and drop the demo schema on the
# source database.
class Demo < Thor
  # Makes Thor exit with a failure status when a command raises Thor::Error.
  def self.exit_on_failure?
    true
  end

  class_option :config, type: :string, aliases: "-c",
                        desc: "Path to migration YAML config (default: pcrd.config.yml)"

  desc "setup", "Create demo schema on the source database"
  long_desc <<~DESC
    Creates three tables on the source database: users, agents, and listings.

    The listings table is intentionally ordered with booleans and smallints
    interleaved among 8-byte columns to demonstrate the padding analysis
    feature of `pcrd analyze`.

    Any existing demo tables are dropped and recreated.

    If no pcrd.config.yml exists in the current directory, a sample config
    is written automatically — edit the host/port values to match your setup.
  DESC
  def setup
    with_source_pool do |config, pool|
      say "Connecting to #{config.source.host}:#{config.source.port}/#{config.source.database}..."

      say "Dropping existing demo tables (if any)..."
      pool.exec_sql(Pcrd::Demo::Schema::DROP_SQL)

      say "Creating users table..."
      pool.exec_sql(Pcrd::Demo::Schema::USERS_DDL)

      say "Creating agents table..."
      pool.exec_sql(Pcrd::Demo::Schema::AGENTS_DDL)

      say "Creating listings table (with intentionally poor column ordering)..."
      pool.exec_sql(Pcrd::Demo::Schema::LISTINGS_DDL)
      pool.exec_sql(Pcrd::Demo::Schema::LISTINGS_FK_DDL)
    end

    # Covers the case where --config pointed elsewhere and the default file
    # is still missing.
    write_sample_config unless config_file_exists?

    say ""
    say "Done. Run `pcrd demo seed` to populate with sample data.", :green
    say "Then run `pcrd analyze` to see the column padding analysis.", :green
  end

  desc "seed", "Generate sample data in the demo schema"
  long_desc <<~DESC
    Populates the demo tables with realistic fake data.

    Generates users and agents proportional to the listing count, then
    generates the requested number of listings referencing those agents.

    The data is seeded with a fixed random seed for reproducibility — running
    seed twice with the same --rows value produces the same rows (useful for
    testing). Pass --seed to override.
  DESC
  method_option :rows, type: :numeric, default: 50_000,
                       desc: "Number of listing rows to generate (users and agents scale proportionally)"
  method_option :seed, type: :numeric, default: 42,
                       desc: "Random seed for reproducible data generation"
  def seed
    counts = with_source_pool do |config, pool|
      generator = Pcrd::Demo::Generator.new(pool, seed: options[:seed])

      say "Seeding demo database at #{config.source.host}/#{config.source.database}..."
      say ""

      generator.generate(listing_count: options[:rows])
    end

    say ""
    say "Seeding complete:", :green
    say " users: #{format_count(counts[:users])}"
    say " agents: #{format_count(counts[:agents])}"
    say " listings: #{format_count(counts[:listings])}"
    say ""
    say "Run `pcrd analyze` to see the column padding report."
  end

  desc "reset", "Drop all demo tables and their data (the config file is kept)"
  def reset
    with_source_pool do |config, pool|
      say "Dropping demo tables on #{config.source.host}/#{config.source.database}..."
      pool.exec_sql(Pcrd::Demo::Schema::DROP_SQL)
    end

    say "Done.", :green
  end

  private

  # Loads the config, opens a client for the source database, yields
  # (config, client) and always closes the client, even when the block
  # raises. Connection and config-loading failures are converted to
  # Thor::Error so Thor prints the message instead of a backtrace.
  #
  # @return [Object] the value of the block
  def with_source_pool
    config = load_config!
    pool = Pcrd::Connection::Client.new(config.source)
    begin
      yield config, pool
    ensure
      pool.close
    end
  rescue Pcrd::Connection::Error => e
    raise Thor::Error, "Connection failed: #{e.message}"
  rescue Pcrd::Config::LoadError => e
    raise Thor::Error, e.message
  end

  # Loads the config from --config, or from the default path.
  def load_config!
    path = options[:config] || default_config_path
    Pcrd::Config::Loader.load(path)
  end

  # Returns the default config path. When that file does not exist yet, the
  # sample config is written to disk first so the loader has a file to read.
  def default_config_path
    default = Pcrd::Config::DEFAULT_CONFIG_FILE
    return default if File.exist?(default)

    write_sample_config
    default
  end

  def config_file_exists?
    File.exist?(Pcrd::Config::DEFAULT_CONFIG_FILE)
  end

  # Writes the sample config to the default path; never overwrites an
  # existing file.
  def write_sample_config
    path = Pcrd::Config::DEFAULT_CONFIG_FILE
    if File.exist?(path)
      say " (#{path} already exists — not overwriting)"
    else
      File.write(path, Pcrd::Demo::Schema::SAMPLE_CONFIG)
      say " Wrote sample config to #{path} — edit host/port values to match your setup.", :cyan
    end
  end

  # Formats an integer with thousands separators, e.g. 50000 -> "50,000".
  # Works on the reversed digit string so groups are counted from the right.
  def format_count(n)
    n.to_s.reverse.gsub(/(\d{3})(?=\d)/, '\\1,').reverse
  end
end
151
+ end
152
+ end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Pcrd
4
+ module Commands
5
+ # Builds the target-readiness manifest by comparing source and target.
6
+ # Read-only; does not modify either cluster.
7
class Readiness
  # @param config [Object] loaded pcrd configuration; this class reads
  #   +source+, +target+ and +migrate+ from it
  # @param options [Hash] command options, passed through Options.normalize
  def initialize(config, options = {})
    @config = config
    @options = Options.normalize(options)
  end

  # Builds the readiness manifest from a source and a target connection.
  # Both connections are closed before returning, including when building
  # the manifest (or opening the second connection) raises.
  #
  # @return [Object] whatever Pcrd::Readiness::Manifest#build returns
  # @raise [ConfigError] when target, tables, or source are not configured
  def run
    raise ConfigError, "target connection required for readiness" if @config.target.nil?
    raise ConfigError, "no tables configured" if (@config.migrate&.tables || []).empty?
    raise ConfigError, "source connection required" if @config.source.nil?

    source = Connection::Client.new(@config.source)
    target = Connection::Client.new(@config.target)

    Pcrd::Readiness::Manifest.new(
      source_pool: source, target_pool: target, config: @config
    ).build
  ensure
    # Either local is still nil if we failed before it was opened.
    source&.close
    target&.close
  end
end
29
+ end
30
+ end
@@ -0,0 +1,129 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "pastel"
4
+
5
+ module Pcrd
6
+ module Commands
7
+ # Displays the current migration state from the checkpoint database and
8
+ # (optionally) queries the live replication slot for current lag.
9
+ #
10
+ # Reads entirely from local state (checkpoint SQLite) so it works without
11
+ # an active connection to source or target. If source is reachable, also
12
+ # shows live replication lag and estimated time to cutover readiness.
13
class Status
  PASTEL = Pastel.new

  # Human-readable labels for the phase values read from the checkpoint.
  PHASE_LABELS = {
    new: "not started",
    backfill: "backfill in progress",
    streaming: "streaming (catchup phase)",
    cutover: "cutover complete"
  }.freeze

  # @param config [Object] loaded pcrd configuration; this class reads
  #   +source+ and +migrate+ from it
  # @param options [Hash] command options, passed through Options.normalize
  def initialize(config, options = {})
    @config = config
    @options = Options.normalize(options)
  end

  # Prints the migration status to stdout. When no checkpoint file exists,
  # prints a hint and returns without touching the source.
  #
  # @return [void]
  def run
    checkpoint_path = @config.migrate&.checkpoint_db || "./pcrd_checkpoint.sqlite3"

    unless File.exist?(checkpoint_path)
      puts
      puts " #{PASTEL.yellow("No checkpoint found at #{checkpoint_path}")}"
      puts " Run `pcrd migrate` to start the migration."
      puts
      return
    end

    store = Checkpoint::Store.new(checkpoint_path)
    begin
      print_status(store)
    ensure
      # Close the store even when printing fails part-way through.
      store.close
    end
  end

  private

  # Prints phase, start time, LSN (when recorded), per-table progress and,
  # if the source is reachable, live replication lag.
  def print_status(store)
    phase = store.phase
    started = store.started_at
    lsn = store.lsn

    puts
    puts PASTEL.bold("Migration status")
    puts PASTEL.dim("─" * 60)
    puts

    puts " Phase: #{PASTEL.bold(phase_label(phase))}"
    puts " Started: #{started || PASTEL.dim("unknown")}"
    # The line is only printed when an LSN is recorded, so no fallback text
    # is needed here.
    puts " LSN: #{lsn}" if lsn
    puts

    tables = @config.migrate&.tables || []
    if tables.any?
      puts " #{PASTEL.bold("Tables:")}"
      tables.each { |t| print_table_status(store, t.name) }
      puts
    end

    print_live_lag(store)
  end

  # Prints one progress line for a table from the checkpoint's batch stats.
  def print_table_status(store, table_name)
    stats = store.batch_stats(table: table_name)
    last_key = store.last_completed_key(table: table_name)

    total_rows = stats[:total_rows]
    batches = stats[:batch_count]
    rps = stats[:avg_rows_per_sec]

    if batches.zero?
      puts " #{PASTEL.dim("○")} #{table_name} #{PASTEL.dim("not started")}"
    else
      rps_label = rps > 0 ? " #{PASTEL.dim("avg #{format_count(rps.to_i)} rows/sec")}" : ""
      puts " #{PASTEL.green("✓")} #{table_name} " \
           "#{format_count(total_rows)} rows copied " \
           "(#{batches} batch#{batches == 1 ? '' : 'es'})#{rps_label}"
      puts " last key: #{last_key.inspect}" if last_key && @options[:verbose]
    end
  end

  # Queries the source for the slot's current lag and prints it. The lag
  # counts as ready for cutover when it is at or below the configured
  # threshold (default 1_048_576 bytes). A Connection::Error is reported as
  # "source not reachable" and swallowed; the connection is always closed.
  def print_live_lag(_store)
    return unless @config.source && @config.migrate&.replication_slot

    source_pool = Connection::Client.new(@config.source)
    begin
      lag_monitor = Monitor::Lag.new(
        source_pool: source_pool,
        slot_name: @config.migrate.replication_slot
      )

      lag = lag_monitor.lag_bytes
      threshold = @config.migrate.lag_threshold_bytes || 1_048_576

      if lag.nil?
        puts " #{PASTEL.dim("Replication slot not found or not active")}"
      elsif lag == 0
        puts " #{PASTEL.green("Replication lag: 0 bytes")} #{PASTEL.green("✓ Ready for cutover")}"
      elsif lag <= threshold
        puts " Replication lag: #{PASTEL.green(lag_monitor.summary)} #{PASTEL.green("✓ Ready for cutover")}"
      else
        puts " Replication lag: #{lag_monitor.summary}"
      end

      lsn = lag_monitor.confirmed_lsn
      puts " Confirmed LSN: #{lsn}" if lsn
    ensure
      source_pool.close
    end
  rescue Connection::Error
    puts " #{PASTEL.dim("(source not reachable — showing checkpoint data only)")}"
  end

  # Maps a phase to its label. Accepts the phase as a Symbol or a String;
  # unknown phases fall back to their string form.
  def phase_label(phase)
    key = phase.respond_to?(:to_sym) ? phase.to_sym : phase
    PHASE_LABELS[key] || phase.to_s
  end

  # Formats an integer with thousands separators, e.g. 50000 -> "50,000".
  def format_count(n)
    n.to_s.reverse.gsub(/(\d{3})(?=\d)/, '\\1,').reverse
  end
end
128
+ end
129
+ end
@@ -0,0 +1,172 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Pcrd
4
+ module Commands
5
+ # Compares row counts and spot-checks random rows between source and target.
6
+ #
7
+ # Safe to run at any time after backfill completes. Does not modify either cluster.
8
class Verify
  MismatchError = Class.new(StandardError)

  # passed: true when every table has equal counts and no mismatches.
  Result = Data.define(:passed, :tables)
  TableResult = Data.define(:table_name, :source_count, :target_count,
                            :sample_size, :mismatches)

  # @param config [Object] loaded pcrd configuration; this class reads
  #   +source+, +target+, +migrate+ and +verify+ from it
  # @param options [Hash] command options, passed through Options.normalize
  def initialize(config, options = {})
    @config = config
    @options = Options.normalize(options)
  end

  # Verifies every configured table and returns the combined result. Both
  # connections are closed before returning, including when a table check
  # raises something other than Connection::Error.
  #
  # @return [Result]
  # @raise [ConfigError] when source, target, or tables are not configured
  def run
    validate_config!

    source_pool = Connection::Client.new(@config.source)
    target_pool = Connection::Client.new(@config.target)
    # Precedence: command-line option, then config file, then 1_000.
    sample_size = @options[:"sample-size"] || @config.verify&.sample_size || 1_000

    table_results = (@config.migrate&.tables || []).map do |table_config|
      verify_table(source_pool, target_pool, table_config, sample_size)
    end

    Result.new(
      passed: table_results.all? { |r| r.mismatches.empty? && r.source_count == r.target_count },
      tables: table_results
    )
  ensure
    # Either local is still nil if we failed before it was opened.
    target_pool&.close
    source_pool&.close
  end

  private

  # Compares row counts for one table and, when they match and are
  # non-zero, spot-checks sampled rows. A Connection::Error is turned into
  # a TableResult carrying the error text as its only mismatch.
  def verify_table(source_pool, target_pool, table_config, sample_size)
    table_name = table_config.name
    src_count = source_pool.exec("SELECT COUNT(*) FROM #{Sql.quote_table(table_name)}")[0]["count"].to_i
    tgt_count = target_pool.exec("SELECT COUNT(*) FROM #{Sql.quote_table(table_name)}")[0]["count"].to_i

    mismatches = []

    if src_count == tgt_count && src_count > 0
      mismatches = spot_check(source_pool, target_pool, table_config, sample_size)
    end

    TableResult.new(
      table_name: table_name,
      source_count: src_count,
      target_count: tgt_count,
      sample_size: [sample_size, src_count].min,
      mismatches: mismatches
    )
  rescue Connection::Error => e
    TableResult.new(
      table_name: table_name,
      source_count: nil,
      target_count: nil,
      sample_size: 0,
      mismatches: ["Connection error: #{e.message}"]
    )
  end

  # Samples source rows, transforms each into its expected target shape, and
  # compares the values field-by-field against the matching target row.
  # This is what catches a transform that silently corrupts data — a row
  # count match alone does not.
  #
  # Returns an empty list (nothing checked) when the table has no primary
  # key or the sample is empty.
  def spot_check(source_pool, target_pool, table_config, sample_size)
    table_name = table_config.name
    reader = Schema::Reader.new(source_pool)
    source_cols = reader.read(table_name)
    pk_cols = reader.primary_key_columns(table_name)
    return [] if pk_cols.empty?

    transformer = Transform::RowTransformer.new(table_config, source_cols)
    pk_target = map_pk_to_target(pk_cols, table_config)

    sample_rows = sample_source_rows(source_pool, table_name, sample_size)
    return [] if sample_rows.empty?

    target_table = Sql.quote_table(table_name)
    # One "col = $n" term per primary-key column, in primary-key order.
    conditions = pk_target.each_with_index
                          .map { |col, i| "#{Sql.quote_ident(col)} = $#{i + 1}" }
                          .join(" AND ")

    mismatches = []
    sample_rows.each do |src_row|
      expected = transformer.transform(src_row) # { target_col => value }
      pk_values = pk_cols.map { |col| src_row[col] }
      pk_desc = pk_cols.zip(pk_values).map { |c, v| "#{c}=#{v}" }.join(",")

      tgt_row = target_pool.exec(
        "SELECT * FROM #{target_table} WHERE #{conditions}", pk_values
      ).first

      if tgt_row.nil?
        mismatches << "pk=#{pk_desc}: row missing on target"
        next
      end

      expected.each do |col, exp_val|
        act_val = tgt_row[col]
        next if values_equal?(exp_val, act_val)

        mismatches << "pk=#{pk_desc} col=#{col}: " \
                      "source=#{redact(exp_val)} target=#{redact(act_val)}"
      end
    end

    mismatches
  end

  # Samples up to sample_size rows cheaply. ORDER BY random() sorts the whole
  # table; instead use TABLESAMPLE SYSTEM (page-level random) for large
  # tables and a plain LIMIT for small ones. Oversample (3x) then cap so an
  # unlucky page selection still tends to fill the sample.
  def sample_source_rows(pool, table_name, sample_size)
    quoted = Sql.quote_table(table_name)
    est = Schema::Reader.new(pool).estimated_row_count(table_name)

    if est <= sample_size
      return pool.exec("SELECT * FROM #{quoted} LIMIT $1", [sample_size]).to_a
    end

    # Percentage of the table to sample, clamped to [0.01, 100].
    pct = [[sample_size * 100.0 / est * 3.0, 0.01].max, 100.0].min
    rows = pool.exec(
      "SELECT * FROM #{quoted} TABLESAMPLE SYSTEM (#{pct.round(6)}) LIMIT $1",
      [sample_size]
    ).to_a

    # TABLESAMPLE can come back empty on small/unlucky page layouts; fall
    # back to a plain LIMIT in that case only. A non-empty sample smaller
    # than sample_size is used as is.
    rows.empty? ? pool.exec("SELECT * FROM #{quoted} LIMIT $1", [sample_size]).to_a : rows
  end

  # Maps source primary-key column names to their target names, applying a
  # configured rename when one exists. Column specs are looked up by string
  # key first, then by symbol key.
  def map_pk_to_target(pk_source_cols, table_config)
    pk_source_cols.map do |src|
      spec = table_config.columns&.[](src) || table_config.columns&.[](src.to_sym)
      spec&.rename || src
    end
  end

  # Values come back from libpq as strings (or nil) on both sides, so a
  # textual comparison correctly treats e.g. int4 99 and int8 99 as equal
  # while still catching genuinely different values.
  def values_equal?(expected, actual)
    return true if expected.nil? && actual.nil?
    return false if expected.nil? || actual.nil?

    expected.to_s == actual.to_s
  end

  # Renders a value for a mismatch message: "NULL" for nil, otherwise its
  # string form truncated to 60 characters (57 plus "...").
  def redact(val)
    return "NULL" if val.nil?

    str = val.to_s
    str.length > 60 ? "#{str[0, 57]}..." : str
  end

  def validate_config!
    raise ConfigError, "source connection required" if @config.source.nil?
    raise ConfigError, "target connection required for verify" if @config.target.nil?
    raise ConfigError, "no tables configured" if (@config.migrate&.tables || []).empty?
  end
end
171
+ end
172
+ end
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Pcrd
4
+ module Config
5
+ AddColumn = Data.define(:name, :type, :default) # immutable Data value object; members: name, type, default
6
+ end
7
+ end
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Pcrd
4
+ module Config
5
+ # tables: Array<String> of table names, or nil to use migrate.tables
6
+ AnalyzeConfig = Data.define(:tables) # immutable Data value object with a single member, tables
7
+ end
8
+ end
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Pcrd
4
+ module Config
5
+ # Spec for an existing column. All fields are optional:
6
+ # nil type means keep the current type; nil rename means keep the name;
7
+ # drop: false means keep the column.
8
+ ColumnSpec = Data.define(:type, :rename, :drop) # immutable Data value object; members: type, rename, drop
9
+ end
10
+ end
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Pcrd
4
+ module Config
5
+ Connection = Data.define(:host, :port, :database, :user, :password) # immutable Data value object holding connection settings; not the Pcrd::Connection namespace
6
+ end
7
+ end
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Pcrd
4
+ module Config
5
+ CutoverConfig = Data.define(:sequence_buffer, :lag_drain_timeout) # immutable Data value object; members: sequence_buffer, lag_drain_timeout
6
+ end
7
+ end
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Pcrd
4
+ module Config
5
+ class LoadError < Pcrd::Error; end # config-loading error under Pcrd::Config; a different constant from Ruby's top-level ::LoadError
6
+ end
7
+ end