pcrd 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +24 -0
  3. data/LICENSE +21 -0
  4. data/README.md +614 -0
  5. data/bin/pcrd +7 -0
  6. data/lib/pcrd/advisory_lock.rb +50 -0
  7. data/lib/pcrd/apply/engine.rb +184 -0
  8. data/lib/pcrd/apply/worker.rb +97 -0
  9. data/lib/pcrd/backfill/batch.rb +158 -0
  10. data/lib/pcrd/backfill/engine.rb +153 -0
  11. data/lib/pcrd/checkpoint/store.rb +217 -0
  12. data/lib/pcrd/cli.rb +274 -0
  13. data/lib/pcrd/commands/analyze.rb +125 -0
  14. data/lib/pcrd/commands/cleanup.rb +112 -0
  15. data/lib/pcrd/commands/demo.rb +152 -0
  16. data/lib/pcrd/commands/readiness.rb +30 -0
  17. data/lib/pcrd/commands/status.rb +129 -0
  18. data/lib/pcrd/commands/verify.rb +172 -0
  19. data/lib/pcrd/config/add_column.rb +7 -0
  20. data/lib/pcrd/config/analyze_config.rb +8 -0
  21. data/lib/pcrd/config/column_spec.rb +10 -0
  22. data/lib/pcrd/config/connection.rb +7 -0
  23. data/lib/pcrd/config/cutover_config.rb +7 -0
  24. data/lib/pcrd/config/load_error.rb +7 -0
  25. data/lib/pcrd/config/loader.rb +158 -0
  26. data/lib/pcrd/config/migrate_config.rb +21 -0
  27. data/lib/pcrd/config/root.rb +9 -0
  28. data/lib/pcrd/config/schema.rb +62 -0
  29. data/lib/pcrd/config/table.rb +9 -0
  30. data/lib/pcrd/config/verify_config.rb +7 -0
  31. data/lib/pcrd/config.rb +7 -0
  32. data/lib/pcrd/connection/client.rb +129 -0
  33. data/lib/pcrd/connection/error.rb +7 -0
  34. data/lib/pcrd/connection/replication.rb +108 -0
  35. data/lib/pcrd/cutover/orchestrator.rb +108 -0
  36. data/lib/pcrd/cutover/sequences.rb +138 -0
  37. data/lib/pcrd/demo/generator.rb +214 -0
  38. data/lib/pcrd/demo/schema.rb +154 -0
  39. data/lib/pcrd/error.rb +12 -0
  40. data/lib/pcrd/migration/orchestrator.rb +272 -0
  41. data/lib/pcrd/monitor/lag.rb +107 -0
  42. data/lib/pcrd/options.rb +15 -0
  43. data/lib/pcrd/output/analyze_printer.rb +173 -0
  44. data/lib/pcrd/output/cutover_printer.rb +128 -0
  45. data/lib/pcrd/output/preflight_printer.rb +119 -0
  46. data/lib/pcrd/output/readiness_printer.rb +72 -0
  47. data/lib/pcrd/preflight.rb +331 -0
  48. data/lib/pcrd/readiness/manifest.rb +201 -0
  49. data/lib/pcrd/replication/consumer.rb +235 -0
  50. data/lib/pcrd/replication/error.rb +10 -0
  51. data/lib/pcrd/replication/pgoutput/messages.rb +68 -0
  52. data/lib/pcrd/replication/pgoutput/parser.rb +316 -0
  53. data/lib/pcrd/reporter/console.rb +46 -0
  54. data/lib/pcrd/reporter/null.rb +14 -0
  55. data/lib/pcrd/schema/column.rb +59 -0
  56. data/lib/pcrd/schema/ddl.rb +71 -0
  57. data/lib/pcrd/schema/diff_entry.rb +36 -0
  58. data/lib/pcrd/schema/differ.rb +175 -0
  59. data/lib/pcrd/schema/object_reader.rb +187 -0
  60. data/lib/pcrd/schema/packer.rb +90 -0
  61. data/lib/pcrd/schema/reader.rb +118 -0
  62. data/lib/pcrd/schema/setup.rb +143 -0
  63. data/lib/pcrd/schema/setup_error.rb +9 -0
  64. data/lib/pcrd/schema/table_not_found.rb +8 -0
  65. data/lib/pcrd/schema/type_registry.rb +116 -0
  66. data/lib/pcrd/sql.rb +55 -0
  67. data/lib/pcrd/transform/row_transformer.rb +69 -0
  68. data/lib/pcrd/transform/type_map.rb +209 -0
  69. data/lib/pcrd/transform/validator.rb +106 -0
  70. data/lib/pcrd/version.rb +5 -0
  71. data/lib/pcrd.rb +11 -0
  72. metadata +231 -0
@@ -0,0 +1,217 @@
1
+ # frozen_string_literal: true
2
+
3
require "json"
require "time"

require "sqlite3"
5
+
6
module Pcrd
  module Checkpoint
    # SQLite-backed store for migration progress.
    #
    # Tracks two things:
    #   1. Metadata (phase, LSN watermark, start time, replication object
    #      names) as key/value rows in the `metadata` table.
    #   2. Completed batches, one row per successfully copied batch, with
    #      start/end key, row count, duration, and completion timestamp.
    #
    # The per-batch log is what makes resumption possible and auditable: on
    # resume, `last_completed_key` returns the end_key of the most recently
    # recorded batch (highest row id) for a table. The same log feeds the
    # throughput numbers returned by `batch_stats`.
    #
    # Every access to the underlying connection goes through a single Mutex,
    # taken only in the lowest-level methods so public wrappers never
    # re-enter it (Ruby's Mutex is not reentrant).
    class Store
      # A PostgreSQL LSN is two hex segments joined by a slash, e.g. "16/B374D848".
      LSN_FORMAT = /\A[0-9A-Fa-f]+\/[0-9A-Fa-f]+\z/

      # Idempotent schema bootstrap, executed on every open.
      SCHEMA_SQL = <<~SQL.freeze
        CREATE TABLE IF NOT EXISTS metadata (
          key TEXT PRIMARY KEY,
          value TEXT NOT NULL
        );

        CREATE TABLE IF NOT EXISTS batches (
          id INTEGER PRIMARY KEY AUTOINCREMENT,
          table_name TEXT NOT NULL,
          start_key TEXT NOT NULL,
          end_key TEXT NOT NULL,
          row_count INTEGER NOT NULL,
          duration_ms INTEGER NOT NULL,
          completed_at TEXT NOT NULL
        );

        CREATE INDEX IF NOT EXISTS idx_batches_table
          ON batches (table_name, id DESC);
      SQL

      # Opens (creating if necessary) the checkpoint database at +path+ and
      # ensures the schema exists.
      #
      # @param path [String] filesystem path of the SQLite database
      # @raise [SQLite3::Exception] if the database cannot be opened or the
      #   schema cannot be created; the handle is closed before re-raising
      def initialize(path)
        @path = path
        # Created before the first @db use so no code path can ever observe
        # a store that has a connection but no lock.
        @mutex = Mutex.new
        @db = SQLite3::Database.new(path)
        begin
          @db.results_as_hash = true
          @db.busy_timeout = 5_000
          @db.execute_batch(SCHEMA_SQL)
        rescue StandardError
          # Do not leak the open handle when schema bootstrap fails.
          @db.close
          raise
        end
      end

      # Closes the database connection. Safe to call more than once.
      def close
        @mutex.synchronize { @db.close unless @db.closed? }
      end

      # ── phase & LSN metadata ─────────────────────────────────────────────

      # @return [Symbol] the stored phase, or :new when none has been recorded
      def phase
        val = get_meta("phase")
        val ? val.to_sym : :new
      end

      # @param phase [#to_s] phase name to persist
      def set_phase(phase)
        set_meta("phase", phase.to_s)
      end

      # @return [String, nil] the last recorded LSN, or nil if none
      def lsn
        get_meta("current_lsn")
      end

      # Persists the current LSN after validating its textual format.
      #
      # @param lsn [String] LSN such as "16/B374D848"
      # @raise [ArgumentError] if +lsn+ is not a String matching LSN_FORMAT
      def set_lsn(lsn)
        unless lsn.is_a?(String) && lsn.match?(LSN_FORMAT)
          raise ArgumentError, "invalid LSN: #{lsn.inspect}"
        end

        set_meta("current_lsn", lsn)
      end

      # @return [String, nil] the LSN recorded at backfill start, or nil
      def backfill_start_lsn
        get_meta("backfill_start_lsn")
      end

      # Persists the LSN at which backfill started.
      #
      # NOTE(review): unlike #set_lsn this does not validate against
      # LSN_FORMAT, and nil is stored as "" via #set_meta. Callers are not
      # visible here, so validation is left unchanged; confirm and tighten.
      def set_backfill_start_lsn(lsn)
        set_meta("backfill_start_lsn", lsn)
      end

      # Replication objects this migration created, recorded so a resume can be
      # cross-checked against the config and cleanup knows what to remove.
      def replication_slot
        get_meta("replication_slot")
      end

      def set_replication_slot(name)
        set_meta("replication_slot", name)
      end

      def publication
        get_meta("publication")
      end

      def set_publication(name)
        set_meta("publication", name)
      end

      # @return [String, nil] the stored start timestamp, exactly as written
      def started_at
        get_meta("started_at")
      end

      def set_started_at(ts)
        set_meta("started_at", ts)
      end

      # ── batch tracking ───────────────────────────────────────────────────

      # Record a successfully completed batch.
      # Keys are JSON-encoded to support multi-column primary keys.
      #
      # @param table [#to_s] table the batch belongs to
      # @param start_key [Object] JSON-serializable first key of the batch
      # @param end_key [Object] JSON-serializable last key of the batch
      # @param row_count [#to_i] rows copied
      # @param duration_ms [#to_i] elapsed milliseconds
      def record_batch(table:, start_key:, end_key:, row_count:, duration_ms:)
        @mutex.synchronize do
          @db.execute(
            "INSERT INTO batches (table_name, start_key, end_key, row_count, duration_ms, completed_at) " \
            "VALUES (?, ?, ?, ?, ?, ?)",
            [table.to_s,
             JSON.generate(start_key),
             JSON.generate(end_key),
             row_count.to_i,
             duration_ms.to_i,
             # Time#iso8601 comes from the "time" stdlib on Ruby < 3.4.
             Time.now.iso8601]
          )
        end
      end

      # Returns the end_key of the last completed batch for a table, decoded from JSON.
      # "Last" means most recently recorded (highest row id).
      # Returns nil if no batches have been recorded for this table (fresh start).
      def last_completed_key(table:)
        row = @mutex.synchronize do
          @db.get_first_row(
            "SELECT end_key FROM batches WHERE table_name = ? ORDER BY id DESC LIMIT 1",
            [table.to_s]
          )
        end
        row ? JSON.parse(row["end_key"]) : nil
      end

      # Returns aggregate stats for a table's completed batches.
      #
      # avg_rows_per_sec is the mean of per-batch rates; batches with a
      # duration of 0 ms are excluded from that mean by NULLIF.
      #
      # @return [Hash] { batch_count:, total_rows:, avg_rows_per_sec: }
      def batch_stats(table:)
        row = @mutex.synchronize do
          @db.get_first_row(
            "SELECT COUNT(*) AS cnt, SUM(row_count) AS total_rows, " \
            "AVG(CAST(row_count AS REAL) / NULLIF(duration_ms, 0) * 1000) AS avg_rps " \
            "FROM batches WHERE table_name = ?",
            [table.to_s]
          )
        end
        {
          batch_count: row["cnt"].to_i,
          total_rows: row["total_rows"].to_i,
          avg_rows_per_sec: row["avg_rps"]&.round(1) || 0.0
        }
      end

      # All completed batches for a table, newest first.
      #
      # @param limit [Integer] maximum number of rows returned
      # @return [Array<Hash>] one hash per batch with decoded keys
      def batches(table:, limit: 100)
        rows = @mutex.synchronize do
          @db.execute(
            "SELECT * FROM batches WHERE table_name = ? ORDER BY id DESC LIMIT ?",
            [table.to_s, limit]
          )
        end
        rows.map do |row|
          {
            id: row["id"].to_i,
            table_name: row["table_name"],
            start_key: JSON.parse(row["start_key"]),
            end_key: JSON.parse(row["end_key"]),
            row_count: row["row_count"].to_i,
            duration_ms: row["duration_ms"].to_i,
            completed_at: row["completed_at"]
          }
        end
      end

      # @return [Integer] sum of row_count over the table's batches (0 if none)
      def total_rows_copied(table:)
        row = @mutex.synchronize do
          @db.get_first_row(
            "SELECT COALESCE(SUM(row_count), 0) AS total FROM batches WHERE table_name = ?",
            [table.to_s]
          )
        end
        row["total"].to_i
      end

      private

      # Reads one metadata value under the lock.
      #
      # @return [String, nil] stored value, or nil when the key is absent
      def get_meta(key)
        row = @mutex.synchronize do
          @db.get_first_row("SELECT value FROM metadata WHERE key = ?", [key])
        end
        row ? row["value"] : nil
      end

      # Upserts one metadata value under the lock. The value is stringified,
      # so nil is stored as an empty string.
      def set_meta(key, value)
        @mutex.synchronize do
          @db.execute(
            "INSERT INTO metadata (key, value) VALUES (?, ?) " \
            "ON CONFLICT(key) DO UPDATE SET value = excluded.value",
            [key, value.to_s]
          )
        end
      end
    end
  end
end
data/lib/pcrd/cli.rb ADDED
@@ -0,0 +1,274 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "thor"
4
+ require "pastel"
5
+
6
module Pcrd
  # Thor-based command-line entry point. Each public method is a `pcrd`
  # subcommand; domain errors are translated into Thor::Error so the user
  # sees a one-line message and (via exit_on_failure?) a non-zero exit code
  # instead of a stack trace.
  class CLI < Thor
    PASTEL = Pastel.new

    # Make Thor exit with a non-zero status when a command raises Thor::Error.
    def self.exit_on_failure?
      true
    end

    class_option :config, type: :string, aliases: "-c",
                          desc: "Path to migration YAML config (required for all commands)"

    map %w[--version -v] => :version
    desc "--version, -v", "Show pcrd version"
    def version
      say "pcrd #{Pcrd::VERSION}"
    end

    desc "analyze", "Analyze column padding for source tables"
    long_desc <<~DESC
      Reads the source table schema and reports the current column layout alongside
      the optimal column ordering for minimal padding waste. Estimates bytes saved
      per row and total storage reclaimed at current row count.

      With --compare-target, also connects to the target cluster and shows a
      side-by-side diff: type changes, renames, added/dropped columns, and the
      padding delta between source and target schemas.

      This command is read-only and requires no migration to be in progress.
    DESC
    method_option :table, type: :string, aliases: "-t",
                          desc: "Analyze a specific table only (default: all tables in config)"
    method_option :"compare-target", type: :boolean, default: false,
                                     desc: "Compare source and target schemas side-by-side"
    def analyze
      config = load_config!
      Commands::Analyze.new(config, options).run
    rescue Commands::Analyze::Error => e
      raise Thor::Error, "ERROR: #{e.message}"
    rescue Connection::Error => e
      raise Thor::Error, "Connection failed: #{e.message}"
    end

    desc "migrate", "Start or resume the migration"
    long_desc <<~DESC
      Runs preflight checks, creates the replication slot and publication on source,
      creates the target table with the new schema, then runs backfill and streaming
      concurrently until the operator triggers cutover.

      The process is resumable: if interrupted, re-run with --resume to pick up
      from the last completed backfill batch.

      Use --preflight-only to validate the config and source data without starting
      the migration. Use --backfill-only to copy existing rows without starting
      the WAL streaming consumer.
    DESC
    method_option :resume, type: :boolean, default: false,
                           desc: "Resume an interrupted migration from the last checkpoint"
    method_option :"preflight-only", type: :boolean, default: false,
                                     desc: "Run preflight checks and print DDL only; do not start migration"
    method_option :"backfill-only", type: :boolean, default: false,
                                    desc: "Copy existing rows only; do not start WAL streaming consumer"
    method_option :"dry-run", type: :boolean, default: false,
                              desc: "Print preflight report and target DDL without touching either cluster"
    method_option :yes, type: :boolean, default: false, aliases: "-y",
                        desc: "Skip the confirmation prompt before starting migration"
    method_option :"force-overwrite", type: :boolean, default: false,
                                      desc: "Drop and recreate target tables if they already exist"
    def migrate
      config = load_config!
      # --dry-run is handled exactly like --preflight-only here.
      preflight_only = options[:"preflight-only"] || options[:"dry-run"]

      unless preflight_only
        if config.target.nil?
          raise Thor::Error,
                "ERROR: migrate requires a 'target' section in your config.\n\n" \
                "Use --preflight-only to validate without a target connection."
        end

        if config.migrate.nil?
          raise Thor::Error, "ERROR: migrate requires a 'migrate' section in your config."
        end
      end

      result = Preflight.new(config, options).run
      Output::PreflightPrinter.new.print(result)

      # exit never returns, so nothing below runs in preflight-only mode.
      exit(result.passed ? 0 : 1) if preflight_only

      unless result.passed
        raise Thor::Error, "Preflight failed. Fix the issue(s) above before running migrate."
      end

      return unless options[:yes] || confirmed?("Proceed with migration? [y/N]")

      orchestrator = Migration::Orchestrator.new(config: config, options: options)
      # Turn Ctrl-C / SIGTERM into a cooperative stop request.
      trap("INT") { orchestrator.request_stop }
      trap("TERM") { orchestrator.request_stop }
      orchestrator.run
    rescue Replication::Error => e
      raise Thor::Error, "ERROR: #{e.message}\n\nReplication stopped. Resume with --resume once the cause is resolved."
    rescue Connection::Error => e
      raise Thor::Error, "Connection failed: #{e.message}"
    rescue Pcrd::Error => e
      raise Thor::Error, "ERROR: #{e.message}"
    end

    desc "status", "Show current migration phase and replication lag"
    long_desc <<~DESC
      Reads the checkpoint database and queries pg_replication_slots to show:
      current phase, backfill progress, replication lag in bytes and estimated
      seconds, and whether the migration is ready for cutover.
    DESC
    def status
      config = load_config!
      Commands::Status.new(config, options).run
    rescue Config::LoadError => e
      raise Thor::Error, "ERROR: #{e.message}"
    rescue Connection::Error => e
      # status queries the source cluster, so translate connection failures
      # the same way every other command does.
      raise Thor::Error, "Connection failed: #{e.message}"
    rescue Pcrd::Error => e
      raise Thor::Error, "ERROR: #{e.message}"
    end

    desc "readiness", "Report target objects (indexes, constraints) to add before cutover"
    long_desc <<~DESC
      The load schema created by `pcrd migrate` is intentionally minimal — just
      the table and its primary key — so the bulk copy is fast. This command
      compares the source and target and reports the secondary objects that must
      be added to the target before cutover: non-PK indexes and foreign-key,
      unique, and check constraints. It prints runnable DDL (CREATE INDEX
      CONCURRENTLY / ALTER TABLE ADD CONSTRAINT) for the missing ones.

      Objects that reference dropped or renamed columns are flagged for manual
      review rather than auto-generated. Sequences and identity columns are
      listed for reference but are restored automatically by `pcrd cutover`.

      Read-only; safe to run any time after backfill.
    DESC
    def readiness
      config = load_config!
      result = Commands::Readiness.new(config, options).run
      Output::ReadinessPrinter.new.print(result)
    rescue Connection::Error => e
      raise Thor::Error, "Connection failed: #{e.message}"
    rescue Pcrd::Error => e
      raise Thor::Error, "ERROR: #{e.message}"
    end

    desc "cutover", "Trigger the cutover sequence"
    long_desc <<~DESC
      Drains remaining replication lag to zero, advances sequences on the target
      cluster, runs row-count verification, and prints a cutover report.

      The application must be in maintenance mode before running this command.
      Use --maintenance-confirmed to skip the interactive confirmation prompt.

      After this command completes, update DATABASE_URL to point at the target
      cluster and restart the application.
    DESC
    method_option :"maintenance-confirmed", type: :boolean, default: false,
                                            desc: "Skip interactive confirmation that the app is in maintenance mode"
    def cutover
      config = load_config!

      if config.target.nil?
        raise Thor::Error, "ERROR: cutover requires a 'target' section in your config."
      end

      unless options[:"maintenance-confirmed"]
        say "\nThe application must be in maintenance mode before continuing."
        say "Maintenance mode options:"
        say " pgBouncer: PAUSE <database>"
        say " Kubernetes: kubectl scale --replicas=0 deployment/app"
        say " Rails: enable maintenance middleware"
        say ""
        return unless confirmed?("Is the application in maintenance mode? [y/N]")
      end

      source_pool = Connection::Client.new(config.source)
      target_pool = Connection::Client.new(config.target)
      printer = Output::CutoverPrinter.new

      say "\nRunning cutover sequence..."
      orchestrator = Cutover::Orchestrator.new(
        source_pool: source_pool,
        target_pool: target_pool,
        config: config
      )

      result = orchestrator.run(on_progress: ->(msg) { say " #{msg}" })

      printer.print(result)

      # SystemExit still passes through the ensure clause below, so both
      # pools are closed before the process terminates.
      exit(result.passed ? 0 : 1)
    rescue Connection::Error => e
      raise Thor::Error, "Connection failed: #{e.message}"
    rescue Pcrd::Error => e
      raise Thor::Error, "ERROR: #{e.message}"
    ensure
      # Either pool may be nil if we returned or failed before it was opened.
      source_pool&.close
      target_pool&.close
    end

    desc "verify", "Compare row counts and spot-check rows across clusters"
    long_desc <<~DESC
      Compares row counts on source and target for each migrated table, then
      spot-checks a random sample of rows field-by-field. Reports any mismatches.

      Safe to run at any point after backfill completes.
    DESC
    method_option :"sample-size", type: :numeric, default: 1_000,
                                  desc: "Number of rows to spot-check per table"
    method_option :"post-cutover", type: :boolean, default: false,
                                   desc: "Post-cutover mode: compare against the now-live target cluster"
    def verify
      config = load_config!
      result = Commands::Verify.new(config, options).run
      Output::CutoverPrinter.new.print_verify(result)
      exit(result.passed ? 0 : 1)
    rescue Connection::Error => e
      raise Thor::Error, "Connection failed: #{e.message}"
    rescue Pcrd::Error => e
      raise Thor::Error, "ERROR: #{e.message}"
    end

    desc "demo SUBCOMMAND", "Set up and seed a demo database for testing and demonstration"
    subcommand "demo", Commands::Demo

    desc "cleanup", "Drop replication slot, publication, and checkpoint"
    long_desc <<~DESC
      Drops the replication slot and publication on the source cluster and deletes
      the local checkpoint database. Run this after the application has been
      successfully cut over to the target cluster and you no longer need to roll back.

      With --drop-source, also drops the source tables. This is irreversible and
      requires typing the table name to confirm.
    DESC
    method_option :"drop-source", type: :boolean, default: false,
                                  desc: "Also drop source tables after cleanup (irreversible; requires confirmation)"
    def cleanup
      config = load_config!
      Commands::Cleanup.new(config, options).run
    rescue Connection::Error => e
      raise Thor::Error, "Connection failed: #{e.message}"
    rescue Pcrd::Error => e
      raise Thor::Error, "ERROR: #{e.message}"
    end

    private

    # Asks a yes/no question and returns true only for an explicit "y"
    # (case-insensitive, surrounding whitespace ignored). `ask` returns nil
    # when stdin hits EOF (e.g. a non-interactive run), which is treated as
    # "no" rather than crashing on nil.strip.
    def confirmed?(prompt)
      ask(prompt).to_s.strip.downcase == "y"
    end

    # Resolves the config file path and returns a loaded Config::Root.
    # Falls back to pcrd.config.yml in the current directory if --config is omitted.
    # Raises Thor::Error with a clear message if the file cannot be loaded.
    def load_config!
      path = options[:config] || default_config_path
      Config::Loader.load(path)
    rescue Config::LoadError => e
      raise Thor::Error, "ERROR: #{e.message}"
    end

    # Returns Config::DEFAULT_CONFIG_FILE when that file exists; otherwise
    # raises Thor::Error explaining how to supply a config.
    def default_config_path
      default = Config::DEFAULT_CONFIG_FILE
      return default if File.exist?(default)

      raise Thor::Error,
            "ERROR: No config file found.\n\n" \
            "Create pcrd.config.yml in the current directory, or pass --config path/to/config.yml\n\n" \
            "Run `pcrd help migrate` for configuration documentation."
    end

    # Thin alias for load_config!; no caller is visible in this file.
    def require_config!
      load_config!
    end
  end
end
@@ -0,0 +1,125 @@
1
+ # frozen_string_literal: true
2
+
3
module Pcrd
  module Commands
    # Implements `pcrd analyze`: reads source table schemas and prints either
    # a per-table padding report or, with --compare-target, a source/target
    # diff report.
    class Analyze
      class Error < Pcrd::Error; end

      # @param config [Config::Root] loaded configuration
      # @param options [Hash] CLI options; normalized through Options.normalize
      def initialize(config, options = {})
        @config = config
        @options = Options.normalize(options)
      end

      # Runs the analysis and prints the report(s).
      #
      # @return [nil]
      # @raise [Error] if the config lacks what the requested mode needs
      def run
        validate_config!

        source_pool = Connection::Client.new(@config.source)
        begin
          reader = Schema::Reader.new(source_pool)
          packer = Schema::Packer.new
          printer = Output::AnalyzePrinter.new

          if compare_target?
            run_compare(reader, packer, printer)
          else
            run_source_only(reader, packer, printer)
          end
        ensure
          # Close the source connection even when a read or print step raises.
          source_pool.close
        end

        nil
      end

      private

      def compare_target?
        @options[:"compare-target"]
      end

      # Prints one padding report per table using only the source cluster.
      def run_source_only(reader, packer, printer)
        tables_to_analyze.each do |table_name|
          columns = reader.read(table_name)
          row_count = reader.estimated_row_count(table_name)
          report = packer.report(columns)

          printer.print_table_report(
            table_name: table_name,
            row_count: row_count,
            report: report
          )
        end
      end

      # Prints one diff report per table, comparing source columns against
      # the target columns (live if the table exists there, else nil so the
      # differ can derive them from the table config).
      def run_compare(reader, packer, printer)
        validate_compare_config!

        target_pool = Connection::Client.new(@config.target)
        begin
          target_reader = Schema::Reader.new(target_pool)
          differ = Schema::Differ.new

          tables_to_analyze.each do |table_name|
            source_cols = reader.read(table_name)
            row_count = reader.estimated_row_count(table_name)
            table_config = find_table_config(table_name)

            target_cols, target_is_live = resolve_target_columns(
              table_name, table_config, target_reader, source_cols
            )

            entries = differ.diff(
              source_columns: source_cols,
              table_config: table_config,
              target_columns: target_cols
            )

            printer.print_diff_report(
              table_name: table_name,
              row_count: row_count,
              diff_entries: entries,
              packer: packer,
              target_is_live: target_is_live
            )
          end
        ensure
          # Close the target connection even when a table fails mid-loop.
          target_pool.close
        end
      end

      # Returns [target_columns_or_nil, is_live_boolean].
      # Prefers a live target DB if the table exists; falls back to synthesis.
      # The table config and source columns are accepted for call-site
      # symmetry but are not needed to make this decision.
      def resolve_target_columns(table_name, _table_config, target_reader, _source_cols)
        if target_reader.table_exists?(table_name)
          [target_reader.read(table_name), true]
        else
          # Synthesize: differ will build target columns from source + spec.
          [nil, false]
        end
      rescue Connection::Error
        # Target DB unreachable — fall back to synthesis.
        [nil, false]
      end

      # Table names to analyze, by precedence: --table, then the config's
      # analyze.tables, then the names of migrate.tables.
      #
      # @raise [Error] when none of the three sources yields a table
      def tables_to_analyze
        if @options[:table]
          [@options[:table]]
        elsif @config.analyze&.tables&.any?
          @config.analyze.tables
        elsif @config.migrate&.tables&.any?
          @config.migrate.tables.map(&:name)
        else
          raise Error, "Nothing to analyze. Add an 'analyze' or 'migrate' section to your " \
                       "config, or pass --table TABLE_NAME."
        end
      end

      # @return [Object, nil] the migrate-section entry for +table_name+, or
      #   nil when there is no migrate section or no matching table
      def find_table_config(table_name)
        @config.migrate&.tables&.find { |t| t.name == table_name }
      end

      def validate_config!
        raise Error, "source connection is required for analyze" if @config.source.nil?
      end

      def validate_compare_config!
        return unless @config.target.nil?

        raise Error, "--compare-target requires a 'target' section in your config"
      end
    end
  end
end