pcrd 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +24 -0
  3. data/LICENSE +21 -0
  4. data/README.md +614 -0
  5. data/bin/pcrd +7 -0
  6. data/lib/pcrd/advisory_lock.rb +50 -0
  7. data/lib/pcrd/apply/engine.rb +184 -0
  8. data/lib/pcrd/apply/worker.rb +97 -0
  9. data/lib/pcrd/backfill/batch.rb +158 -0
  10. data/lib/pcrd/backfill/engine.rb +153 -0
  11. data/lib/pcrd/checkpoint/store.rb +217 -0
  12. data/lib/pcrd/cli.rb +274 -0
  13. data/lib/pcrd/commands/analyze.rb +125 -0
  14. data/lib/pcrd/commands/cleanup.rb +112 -0
  15. data/lib/pcrd/commands/demo.rb +152 -0
  16. data/lib/pcrd/commands/readiness.rb +30 -0
  17. data/lib/pcrd/commands/status.rb +129 -0
  18. data/lib/pcrd/commands/verify.rb +172 -0
  19. data/lib/pcrd/config/add_column.rb +7 -0
  20. data/lib/pcrd/config/analyze_config.rb +8 -0
  21. data/lib/pcrd/config/column_spec.rb +10 -0
  22. data/lib/pcrd/config/connection.rb +7 -0
  23. data/lib/pcrd/config/cutover_config.rb +7 -0
  24. data/lib/pcrd/config/load_error.rb +7 -0
  25. data/lib/pcrd/config/loader.rb +158 -0
  26. data/lib/pcrd/config/migrate_config.rb +21 -0
  27. data/lib/pcrd/config/root.rb +9 -0
  28. data/lib/pcrd/config/schema.rb +62 -0
  29. data/lib/pcrd/config/table.rb +9 -0
  30. data/lib/pcrd/config/verify_config.rb +7 -0
  31. data/lib/pcrd/config.rb +7 -0
  32. data/lib/pcrd/connection/client.rb +129 -0
  33. data/lib/pcrd/connection/error.rb +7 -0
  34. data/lib/pcrd/connection/replication.rb +108 -0
  35. data/lib/pcrd/cutover/orchestrator.rb +108 -0
  36. data/lib/pcrd/cutover/sequences.rb +138 -0
  37. data/lib/pcrd/demo/generator.rb +214 -0
  38. data/lib/pcrd/demo/schema.rb +154 -0
  39. data/lib/pcrd/error.rb +12 -0
  40. data/lib/pcrd/migration/orchestrator.rb +272 -0
  41. data/lib/pcrd/monitor/lag.rb +107 -0
  42. data/lib/pcrd/options.rb +15 -0
  43. data/lib/pcrd/output/analyze_printer.rb +173 -0
  44. data/lib/pcrd/output/cutover_printer.rb +128 -0
  45. data/lib/pcrd/output/preflight_printer.rb +119 -0
  46. data/lib/pcrd/output/readiness_printer.rb +72 -0
  47. data/lib/pcrd/preflight.rb +331 -0
  48. data/lib/pcrd/readiness/manifest.rb +201 -0
  49. data/lib/pcrd/replication/consumer.rb +235 -0
  50. data/lib/pcrd/replication/error.rb +10 -0
  51. data/lib/pcrd/replication/pgoutput/messages.rb +68 -0
  52. data/lib/pcrd/replication/pgoutput/parser.rb +316 -0
  53. data/lib/pcrd/reporter/console.rb +46 -0
  54. data/lib/pcrd/reporter/null.rb +14 -0
  55. data/lib/pcrd/schema/column.rb +59 -0
  56. data/lib/pcrd/schema/ddl.rb +71 -0
  57. data/lib/pcrd/schema/diff_entry.rb +36 -0
  58. data/lib/pcrd/schema/differ.rb +175 -0
  59. data/lib/pcrd/schema/object_reader.rb +187 -0
  60. data/lib/pcrd/schema/packer.rb +90 -0
  61. data/lib/pcrd/schema/reader.rb +118 -0
  62. data/lib/pcrd/schema/setup.rb +143 -0
  63. data/lib/pcrd/schema/setup_error.rb +9 -0
  64. data/lib/pcrd/schema/table_not_found.rb +8 -0
  65. data/lib/pcrd/schema/type_registry.rb +116 -0
  66. data/lib/pcrd/sql.rb +55 -0
  67. data/lib/pcrd/transform/row_transformer.rb +69 -0
  68. data/lib/pcrd/transform/type_map.rb +209 -0
  69. data/lib/pcrd/transform/validator.rb +106 -0
  70. data/lib/pcrd/version.rb +5 -0
  71. data/lib/pcrd.rb +11 -0
  72. metadata +231 -0
@@ -0,0 +1,316 @@
1
+ # frozen_string_literal: true
2
+
3
module Pcrd
  module Replication
    module Pgoutput
      # Errors raised by the parser.
      class ParseError < StandardError; end
      class UnknownMessage < ParseError; end

      # Decodes raw pgoutput binary messages into Messages::* structs.
      #
      # The parser is stateful: it maintains a relation cache so that
      # Insert/Update/Delete messages (which only carry a relation OID) can be
      # enriched with column names from the most recently seen Relation message
      # for that OID. Always feed the stream in LSN order; Relation messages
      # always arrive before the first DML for a table.
      #
      # Input: raw bytes starting at the pgoutput type tag (i.e. after stripping
      # the 25-byte XLogData header from the replication stream wrapper).
      #
      # PostgreSQL epoch reference: timestamps in pgoutput are microseconds
      # since 2000-01-01 00:00:00 UTC (not the Unix epoch).
      #
      # Malformed input (empty or truncated payloads, unexpected tuple markers,
      # tuples wider than their cached Relation) raises ParseError instead of
      # producing partially decoded values.
      class Parser
        # Offset in seconds from Unix epoch (1970-01-01) to PG epoch (2000-01-01).
        PG_EPOCH_OFFSET = 946_684_800

        # pgoutput message type tags → handler method names.
        HANDLERS = {
          "B" => :decode_begin,
          "C" => :decode_commit,
          "R" => :decode_relation,
          "I" => :decode_insert,
          "U" => :decode_update,
          "D" => :decode_delete,
          "T" => :decode_type,
          "O" => :decode_origin,
          "A" => :decode_truncate,
          "M" => :decode_logical_message
        }.freeze

        def initialize
          @relations = {} # OID → Messages::Relation
        end

        # Parse one raw pgoutput message payload.
        #
        # @param data [String] raw bytes beginning with the one-byte type tag
        # @return [Object] the Messages::* struct built by the matching decoder
        # @raise [UnknownMessage] when the tag has no entry in HANDLERS
        # @raise [ParseError] when the payload is empty, truncated or malformed
        def parse(data)
          cur = Cursor.new(data)
          tag = cur.read_char # raises ParseError on an empty payload

          handler = HANDLERS[tag]
          raise UnknownMessage, "Unknown pgoutput tag: #{tag.inspect} (0x#{tag.ord.to_s(16)})" unless handler

          send(handler, cur)
        end

        # Expose the relation cache for testing and for the WAL consumer.
        #
        # @param oid [Integer] relation OID
        # @return [Object, nil] the cached Messages::Relation, or nil if none was seen
        def relation(oid)
          @relations[oid]
        end

        private

        # ── message decoders ────────────────────────────────────────────────

        def decode_begin(cur)
          Messages::Begin.new(
            lsn: lsn_string(cur.read_uint64),
            commit_time: pg_time(cur.read_int64),
            xid: cur.read_uint32
          )
        end

        def decode_commit(cur)
          Messages::Commit.new(
            flags: cur.read_uint8,
            lsn: lsn_string(cur.read_uint64),
            end_lsn: lsn_string(cur.read_uint64),
            commit_time: pg_time(cur.read_int64)
          )
        end

        # Decodes a Relation message and stores it in the relation cache,
        # replacing any earlier entry for the same OID.
        def decode_relation(cur)
          id = cur.read_uint32
          namespace = cur.read_string
          name = cur.read_string
          replica_identity = cur.read_char
          col_count = cur.read_uint16

          columns = col_count.times.map do
            Messages::RelationColumn.new(
              flags: cur.read_uint8,
              name: cur.read_string,
              type_id: cur.read_uint32,
              type_modifier: cur.read_int32
            )
          end

          rel = Messages::Relation.new(
            id: id, namespace: namespace, name: name,
            replica_identity: replica_identity, columns: columns
          )
          @relations[id] = rel
          rel
        end

        def decode_type(cur)
          Messages::Type.new(
            id: cur.read_uint32,
            namespace: cur.read_string,
            name: cur.read_string
          )
        end

        def decode_insert(cur)
          relation_id = cur.read_uint32
          read_tuple_marker(cur, %w[N], "Insert") # always 'N' (new tuple)
          Messages::Insert.new(
            relation_id: relation_id,
            new_tuple: read_tuple(cur, relation_id)
          )
        end

        def decode_update(cur)
          relation_id = cur.read_uint32
          marker = read_tuple_marker(cur, %w[K O N], "Update")

          # An optional old tuple ('K' or 'O') precedes the mandatory new one.
          old_tuple = nil
          unless marker == "N"
            old_tuple = read_tuple(cur, relation_id)
            read_tuple_marker(cur, %w[N], "Update")
          end

          Messages::Update.new(
            relation_id: relation_id,
            old_tuple: old_tuple,
            new_tuple: read_tuple(cur, relation_id)
          )
        end

        def decode_delete(cur)
          relation_id = cur.read_uint32
          read_tuple_marker(cur, %w[K O], "Delete")
          Messages::Delete.new(
            relation_id: relation_id,
            old_tuple: read_tuple(cur, relation_id)
          )
        end

        def decode_origin(cur)
          Messages::Origin.new(
            lsn: lsn_string(cur.read_uint64),
            name: cur.read_string
          )
        end

        def decode_truncate(cur)
          rel_count = cur.read_uint32
          option_bits = cur.read_uint8
          rel_ids = rel_count.times.map { cur.read_uint32 }
          Messages::Truncate.new(option_bits: option_bits, relation_ids: rel_ids)
        end

        def decode_logical_message(cur)
          flags = cur.read_uint8
          lsn = lsn_string(cur.read_uint64)
          prefix = cur.read_string
          content_len = cur.read_uint32
          content = cur.read_bytes(content_len) # left as raw binary bytes
          Messages::LogicalMessage.new(flags: flags, lsn: lsn, prefix: prefix, content: content)
        end

        # ── tuple data ──────────────────────────────────────────────────────

        # Consumes the one-byte marker that introduces a TupleData section and
        # checks it against the markers valid at this point of the message.
        #
        # @return [String] the marker that was read
        # @raise [ParseError] when the marker is not one of +allowed+
        def read_tuple_marker(cur, allowed, message_name)
          marker = cur.read_char
          return marker if allowed.include?(marker)

          raise ParseError,
                "Unexpected tuple marker #{marker.inspect} in #{message_name} message " \
                "(expected #{allowed.join(' or ')})"
        end

        # Reads TupleData and returns Hash<column_name, value>.
        # Uses the cached Relation to map column positions to names.
        #
        # Values are nil for SQL NULL, :toast for an unchanged TOAST value, and
        # a UTF-8 String for text-format data.
        #
        # @raise [ParseError] when no Relation is cached for +relation_id+, the
        #   tuple has more columns than that Relation, or a column kind is unknown
        def read_tuple(cur, relation_id)
          col_count = cur.read_uint16
          relation = @relations[relation_id]

          # A DML message must be preceded by its Relation message in the stream.
          # If it is not, inventing positional names ("col_0", ...) would route
          # and apply garbage silently. Fail loudly instead so the consumer
          # surfaces a replication error rather than corrupting the target.
          unless relation
            raise ParseError,
                  "No cached Relation for OID #{relation_id}; cannot decode tuple. " \
                  "The Relation message was missed or the stream is out of order."
          end

          # Same reasoning for a tuple wider than the cached Relation: the extra
          # positions have no name, so refuse to make one up.
          known = relation.columns.size
          if col_count > known
            raise ParseError,
                  "Tuple for OID #{relation_id} has #{col_count} columns but the " \
                  "cached Relation describes only #{known}."
          end

          col_count.times.each_with_object({}) do |i, hash|
            col_kind = cur.read_char

            value = case col_kind
                    when "n" then nil    # SQL NULL
                    when "u" then :toast # unchanged TOAST value
                    when "t" then cur.read_text(cur.read_uint32)
                    else
                      raise ParseError, "Unknown tuple column kind: #{col_kind.inspect}"
                    end

            hash[relation.columns[i].name] = value
          end
        end

        # ── helpers ─────────────────────────────────────────────────────────

        # Formats a 64-bit LSN as "HIGH/LOW" in upper-case hex.
        def lsn_string(int64)
          format("%X/%X", int64 >> 32, int64 & 0xFFFF_FFFF)
        end

        # Converts microseconds since the PG epoch into a UTC Time.
        def pg_time(microseconds)
          secs = PG_EPOCH_OFFSET + microseconds / 1_000_000
          usec = microseconds % 1_000_000
          Time.at(secs, usec, :microsecond).utc
        end

        # ── cursor (private byte reader) ─────────────────────────────────────

        # Sequential binary cursor. All integer reads are big-endian.
        # String reads consume until the next null byte.
        # Every read raises ParseError when the data ends before the requested
        # number of bytes is available.
        class Cursor
          def initialize(data)
            @data = data.b # force binary encoding for safe byte ops
            @pos = 0
          end

          # Reads one byte and returns it as a one-character binary String.
          def read_char
            take(1)
          end

          def read_uint8
            take(1).getbyte(0)
          end

          def read_int8
            take(1).unpack1("c")
          end

          def read_uint16
            take(2).unpack1("n")
          end

          def read_int16
            take(2).unpack1("s>")
          end

          def read_uint32
            take(4).unpack1("N")
          end

          def read_int32
            take(4).unpack1("l>")
          end

          def read_uint64
            take(8).unpack1("Q>")
          end

          def read_int64
            take(8).unpack1("q>")
          end

          # Reads a null-terminated string. Returns UTF-8 (replacing bad bytes).
          def read_string
            null_pos = @data.index("\x00", @pos)
            raise ParseError, "Unterminated string at offset #{@pos}" unless null_pos

            bytes = @data[@pos, null_pos - @pos]
            @pos = null_pos + 1
            utf8(bytes)
          end

          # Reads +n+ bytes and returns them as UTF-8 text (replacing bad bytes).
          def read_text(n)
            utf8(take(n))
          end

          # Reads +n+ raw bytes and returns them as a binary String.
          def read_bytes(n)
            take(n)
          end

          def eof?
            @pos >= @data.bytesize
          end

          def pos
            @pos
          end

          private

          # Returns the next +n+ bytes and advances past them.
          # Raises ParseError when fewer than +n+ bytes remain, so callers never
          # see nil or a silently shortened slice.
          def take(n)
            remaining = @data.bytesize - @pos
            if n > remaining
              raise ParseError,
                    "Unexpected end of message: need #{n} byte(s) at offset #{@pos}, " \
                    "only #{remaining} available"
            end

            bytes = @data[@pos, n]
            @pos += n
            bytes
          end

          # Reinterprets the bytes as UTF-8 and replaces only invalid sequences.
          # Transcoding from "binary" would instead replace every byte >= 0x80,
          # destroying all non-ASCII text.
          def utf8(bytes)
            bytes.force_encoding(Encoding::UTF_8).scrub
          end
        end
        private_constant :Cursor
      end
    end
  end
end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "pastel"
4
+
5
module Pcrd
  # Progress reporting interface used by long-running orchestration so it does
  # not depend on Thor/CLI output directly. Implementations:
  #   Console — human-facing, colored
  #   Null    — silent (tests, automation)
  #
  # Contract:
  #   info(msg)     plain line
  #   success(msg)  line, styled as success
  #   warn(msg)     line, styled as a warning
  #   status(msg)   transient same-line update (carriage return, no newline)
  #   green(str)    -> styled inline string (for composing a status line)
  module Reporter
    # Reporter that writes to an IO-like object and styles text with Pastel.
    class Console
      # @param out [#puts, #print, #flush] destination stream
      def initialize(out: $stdout)
        @out = out
        @pastel = Pastel.new
      end

      # Writes +msg+ followed by a newline.
      def info(msg = "")
        emit_line(msg)
      end

      # Writes +msg+ in green followed by a newline.
      def success(msg)
        styled = @pastel.green(msg)
        emit_line(styled)
      end

      # Writes +msg+ in yellow followed by a newline.
      def warn(msg)
        styled = @pastel.yellow(msg)
        emit_line(styled)
      end

      # Rewrites the current line: carriage return, then +msg+, no newline.
      # Flushes so the update is visible immediately.
      def status(msg)
        line = "\r#{msg}"
        @out.print(line)
        @out.flush
      end

      # Returns +str+ styled green without writing anything.
      def green(str)
        @pastel.green(str)
      end

      private

      def emit_line(text)
        @out.puts(text)
      end
    end
  end
end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
module Pcrd
  module Reporter
    # Silent reporter for tests and non-interactive automation.
    # Every output method accepts its argument and does nothing.
    class Null
      def info(_msg = "")
        nil
      end

      def success(_msg)
        nil
      end

      def warn(_msg)
        nil
      end

      def status(_msg)
        nil
      end

      # Returns +str+ unchanged (no styling is applied).
      def green(str = "")
        str
      end
    end
  end
end
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+
3
module Pcrd
  module Schema
    # Describes one table column: its position, name, type, physical layout
    # (alignment and size), nullability and default expression.
    class Column
      # Verbose PostgreSQL type spellings paired with their short display form.
      # Applied in this order, each replacing at most the first occurrence.
      TYPE_ABBREVIATIONS = [
        ["character varying", "varchar"],
        ["character(", "char("],
        ["timestamp without time zone", "timestamp"],
        ["timestamp with time zone", "timestamptz"],
        ["time without time zone", "time"],
        ["time with time zone", "timetz"]
      ].freeze
      private_constant :TYPE_ABBREVIATIONS

      attr_reader :attnum, :name, :type_name, :formatted_type,
                  :alignment, :fixed_size, :nullable, :default_expr

      # alignment: Integer (1, 2, 4, or 8 bytes)
      # fixed_size: Integer bytes, or nil for variable-length
      def initialize(attnum:, name:, type_name:, formatted_type:,
                     alignment:, fixed_size:, nullable:, default_expr:)
        @attnum, @name = attnum, name
        @type_name, @formatted_type = type_name, formatted_type
        @alignment, @fixed_size = alignment, fixed_size
        @nullable, @default_expr = nullable, default_expr
      end

      # True when the column has no fixed size.
      def variable?
        fixed_size.nil?
      end

      # True when the column has a fixed size.
      def fixed?
        !variable?
      end

      # Human-readable type string, with common verbose PG names shortened.
      def display_type
        TYPE_ABBREVIATIONS.reduce(formatted_type) do |text, (verbose, short)|
          text.sub(verbose, short)
        end
      end

      # Size in bytes as a string, or "variable" when there is no fixed size.
      def display_size
        return "variable" unless fixed_size

        fixed_size.to_s
      end

      # Alignment with a "B" suffix.
      def display_alignment
        "#{alignment}B"
      end

      # Two columns are equal when both name and type_name match.
      def ==(other)
        return false unless other.is_a?(Column)

        name == other.name && type_name == other.type_name
      end

      def inspect
        type = display_type
        size = display_size
        "#<Pcrd::Schema::Column #{name}:#{type} align=#{alignment} size=#{size}>"
      end
    end
  end
end
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
module Pcrd
  module Schema
    # Generates CREATE TABLE DDL for the target cluster from a source schema
    # plus a Config::Table migration spec.
    #
    # Column ordering: if table_config.optimize_column_order is true, columns
    # are sorted for minimal padding waste before rendering.
    #
    # Exclusions (by design):
    # - Foreign key constraints: listed in preflight post-cutover checklist
    # - Non-PK indexes: operator creates on target before cutover
    # - GENERATED/identity: target uses plain type; sequence advanced at cutover
    # - nextval() defaults: referencing source sequence; omitted from DDL
    module DDL
      class << self
        # Returns a CREATE TABLE SQL string (no trailing semicolon — caller adds
        # one if needed, or passes directly to exec_sql).
        def generate(source_columns:, table_config:, primary_key_columns: [],
                     schema_name: "public")
          columns = synthesize_target_columns(source_columns, table_config)
          pk = map_pk_through_renames(primary_key_columns, table_config)

          render(columns, table_config.name, schema_name, pk)
        end

        private

        # Runs the source columns through Differ to obtain the target columns,
        # then through Packer when the table config asks for reordering.
        def synthesize_target_columns(source_columns, table_config)
          differ = Differ.new
          diff = differ.diff(source_columns: source_columns, table_config: table_config)
          target = differ.target_columns_from_diff(diff)
          return target unless table_config.optimize_column_order

          Packer.new.optimize(target)
        end

        # Assembles the statement: one aligned line per column plus an
        # optional PRIMARY KEY clause.
        def render(columns, table_name, schema_name, pk_columns)
          name_w = columns.map { |c| Sql.quote_ident(c.name).length }.max || 0
          type_w = columns.map { |c| c.display_type.length }.max || 0

          lines = columns.map { |c| column_line(c, name_w, type_w) }
          lines << " PRIMARY KEY (#{Sql.quote_columns(pk_columns)})" if pk_columns.any?

          qualified = Sql.quote_table(table_name, schema: schema_name)
          body = lines.join(",\n")
          "CREATE TABLE #{qualified} (\n#{body}\n)"
        end

        # Renders one column definition, padded to the given name/type widths.
        def column_line(col, name_w, type_w)
          ident = Sql.quote_ident(col.name).ljust(name_w)
          type = col.display_type.ljust(type_w)

          segments = [" #{ident} #{type}"]
          segments << "NOT NULL" unless col.nullable
          segments << "DEFAULT #{col.default_expr}" if portable_default?(col.default_expr)
          segments.join(" ").rstrip
        end

        # Omit nextval() defaults — they reference source sequences.
        # Identity columns (GENERATED ALWAYS AS IDENTITY) are also omitted;
        # a plain column is created and the sequence is advanced at cutover.
        def portable_default?(expr)
          return false unless expr
          return false if expr.start_with?("nextval(")

          !expr.match?(/\bgenerated\b/i)
        end

        # Translates primary-key column names through the renames declared in
        # table_config.columns; names without a rename pass through unchanged.
        def map_pk_through_renames(pk_columns, table_config)
          renames = (table_config.columns || {}).filter_map do |src, spec|
            [src.to_s, spec.rename] if spec.rename
          end.to_h

          pk_columns.map { |col| renames.fetch(col, col) }
        end
      end
    end
  end
end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
module Pcrd
  module Schema
    # One row in a schema diff: describes the relationship between a source column
    # and its counterpart on the target.
    #
    # status values:
    #   :unchanged        — column exists on both sides with the same name and type
    #   :type_changed     — same name, different type
    #   :renamed          — different name, same type
    #   :type_and_renamed — different name AND different type
    #   :dropped          — exists on source, absent from target (per spec)
    #   :added            — absent from source, new column on target (per spec)
    DiffEntry = Data.define(:status, :source_column, :target_column) do
      # Name of the source column, or nil when there is none.
      def source_name
        source_column&.name
      end

      # Name of the target column, or nil when there is none.
      def target_name
        target_column&.name
      end

      def type_changed?
        status == :type_changed || status == :type_and_renamed
      end

      def renamed?
        status == :renamed || status == :type_and_renamed
      end

      def dropped?
        status == :dropped
      end

      def added?
        status == :added
      end

      # Human-readable label for the status; nil for an unrecognised status.
      def status_label
        {
          unchanged: "unchanged",
          type_changed: "type changed",
          renamed: "renamed",
          type_and_renamed: "renamed + type changed",
          dropped: "dropped",
          added: "added"
        }[status]
      end
    end
  end
end