RubyGems - pcrd - Versions diffs - 0.1.0 - Mend

pcrd 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (72) hide show

checksums.yaml +7 -0
data/CHANGELOG.md +24 -0
data/LICENSE +21 -0
data/README.md +614 -0
data/bin/pcrd +7 -0
data/lib/pcrd/advisory_lock.rb +50 -0
data/lib/pcrd/apply/engine.rb +184 -0
data/lib/pcrd/apply/worker.rb +97 -0
data/lib/pcrd/backfill/batch.rb +158 -0
data/lib/pcrd/backfill/engine.rb +153 -0
data/lib/pcrd/checkpoint/store.rb +217 -0
data/lib/pcrd/cli.rb +274 -0
data/lib/pcrd/commands/analyze.rb +125 -0
data/lib/pcrd/commands/cleanup.rb +112 -0
data/lib/pcrd/commands/demo.rb +152 -0
data/lib/pcrd/commands/readiness.rb +30 -0
data/lib/pcrd/commands/status.rb +129 -0
data/lib/pcrd/commands/verify.rb +172 -0
data/lib/pcrd/config/add_column.rb +7 -0
data/lib/pcrd/config/analyze_config.rb +8 -0
data/lib/pcrd/config/column_spec.rb +10 -0
data/lib/pcrd/config/connection.rb +7 -0
data/lib/pcrd/config/cutover_config.rb +7 -0
data/lib/pcrd/config/load_error.rb +7 -0
data/lib/pcrd/config/loader.rb +158 -0
data/lib/pcrd/config/migrate_config.rb +21 -0
data/lib/pcrd/config/root.rb +9 -0
data/lib/pcrd/config/schema.rb +62 -0
data/lib/pcrd/config/table.rb +9 -0
data/lib/pcrd/config/verify_config.rb +7 -0
data/lib/pcrd/config.rb +7 -0
data/lib/pcrd/connection/client.rb +129 -0
data/lib/pcrd/connection/error.rb +7 -0
data/lib/pcrd/connection/replication.rb +108 -0
data/lib/pcrd/cutover/orchestrator.rb +108 -0
data/lib/pcrd/cutover/sequences.rb +138 -0
data/lib/pcrd/demo/generator.rb +214 -0
data/lib/pcrd/demo/schema.rb +154 -0
data/lib/pcrd/error.rb +12 -0
data/lib/pcrd/migration/orchestrator.rb +272 -0
data/lib/pcrd/monitor/lag.rb +107 -0
data/lib/pcrd/options.rb +15 -0
data/lib/pcrd/output/analyze_printer.rb +173 -0
data/lib/pcrd/output/cutover_printer.rb +128 -0
data/lib/pcrd/output/preflight_printer.rb +119 -0
data/lib/pcrd/output/readiness_printer.rb +72 -0
data/lib/pcrd/preflight.rb +331 -0
data/lib/pcrd/readiness/manifest.rb +201 -0
data/lib/pcrd/replication/consumer.rb +235 -0
data/lib/pcrd/replication/error.rb +10 -0
data/lib/pcrd/replication/pgoutput/messages.rb +68 -0
data/lib/pcrd/replication/pgoutput/parser.rb +316 -0
data/lib/pcrd/reporter/console.rb +46 -0
data/lib/pcrd/reporter/null.rb +14 -0
data/lib/pcrd/schema/column.rb +59 -0
data/lib/pcrd/schema/ddl.rb +71 -0
data/lib/pcrd/schema/diff_entry.rb +36 -0
data/lib/pcrd/schema/differ.rb +175 -0
data/lib/pcrd/schema/object_reader.rb +187 -0
data/lib/pcrd/schema/packer.rb +90 -0
data/lib/pcrd/schema/reader.rb +118 -0
data/lib/pcrd/schema/setup.rb +143 -0
data/lib/pcrd/schema/setup_error.rb +9 -0
data/lib/pcrd/schema/table_not_found.rb +8 -0
data/lib/pcrd/schema/type_registry.rb +116 -0
data/lib/pcrd/sql.rb +55 -0
data/lib/pcrd/transform/row_transformer.rb +69 -0
data/lib/pcrd/transform/type_map.rb +209 -0
data/lib/pcrd/transform/validator.rb +106 -0
data/lib/pcrd/version.rb +5 -0
data/lib/pcrd.rb +11 -0
metadata +231 -0

data/lib/pcrd/replication/pgoutput/parser.rb ADDED Viewed

@@ -0,0 +1,316 @@
+# frozen_string_literal: true
+module Pcrd
+  module Replication
+    module Pgoutput
+      # Errors raised by the parser.
+      class ParseError < StandardError; end
+      class UnknownMessage < ParseError; end
+      # Decodes raw pgoutput binary messages into Messages::* structs.
+      #
+      # The parser is stateful: it maintains a relation cache so that
+      # Insert/Update/Delete messages (which only carry a relation OID) can be
+      # enriched with column names from the most recently seen Relation message
+      # for that OID. Always feed the stream in LSN order; Relation messages
+      # always arrive before the first DML for a table.
+      #
+      # Input: raw bytes starting at the pgoutput type tag (i.e. after stripping
+      # the 25-byte XLogData header from the replication stream wrapper).
+      #
+      # PostgreSQL epoch reference: timestamps in pgoutput are microseconds
+      # since 2000-01-01 00:00:00 UTC (not the Unix epoch).
+      class Parser
+        # Offset in seconds from Unix epoch (1970-01-01) to PG epoch (2000-01-01).
+        PG_EPOCH_OFFSET = 946_684_800
+        # pgoutput message type tags → handler method names.
+        HANDLERS = {
+          "B" => :decode_begin,
+          "C" => :decode_commit,
+          "R" => :decode_relation,
+          "I" => :decode_insert,
+          "U" => :decode_update,
+          "D" => :decode_delete,
+          "T" => :decode_type,
+          "O" => :decode_origin,
+          "A" => :decode_truncate,
+          "M" => :decode_logical_message
+        }.freeze
+        def initialize
+          @relations = {}  # OID → Messages::Relation
+        end
+        # Parse one raw pgoutput message payload.
+        # Returns the appropriate Messages::* struct.
+        def parse(data)
+          cur = Cursor.new(data)
+          tag = cur.read_char
+          handler = HANDLERS[tag]
+          raise UnknownMessage, "Unknown pgoutput tag: #{tag.inspect} (0x#{tag.ord.to_s(16)})" unless handler
+          send(handler, cur)
+        end
+        # Expose the relation cache for testing and for the WAL consumer.
+        def relation(oid)
+          @relations[oid]
+        end
+        private
+        # ── message decoders ────────────────────────────────────────────────
+        def decode_begin(cur)
+          Messages::Begin.new(
+            lsn:         lsn_string(cur.read_uint64),
+            commit_time: pg_time(cur.read_int64),
+            xid:         cur.read_uint32
+          )
+        end
+        def decode_commit(cur)
+          Messages::Commit.new(
+            flags:       cur.read_uint8,
+            lsn:         lsn_string(cur.read_uint64),
+            end_lsn:     lsn_string(cur.read_uint64),
+            commit_time: pg_time(cur.read_int64)
+          )
+        end
+        def decode_relation(cur)
+          id               = cur.read_uint32
+          namespace        = cur.read_string
+          name             = cur.read_string
+          replica_identity = cur.read_char
+          col_count        = cur.read_uint16
+          columns = col_count.times.map do
+            Messages::RelationColumn.new(
+              flags:         cur.read_uint8,
+              name:          cur.read_string,
+              type_id:       cur.read_uint32,
+              type_modifier: cur.read_int32
+            )
+          end
+          rel = Messages::Relation.new(
+            id: id, namespace: namespace, name: name,
+            replica_identity: replica_identity, columns: columns
+          )
+          @relations[id] = rel
+          rel
+        end
+        def decode_type(cur)
+          Messages::Type.new(
+            id:        cur.read_uint32,
+            namespace: cur.read_string,
+            name:      cur.read_string
+          )
+        end
+        def decode_insert(cur)
+          relation_id = cur.read_uint32
+          cur.read_char  # always 'N' (new tuple)
+          Messages::Insert.new(
+            relation_id: relation_id,
+            new_tuple:   read_tuple(cur, relation_id)
+          )
+        end
+        def decode_update(cur)
+          relation_id = cur.read_uint32
+          indicator   = cur.read_char  # 'K', 'O', or 'N'
+          old_tuple = nil
+          if indicator == "K" || indicator == "O"
+            old_tuple = read_tuple(cur, relation_id)
+            indicator = cur.read_char  # consume 'N'
+          end
+          # indicator is now 'N'
+          Messages::Update.new(
+            relation_id: relation_id,
+            old_tuple:   old_tuple,
+            new_tuple:   read_tuple(cur, relation_id)
+          )
+        end
+        def decode_delete(cur)
+          relation_id = cur.read_uint32
+          cur.read_char  # 'K' or 'O'
+          Messages::Delete.new(
+            relation_id: relation_id,
+            old_tuple:   read_tuple(cur, relation_id)
+          )
+        end
+        def decode_origin(cur)
+          Messages::Origin.new(
+            lsn:  lsn_string(cur.read_uint64),
+            name: cur.read_string
+          )
+        end
+        def decode_truncate(cur)
+          rel_count   = cur.read_uint32
+          option_bits = cur.read_uint8
+          rel_ids     = rel_count.times.map { cur.read_uint32 }
+          Messages::Truncate.new(option_bits: option_bits, relation_ids: rel_ids)
+        end
+        def decode_logical_message(cur)
+          flags       = cur.read_uint8
+          lsn         = lsn_string(cur.read_uint64)
+          prefix      = cur.read_string
+          content_len = cur.read_uint32
+          content     = cur.read_bytes(content_len)
+          Messages::LogicalMessage.new(flags: flags, lsn: lsn, prefix: prefix, content: content)
+        end
+        # ── tuple data ──────────────────────────────────────────────────────
+        # Reads TupleData and returns Hash<column_name, value>.
+        # Uses the cached Relation to map column positions to names.
+        def read_tuple(cur, relation_id)
+          col_count = cur.read_uint16
+          relation  = @relations[relation_id]
+          # A DML message must be preceded by its Relation message in the stream.
+          # If it is not, inventing positional names ("col_0", ...) would route
+          # and apply garbage silently. Fail loudly instead so the consumer
+          # surfaces a replication error rather than corrupting the target.
+          unless relation
+            raise ParseError,
+                  "No cached Relation for OID #{relation_id}; cannot decode tuple. " \
+                  "The Relation message was missed or the stream is out of order."
+          end
+          col_count.times.each_with_object({}) do |i, hash|
+            col_kind = cur.read_char
+            col_name = relation.columns[i]&.name || "col_#{i}"
+            value = case col_kind
+                    when "n" then nil        # SQL NULL
+                    when "u" then :toast     # unchanged TOAST value
+                    when "t"
+                      len = cur.read_uint32
+                      cur.read_bytes(len).then do |bytes|
+                        bytes.encode("UTF-8", "binary", invalid: :replace, undef: :replace)
+                      end
+                    else
+                      raise ParseError, "Unknown tuple column kind: #{col_kind.inspect}"
+                    end
+            hash[col_name] = value
+          end
+        end
+        # ── helpers ─────────────────────────────────────────────────────────
+        def lsn_string(int64)
+          "%X/%X" % [int64 >> 32, int64 & 0xFFFF_FFFF]
+        end
+        def pg_time(microseconds)
+          secs = PG_EPOCH_OFFSET + microseconds / 1_000_000
+          usec = microseconds % 1_000_000
+          Time.at(secs, usec, :microsecond).utc
+        end
+        # ── cursor (private byte reader) ─────────────────────────────────────
+        # Sequential binary cursor. All integer reads are big-endian.
+        # String reads consume until the next null byte.
+        class Cursor
+          def initialize(data)
+            @data = data.b   # force binary encoding for safe byte ops
+            @pos  = 0
+          end
+          def read_char
+            c = @data[@pos]
+            @pos += 1
+            c
+          end
+          def read_uint8
+            b = @data.getbyte(@pos)
+            @pos += 1
+            b
+          end
+          def read_int8
+            b = @data[@pos, 1].unpack1("c")
+            @pos += 1
+            b
+          end
+          def read_uint16
+            v = @data[@pos, 2].unpack1("n")
+            @pos += 2
+            v
+          end
+          def read_int16
+            v = @data[@pos, 2].unpack1("s>")
+            @pos += 2
+            v
+          end
+          def read_uint32
+            v = @data[@pos, 4].unpack1("N")
+            @pos += 4
+            v
+          end
+          def read_int32
+            v = @data[@pos, 4].unpack1("l>")
+            @pos += 4
+            v
+          end
+          def read_uint64
+            v = @data[@pos, 8].unpack1("Q>")
+            @pos += 8
+            v
+          end
+          def read_int64
+            v = @data[@pos, 8].unpack1("q>")
+            @pos += 8
+            v
+          end
+          # Reads a null-terminated string. Returns UTF-8 (replacing bad bytes).
+          def read_string
+            null_pos = @data.index("\x00", @pos)
+            raise ParseError, "Unterminated string at offset #{@pos}" unless null_pos
+            bytes = @data[@pos, null_pos - @pos]
+            @pos  = null_pos + 1
+            bytes.encode("UTF-8", "binary", invalid: :replace, undef: :replace)
+          end
+          def read_bytes(n)
+            bytes = @data[@pos, n]
+            @pos += n
+            bytes
+          end
+          def eof?
+            @pos >= @data.bytesize
+          end
+          def pos
+            @pos
+          end
+        end
+        private_constant :Cursor
+      end
+    end
+  end
+end

data/lib/pcrd/reporter/console.rb ADDED Viewed

@@ -0,0 +1,46 @@
+# frozen_string_literal: true
+require "pastel"
+module Pcrd
+  # Progress reporting interface used by long-running orchestration so it does
+  # not depend on Thor/CLI output directly. Implementations:
+  #   Console — human-facing, colored
+  #   Null    — silent (tests, automation)
+  #
+  # Contract:
+  #   info(msg)     plain line
+  #   success(msg)  line, styled as success
+  #   warn(msg)     line, styled as a warning
+  #   status(msg)   transient same-line update (carriage return, no newline)
+  #   green(str)    -> styled inline string (for composing a status line)
+  module Reporter
+    class Console
+      def initialize(out: $stdout)
+        @out    = out
+        @pastel = Pastel.new
+      end
+      def info(msg = "")
+        @out.puts(msg)
+      end
+      def success(msg)
+        @out.puts(@pastel.green(msg))
+      end
+      def warn(msg)
+        @out.puts(@pastel.yellow(msg))
+      end
+      def status(msg)
+        @out.print("\r#{msg}")
+        @out.flush
+      end
+      def green(str)
+        @pastel.green(str)
+      end
+    end
+  end
+end

data/lib/pcrd/reporter/null.rb ADDED Viewed

@@ -0,0 +1,14 @@
+# frozen_string_literal: true
+module Pcrd
+  module Reporter
+    # Silent reporter for tests and non-interactive automation.
+    class Null
+      def info(_msg = "");  end
+      def success(_msg);    end
+      def warn(_msg);       end
+      def status(_msg);     end
+      def green(str = "");  str; end
+    end
+  end
+end

data/lib/pcrd/schema/column.rb ADDED Viewed

@@ -0,0 +1,59 @@
+# frozen_string_literal: true
+module Pcrd
+  module Schema
+    class Column
+      attr_reader :attnum, :name, :type_name, :formatted_type,
+                  :alignment, :fixed_size, :nullable, :default_expr
+      # alignment: Integer (1, 2, 4, or 8 bytes)
+      # fixed_size: Integer bytes, or nil for variable-length
+      def initialize(attnum:, name:, type_name:, formatted_type:,
+                     alignment:, fixed_size:, nullable:, default_expr:)
+        @attnum         = attnum
+        @name           = name
+        @type_name      = type_name
+        @formatted_type = formatted_type
+        @alignment      = alignment
+        @fixed_size     = fixed_size
+        @nullable       = nullable
+        @default_expr   = default_expr
+      end
+      def variable?
+        fixed_size.nil?
+      end
+      def fixed?
+        !variable?
+      end
+      # Human-readable type string, with common verbose PG names shortened.
+      def display_type
+        formatted_type
+          .sub("character varying", "varchar")
+          .sub("character(", "char(")
+          .sub("timestamp without time zone", "timestamp")
+          .sub("timestamp with time zone", "timestamptz")
+          .sub("time without time zone", "time")
+          .sub("time with time zone", "timetz")
+      end
+      def display_size
+        fixed_size ? fixed_size.to_s : "variable"
+      end
+      def display_alignment
+        "#{alignment}B"
+      end
+      def ==(other)
+        other.is_a?(Column) && name == other.name && type_name == other.type_name
+      end
+      def inspect
+        "#<Pcrd::Schema::Column #{name}:#{display_type} align=#{alignment} size=#{display_size}>"
+      end
+    end
+  end
+end

data/lib/pcrd/schema/ddl.rb ADDED Viewed

@@ -0,0 +1,71 @@
+# frozen_string_literal: true
+module Pcrd
+  module Schema
+    # Generates CREATE TABLE DDL for the target cluster from a source schema
+    # plus a Config::Table migration spec.
+    #
+    # Column ordering: if table_config.optimize_column_order is true, columns
+    # are sorted for minimal padding waste before rendering.
+    #
+    # Exclusions (by design):
+    #   - Foreign key constraints: listed in preflight post-cutover checklist
+    #   - Non-PK indexes:          operator creates on target before cutover
+    #   - GENERATED/identity:      target uses plain type; sequence advanced at cutover
+    #   - nextval() defaults:      referencing source sequence; omitted from DDL
+    module DDL
+      # Returns a CREATE TABLE SQL string (no trailing semicolon — caller adds
+      # one if needed, or passes directly to exec_sql).
+      def self.generate(source_columns:, table_config:, primary_key_columns: [],
+                        schema_name: "public")
+        target_cols = synthesize_target_columns(source_columns, table_config)
+        target_pk   = map_pk_through_renames(primary_key_columns, table_config)
+        render(target_cols, table_config.name, schema_name, target_pk)
+      end
+      private_class_method def self.synthesize_target_columns(source_columns, table_config)
+        differ  = Differ.new
+        entries = differ.diff(source_columns: source_columns, table_config: table_config)
+        cols    = differ.target_columns_from_diff(entries)
+        if table_config.optimize_column_order
+          Packer.new.optimize(cols)
+        else
+          cols
+        end
+      end
+      private_class_method def self.render(columns, table_name, schema_name, pk_columns)
+        name_w = columns.map { |c| Sql.quote_ident(c.name).length }.max.to_i
+        type_w = columns.map { |c| c.display_type.length }.max.to_i
+        lines = columns.map { |c| column_line(c, name_w, type_w) }
+        lines << "  PRIMARY KEY (#{Sql.quote_columns(pk_columns)})" if pk_columns.any?
+        "CREATE TABLE #{Sql.quote_table(table_name, schema: schema_name)} (\n#{lines.join(",\n")}\n)"
+      end
+      private_class_method def self.column_line(col, name_w, type_w)
+        parts = ["  #{Sql.quote_ident(col.name).ljust(name_w)}  #{col.display_type.ljust(type_w)}"]
+        parts << "NOT NULL" unless col.nullable
+        # Omit nextval() defaults — they reference source sequences.
+        # Identity columns (GENERATED ALWAYS AS IDENTITY) are also omitted;
+        # a plain column is created and the sequence is advanced at cutover.
+        if col.default_expr &&
+           !col.default_expr.start_with?("nextval(") &&
+           !col.default_expr.match?(/\bgenerated\b/i)
+          parts << "DEFAULT #{col.default_expr}"
+        end
+        parts.join("  ").rstrip
+      end
+      private_class_method def self.map_pk_through_renames(pk_columns, table_config)
+        renames = (table_config.columns || {}).each_with_object({}) do |(src, spec), map|
+          map[src.to_s] = spec.rename if spec.rename
+        end
+        pk_columns.map { |col| renames[col] || col }
+      end
+    end
+  end
+end

data/lib/pcrd/schema/diff_entry.rb ADDED Viewed

@@ -0,0 +1,36 @@
+# frozen_string_literal: true
+module Pcrd
+  module Schema
+    # One row in a schema diff: describes the relationship between a source column
+    # and its counterpart on the target.
+    #
+    # status values:
+    #   :unchanged       — column exists on both sides with the same name and type
+    #   :type_changed    — same name, different type
+    #   :renamed         — different name, same type
+    #   :type_and_renamed — different name AND different type
+    #   :dropped         — exists on source, absent from target (per spec)
+    #   :added           — absent from source, new column on target (per spec)
+    DiffEntry = Data.define(:status, :source_column, :target_column) do
+      def source_name = source_column&.name
+      def target_name = target_column&.name
+      def type_changed? = %i[type_changed type_and_renamed].include?(status)
+      def renamed?      = %i[renamed type_and_renamed].include?(status)
+      def dropped?      = status == :dropped
+      def added?        = status == :added
+      def status_label
+        case status
+        when :unchanged        then "unchanged"
+        when :type_changed     then "type changed"
+        when :renamed          then "renamed"
+        when :type_and_renamed then "renamed + type changed"
+        when :dropped          then "dropped"
+        when :added            then "added"
+        end
+      end
+    end
+  end
+end