pcrd 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +24 -0
- data/LICENSE +21 -0
- data/README.md +614 -0
- data/bin/pcrd +7 -0
- data/lib/pcrd/advisory_lock.rb +50 -0
- data/lib/pcrd/apply/engine.rb +184 -0
- data/lib/pcrd/apply/worker.rb +97 -0
- data/lib/pcrd/backfill/batch.rb +158 -0
- data/lib/pcrd/backfill/engine.rb +153 -0
- data/lib/pcrd/checkpoint/store.rb +217 -0
- data/lib/pcrd/cli.rb +274 -0
- data/lib/pcrd/commands/analyze.rb +125 -0
- data/lib/pcrd/commands/cleanup.rb +112 -0
- data/lib/pcrd/commands/demo.rb +152 -0
- data/lib/pcrd/commands/readiness.rb +30 -0
- data/lib/pcrd/commands/status.rb +129 -0
- data/lib/pcrd/commands/verify.rb +172 -0
- data/lib/pcrd/config/add_column.rb +7 -0
- data/lib/pcrd/config/analyze_config.rb +8 -0
- data/lib/pcrd/config/column_spec.rb +10 -0
- data/lib/pcrd/config/connection.rb +7 -0
- data/lib/pcrd/config/cutover_config.rb +7 -0
- data/lib/pcrd/config/load_error.rb +7 -0
- data/lib/pcrd/config/loader.rb +158 -0
- data/lib/pcrd/config/migrate_config.rb +21 -0
- data/lib/pcrd/config/root.rb +9 -0
- data/lib/pcrd/config/schema.rb +62 -0
- data/lib/pcrd/config/table.rb +9 -0
- data/lib/pcrd/config/verify_config.rb +7 -0
- data/lib/pcrd/config.rb +7 -0
- data/lib/pcrd/connection/client.rb +129 -0
- data/lib/pcrd/connection/error.rb +7 -0
- data/lib/pcrd/connection/replication.rb +108 -0
- data/lib/pcrd/cutover/orchestrator.rb +108 -0
- data/lib/pcrd/cutover/sequences.rb +138 -0
- data/lib/pcrd/demo/generator.rb +214 -0
- data/lib/pcrd/demo/schema.rb +154 -0
- data/lib/pcrd/error.rb +12 -0
- data/lib/pcrd/migration/orchestrator.rb +272 -0
- data/lib/pcrd/monitor/lag.rb +107 -0
- data/lib/pcrd/options.rb +15 -0
- data/lib/pcrd/output/analyze_printer.rb +173 -0
- data/lib/pcrd/output/cutover_printer.rb +128 -0
- data/lib/pcrd/output/preflight_printer.rb +119 -0
- data/lib/pcrd/output/readiness_printer.rb +72 -0
- data/lib/pcrd/preflight.rb +331 -0
- data/lib/pcrd/readiness/manifest.rb +201 -0
- data/lib/pcrd/replication/consumer.rb +235 -0
- data/lib/pcrd/replication/error.rb +10 -0
- data/lib/pcrd/replication/pgoutput/messages.rb +68 -0
- data/lib/pcrd/replication/pgoutput/parser.rb +316 -0
- data/lib/pcrd/reporter/console.rb +46 -0
- data/lib/pcrd/reporter/null.rb +14 -0
- data/lib/pcrd/schema/column.rb +59 -0
- data/lib/pcrd/schema/ddl.rb +71 -0
- data/lib/pcrd/schema/diff_entry.rb +36 -0
- data/lib/pcrd/schema/differ.rb +175 -0
- data/lib/pcrd/schema/object_reader.rb +187 -0
- data/lib/pcrd/schema/packer.rb +90 -0
- data/lib/pcrd/schema/reader.rb +118 -0
- data/lib/pcrd/schema/setup.rb +143 -0
- data/lib/pcrd/schema/setup_error.rb +9 -0
- data/lib/pcrd/schema/table_not_found.rb +8 -0
- data/lib/pcrd/schema/type_registry.rb +116 -0
- data/lib/pcrd/sql.rb +55 -0
- data/lib/pcrd/transform/row_transformer.rb +69 -0
- data/lib/pcrd/transform/type_map.rb +209 -0
- data/lib/pcrd/transform/validator.rb +106 -0
- data/lib/pcrd/version.rb +5 -0
- data/lib/pcrd.rb +11 -0
- metadata +231 -0
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Pcrd
  module Schema
    # Prepares the source and target databases for a migration.
    #
    # Creates target tables from the migration spec; called at the start of
    # `pcrd migrate` (after preflight passes).
    #
    # In the full streaming flow (Phase 9+), Setup also creates the publication
    # and replication slot on source. For --backfill-only those are skipped.
    class Setup
      # source_pool: pool for the source database; this class calls #exec,
      #              #exec_sql and #quote_ident on it.
      # target_pool: pool for the target database; this class calls #exec_sql on it.
      # config:      migration config; config.migrate.tables is the list of
      #              table specs (each responding to #name).
      def initialize(source_pool:, target_pool:, config:)
        @source_pool = source_pool
        @target_pool = target_pool
        @config = config
      end

      # Ensures the publication exists and creates the replication slot on the
      # source for a fresh migration. Returns the slot's starting LSN as a
      # "X/Y" string — pass this to the consumer so streaming begins from a
      # point that covers all of backfill.
      #
      # A leftover publication from a partial prior run is reused if it covers
      # exactly the configured tables (it is just a definition). A leftover
      # slot is NOT reused: its WAL position is unknown relative to backfill, so
      # we refuse and point the operator at --resume or `pcrd cleanup`.
      #
      # Raises SetupError if the slot already exists, or if the publication
      # exists but covers a different set of tables.
      def create_publication_and_slot(pub_name:, slot_name:)
        ensure_publication(pub_name)

        if slot_exists?(slot_name)
          raise SetupError, "Replication slot '#{slot_name}' already exists. Resume the existing " \
                            "migration with --resume, or remove it with `pcrd cleanup` to start over."
        end

        result = @source_pool.exec(
          "SELECT lsn FROM pg_create_logical_replication_slot($1, 'pgoutput')",
          [slot_name]
        )
        result[0]["lsn"]
      end

      # Validates that a --resume run has the slot and publication it needs.
      # Raises SetupError with a clear message if either is missing; the slot
      # is checked first.
      def validate_resumable!(pub_name:, slot_name:)
        unless slot_exists?(slot_name)
          raise SetupError, "Cannot resume: replication slot '#{slot_name}' does not exist on the source. " \
                            "Start a fresh migration (without --resume)."
        end

        unless publication_exists?(pub_name)
          raise SetupError, "Cannot resume: publication '#{pub_name}' does not exist on the source. " \
                            "Start a fresh migration (without --resume)."
        end
      end

      # Drops the publication and replication slot (cleanup phase).
      # Both statements tolerate the object being absent.
      def drop_publication_and_slot(pub_name:, slot_name:)
        @source_pool.exec_sql(
          "DROP PUBLICATION IF EXISTS #{@source_pool.quote_ident(pub_name)}"
        )
        @source_pool.exec(
          "SELECT pg_drop_replication_slot($1) WHERE EXISTS (" \
          "  SELECT 1 FROM pg_replication_slots WHERE slot_name = $1)",
          [slot_name]
        )
      end

      # Creates all target tables and returns a Hash<table_name, ddl_string>.
      #
      # Raises SetupError if a target table already exists and force_overwrite
      # is false (use --force-overwrite to drop first). The conflict check runs
      # for every configured table BEFORE any DDL is executed, so a refused run
      # does not leave a partial set of newly created tables on the target.
      def create_target_tables(force_overwrite: false)
        tables = @config.migrate.tables
        source_reader = Reader.new(@source_pool)
        target_reader = Reader.new(@target_pool)

        unless force_overwrite
          conflict = tables.find { |table_config| target_reader.table_exists?(table_config.name) }
          raise SetupError, table_exists_message(conflict.name) if conflict
        end

        tables.each_with_object({}) do |table_config, ddls|
          name = table_config.name
          ddl = DDL.generate(
            source_columns: source_reader.read(name),
            table_config: table_config,
            primary_key_columns: source_reader.primary_key_columns(name)
          )

          # Re-checked per table: the table may have appeared since the
          # up-front check (or the same name may be configured twice).
          if target_reader.table_exists?(name)
            raise SetupError, table_exists_message(name) unless force_overwrite

            @target_pool.exec_sql("DROP TABLE IF EXISTS #{Sql.quote_table(name)} CASCADE")
          end

          @target_pool.exec_sql("#{ddl};")
          ddls[name] = ddl
        end
      end

      private

      # Error text used when a target table is in the way and overwriting was
      # not requested.
      def table_exists_message(name)
        "Table '#{name}' already exists on target. " \
          "Pass --force-overwrite to drop and recreate."
      end

      # Creates the publication if absent; reuses it if it already covers exactly
      # the configured tables; raises if it exists but covers a different set.
      def ensure_publication(pub_name)
        configured = @config.migrate.tables.map(&:name).sort

        if publication_exists?(pub_name)
          existing = publication_tables(pub_name).sort
          return if existing == configured

          raise SetupError, "Publication '#{pub_name}' already exists but covers #{existing.inspect}, " \
                            "not the configured tables #{configured.inspect}. " \
                            "Drop it with `pcrd cleanup` or reconcile the config."
        end

        table_list = @config.migrate.tables.map { |t| Sql.quote_table(t.name) }.join(", ")
        @source_pool.exec_sql(
          "CREATE PUBLICATION #{@source_pool.quote_ident(pub_name)} FOR TABLE #{table_list}"
        )
      end

      # True when a publication with this name exists on the source.
      def publication_exists?(pub_name)
        @source_pool.exec(
          "SELECT 1 FROM pg_publication WHERE pubname = $1", [pub_name]
        ).ntuples.positive?
      end

      # Table names (without schema) currently covered by the publication.
      def publication_tables(pub_name)
        @source_pool.exec(
          "SELECT tablename FROM pg_publication_tables WHERE pubname = $1", [pub_name]
        ).column_values(0)
      end

      # True when a replication slot with this name exists on the source.
      def slot_exists?(slot_name)
        @source_pool.exec(
          "SELECT 1 FROM pg_replication_slots WHERE slot_name = $1", [slot_name]
        ).ntuples.positive?
      end
    end
  end
end
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Pcrd
  module Schema
    # Maps PostgreSQL type name strings (as they appear in migration specs)
    # to the physical storage properties needed for padding analysis and
    # synthetic column construction.
    #
    # Used when building target Schema::Column objects from a migration spec
    # without a real target DB connection.
    module TypeRegistry
      # canonical_name: type name to report for the column
      # alignment:      required byte alignment (1, 2, 4 or 8)
      # fixed_size:     storage size in bytes, or nil for variable-length types
      TypeInfo = Data.define(:canonical_name, :alignment, :fixed_size)

      # Fixed-size types with exact mappings.
      FIXED = {
        # 8-byte aligned
        "bigint" => TypeInfo.new(canonical_name: "bigint", alignment: 8, fixed_size: 8),
        "int8" => TypeInfo.new(canonical_name: "bigint", alignment: 8, fixed_size: 8),
        "double precision" => TypeInfo.new(canonical_name: "double precision", alignment: 8, fixed_size: 8),
        "float8" => TypeInfo.new(canonical_name: "double precision", alignment: 8, fixed_size: 8),
        "timestamp" => TypeInfo.new(canonical_name: "timestamp", alignment: 8, fixed_size: 8),
        "timestamp without time zone" => TypeInfo.new(canonical_name: "timestamp", alignment: 8, fixed_size: 8),
        "timestamptz" => TypeInfo.new(canonical_name: "timestamptz", alignment: 8, fixed_size: 8),
        "timestamp with time zone" => TypeInfo.new(canonical_name: "timestamptz", alignment: 8, fixed_size: 8),
        "interval" => TypeInfo.new(canonical_name: "interval", alignment: 8, fixed_size: 16),
        "money" => TypeInfo.new(canonical_name: "money", alignment: 8, fixed_size: 8),
        # time is an 8-byte value with double alignment (pg_type.typalign = 'd'),
        # so it belongs with the 8-byte-aligned types, not the 4-byte ones.
        "time" => TypeInfo.new(canonical_name: "time", alignment: 8, fixed_size: 8),
        "time without time zone" => TypeInfo.new(canonical_name: "time", alignment: 8, fixed_size: 8),
        # 4-byte, 4-byte aligned
        "integer" => TypeInfo.new(canonical_name: "integer", alignment: 4, fixed_size: 4),
        "int4" => TypeInfo.new(canonical_name: "integer", alignment: 4, fixed_size: 4),
        "int" => TypeInfo.new(canonical_name: "integer", alignment: 4, fixed_size: 4),
        "real" => TypeInfo.new(canonical_name: "real", alignment: 4, fixed_size: 4),
        "float4" => TypeInfo.new(canonical_name: "real", alignment: 4, fixed_size: 4),
        "date" => TypeInfo.new(canonical_name: "date", alignment: 4, fixed_size: 4),
        "oid" => TypeInfo.new(canonical_name: "oid", alignment: 4, fixed_size: 4),
        # 2-byte, 2-byte aligned
        "smallint" => TypeInfo.new(canonical_name: "smallint", alignment: 2, fixed_size: 2),
        "int2" => TypeInfo.new(canonical_name: "smallint", alignment: 2, fixed_size: 2),
        # 1-byte, 1-byte aligned
        "boolean" => TypeInfo.new(canonical_name: "boolean", alignment: 1, fixed_size: 1),
        "bool" => TypeInfo.new(canonical_name: "boolean", alignment: 1, fixed_size: 1),
        "\"char\"" => TypeInfo.new(canonical_name: "\"char\"", alignment: 1, fixed_size: 1),
      }.freeze

      # Variable-length types (varlena): 4-byte aligned header, variable content.
      VARIABLE = %w[
        text varchar character\ varying bytea json jsonb xml
        numeric decimal cidr inet macaddr tsvector tsquery
        character char
      ].freeze

      # Prefixes that indicate a parameterized variable-length type,
      # e.g. "varchar(255)", "numeric(10,2)", "char(2)".
      # "bit" also covers "bit(n)" and "bit varying(n)".
      VARIABLE_PREFIXES = %w[
        varchar character\ varying numeric decimal char character
        bit varbit
      ].freeze

      # Returns a TypeInfo for the given type string. Never raises.
      #
      # Fixed-size types resolve through FIXED (case- and whitespace-insensitive).
      # Everything else — known variable-length types and unknown types alike —
      # is reported as variable-length with 4-byte alignment (the safest
      # assumption for padding analysis), with canonical_name set to the
      # argument exactly as given.
      def self.lookup(type_str)
        FIXED.fetch(normalize(type_str)) do
          TypeInfo.new(canonical_name: type_str, alignment: 4, fixed_size: nil)
        end
      end

      # True when the type string names a fixed-size type, a plain
      # variable-length type, or starts with a parameterized-type prefix.
      def self.known?(type_str)
        normalized = normalize(type_str)
        FIXED.key?(normalized) ||
          VARIABLE.include?(normalized) ||
          VARIABLE_PREFIXES.any? { |prefix| normalized.start_with?(prefix) }
      end

      # Lower-cases and trims a type string so lookups ignore case and padding.
      private_class_method def self.normalize(type_str)
        type_str.to_s.strip.downcase
      end
    end
  end
end
|
data/lib/pcrd/sql.rb
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "set"
|
|
4
|
+
|
|
5
|
+
module Pcrd
  # Centralized SQL identifier rendering. Every place that builds SQL (DDL,
  # setup, apply, verify, validation) goes through here so quoting and schema
  # qualification are consistent instead of three different conventions —
  # some of which interpolated identifiers raw and broke on mixed-case,
  # reserved-word, or non-public names.
  #
  # Quoting follows the approach of PostgreSQL's quote_ident(): an identifier
  # that is a safe lowercase word and not a reserved keyword is emitted bare
  # (so normal DDL stays readable), otherwise it is double-quoted with internal
  # quotes doubled.
  #
  # RESERVED must contain every keyword PostgreSQL refuses as a bare column or
  # table name: a keyword missing from the set is emitted unquoted and breaks
  # the statement. Extra entries are harmless (they only cause an unnecessary
  # quote).
  module Sql
    SAFE_IDENT = /\A[a-z_][a-z0-9_$]*\z/

    # PostgreSQL "reserved" and "reserved (can be function or type)" key
    # words, plus a few extras that are merely quoted unnecessarily.
    RESERVED = %w[
      all analyse analyze and any array as asc asymmetric authorization
      between binary both case cast check collate collation column
      concurrently constraint create cross current_catalog current_date
      current_role current_schema current_time current_timestamp current_user
      default deferrable desc distinct do else end except false fetch for
      foreign freeze from full grant group having ilike in initially inner
      intersect into is isnull join lateral leading left like limit localtime
      localtimestamp natural not notnull null offset on only or order outer
      overlaps placing primary references returning right select session_user
      similar some symmetric system_user table tablesample then to trailing
      true union unique user using variadic verbose when where window with
    ].to_set.freeze

    module_function

    # Quotes an identifier only when PostgreSQL would require it.
    # Accepts anything responding to #to_s; returns a String.
    def quote_ident(name)
      str = name.to_s
      return str if str.match?(SAFE_IDENT) && !RESERVED.include?(str)

      %("#{str.gsub('"', '""')}")
    end

    # Fully-qualified, quoted "schema.table".
    def quote_table(name, schema: "public")
      "#{quote_ident(schema)}.#{quote_ident(name)}"
    end

    # Comma-joined list of quoted column identifiers.
    def quote_columns(names)
      names.map { |n| quote_ident(n) }.join(", ")
    end
  end
end
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "set"
|
|
4
|
+
|
|
5
|
+
module Pcrd
  module Transform
    # Reshapes one source row into the layout of the target table described by
    # a Config::Table migration spec.
    #
    # Only structural changes happen here: dropped columns are left out,
    # renamed columns get their new key, everything else passes straight
    # through. No Ruby-level type conversion is done — PostgreSQL coerces
    # values on INSERT/COPY, so the source's string values are forwarded as-is.
    #
    # Columns introduced via add_columns never appear in the output. Leaving
    # them out of the INSERT column list lets the target database fill in
    # their DEFAULT expressions itself.
    #
    # Usage:
    #   transformer = RowTransformer.new(table_config, source_columns)
    #   target_row  = transformer.transform(source_row_hash)
    #   columns     = transformer.target_column_names
    class RowTransformer
      def initialize(table_config, source_columns)
        @source_names = source_columns.map(&:name)
        @drops = build_drop_set(table_config)
        @renames = build_rename_map(table_config)
        @target_names = build_target_names
      end

      # Builds {target_column_name => value} covering every surviving column.
      # Each value is read from row_hash under the matching source column name
      # and is whatever the pg gem returned (typically String or nil).
      def transform(row_hash)
        @target_names.zip(@source_names_kept).to_h do |target_name, source_name|
          [target_name, row_hash[source_name]]
        end
      end

      # Target column names in the order #transform emits them.
      # Hand this to the backfill engine when building INSERT/COPY statements.
      def target_column_names
        @target_names
      end

      # Source column names that were not dropped, in source order.
      def source_column_names_kept
        @source_names_kept
      end

      private

      # Names (as Strings) of every configured column flagged with `drop`.
      def build_drop_set(config)
        column_specs = config.columns || {}
        Set.new(column_specs.filter_map { |name, spec| name.to_s if spec.drop })
      end

      # {source name (String) => new name} for every configured column that
      # carries a `rename`.
      def build_rename_map(config)
        column_specs = config.columns || {}
        column_specs.filter_map { |name, spec| [name.to_s, spec.rename] if spec.rename }.to_h
      end

      # Records the surviving source names and returns their target names
      # (renamed where a rename is configured, otherwise unchanged).
      def build_target_names
        @source_names_kept = @source_names.select { |source_name| !@drops.include?(source_name) }
        @source_names_kept.map { |source_name| @renames.fetch(source_name, source_name) }
      end
    end
  end
end
|
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "set"
|
|
4
|
+
|
|
5
|
+
module Pcrd
  module Transform
    # Registry of known PostgreSQL type transitions and their safety classification.
    #
    # Works with pg's internal type_name values (int4, int8, bool, etc.) for the
    # source side, and normalizes user-facing spec strings (bigint, timestamptz)
    # to the same internal names for matching.
    #
    # Safety levels:
    #   :no_op        — source and target are the same type; nothing to do
    #   :always_safe  — widening cast; no possible data loss; no validation needed
    #   :validated    — cast may lose data; Validator must run a pre-migration check
    #   :unsupported  — pcrd cannot handle this cast; user must provide a custom transform
    module TypeMap
      # Maps user-facing type strings in migration specs to pg internal type names.
      SPEC_TO_PG = {
        "bigint" => "int8",
        "int8" => "int8",
        "integer" => "int4",
        "int4" => "int4",
        "int" => "int4",
        "smallint" => "int2",
        "int2" => "int2",
        "real" => "float4",
        "float4" => "float4",
        "double precision" => "float8",
        "float8" => "float8",
        "boolean" => "bool",
        "bool" => "bool",
        "text" => "text",
        "date" => "date",
        "timestamp" => "timestamp",
        "timestamp without time zone" => "timestamp",
        "timestamptz" => "timestamptz",
        "timestamp with time zone" => "timestamptz",
        "time" => "time",
        "time without time zone" => "time",
        "timetz" => "timetz",
        "time with time zone" => "timetz",
        "numeric" => "numeric",
        "decimal" => "numeric",
        "money" => "money",
        "uuid" => "uuid",
        "json" => "json",
        "jsonb" => "jsonb",
        "bytea" => "bytea",
        "oid" => "oid",
      }.freeze

      # pg type names whose targets may carry a length limit (char(n), varchar(n)).
      CHAR_TYPES = %w[bpchar varchar].freeze

      # Always-safe casts: pure widening, no possible data loss.
      # Keys are [pg_source_type, pg_target_type].
      ALWAYS_SAFE_PAIRS = Set.new([
        %w[int2 int4],
        %w[int2 int8],
        %w[int4 int8],
        %w[int2 float4],
        %w[int4 float4],
        %w[int2 float8],
        %w[int4 float8],
        %w[int8 float8],
        %w[float4 float8],
        %w[int2 numeric],
        %w[int4 numeric],
        %w[int8 numeric],
        %w[float4 numeric],
        %w[float8 numeric],
        %w[date timestamp],
        %w[date timestamptz],
        %w[timestamp timestamptz],
        %w[bpchar text],     # char(n) → text
        %w[varchar text],    # varchar(n) → text
        %w[bpchar varchar],  # char(n) → varchar; cast_safety reports :validated when the target has a length
        %w[name text],
        %w[json jsonb],
      ]).freeze

      # Validated casts: may lose data; Validator generates SQL to check.
      # Each rule has :from, :to, :description, :check_expr and :warn_only.
      # :check_expr is a Proc (column → SQL fragment matching offending rows),
      # the Symbol :varchar_length_check for length-limited targets, or nil for
      # warn-only rules.
      VALIDATED_RULES = [
        {
          from: "int8", to: "int4",
          description: "values must fit in integer range [-2,147,483,648 … 2,147,483,647]",
          check_expr: ->(col) { "#{col} NOT BETWEEN -2147483648 AND 2147483647" },
          warn_only: false
        },
        {
          from: "int8", to: "int2",
          description: "values must fit in smallint range [-32,768 … 32,767]",
          check_expr: ->(col) { "#{col} NOT BETWEEN -32768 AND 32767" },
          warn_only: false
        },
        {
          from: "int4", to: "int2",
          description: "values must fit in smallint range [-32,768 … 32,767]",
          check_expr: ->(col) { "#{col} NOT BETWEEN -32768 AND 32767" },
          warn_only: false
        },
        {
          from: "float8", to: "float4",
          description: "precision will be reduced (double precision → real); some values may differ",
          check_expr: nil,
          warn_only: true
        },
        {
          from: "timestamptz", to: "timestamp",
          description: "timezone information will be discarded",
          check_expr: nil,
          warn_only: true
        },
        {
          from: "numeric", to: "int8",
          description: "fractional parts will be truncated; values must be whole numbers",
          check_expr: ->(col) { "#{col} <> floor(#{col})" },
          warn_only: false
        },
        {
          from: "numeric", to: "int4",
          description: "fractional parts truncated; values must fit in integer range",
          check_expr: ->(col) { "floor(#{col}) NOT BETWEEN -2147483648 AND 2147483647 OR #{col} <> floor(#{col})" },
          warn_only: false
        },
        # text/varchar/char → varchar(n)/char(n): length check — handled separately
        # via :varchar_length_check. The bpchar rows exist so validated_rule
        # returns a rule for every char-family cast that cast_safety reports
        # as :validated.
        { from: "text", to: "varchar", description: "all values must fit within target length",
          check_expr: :varchar_length_check, warn_only: false },
        { from: "varchar", to: "varchar", description: "all values must fit within target length",
          check_expr: :varchar_length_check, warn_only: false },
        { from: "varchar", to: "bpchar", description: "all values must fit within target length",
          check_expr: :varchar_length_check, warn_only: false },
        { from: "text", to: "bpchar", description: "all values must fit within target length",
          check_expr: :varchar_length_check, warn_only: false },
        { from: "bpchar", to: "varchar", description: "all values must fit within target length",
          check_expr: :varchar_length_check, warn_only: false },
        { from: "bpchar", to: "bpchar", description: "all values must fit within target length",
          check_expr: :varchar_length_check, warn_only: false },
      ].freeze

      # Returns the safety classification for a source→target type transition:
      # one of :no_op, :always_safe, :validated or :unsupported.
      #
      # source_pg_type:  pg internal type name from Schema::Column#type_name
      # target_type_str: type string from the migration spec (e.g. "bigint", "varchar(255)")
      def self.cast_safety(source_pg_type, target_type_str)
        src = source_pg_type.to_s.strip
        tgt_pg, tgt_base = normalize_target(target_type_str)
        bounded_target = !extract_length(target_type_str).nil?

        # Same base type: a no-op unless a char-family target carries a length
        # limit, in which case existing values must be checked against it.
        # NOTE(review): numeric → numeric(p,s) is also reported as :no_op here;
        # no rule in this module validates a narrowed precision/scale.
        if src == tgt_pg || (src == tgt_base && tgt_pg.nil?)
          return (CHAR_TYPES.include?(src) && bounded_target) ? :validated : :no_op
        end

        # char ↔ varchar: decided by the target's length limit. This runs before
        # the ALWAYS_SAFE_PAIRS lookup so that char(n) → varchar(m) is validated
        # instead of being waved through by the unbounded [bpchar, varchar] pair.
        if CHAR_TYPES.include?(src) && CHAR_TYPES.include?(tgt_base)
          return bounded_target ? :validated : :always_safe
        end

        return :always_safe if ALWAYS_SAFE_PAIRS.include?([src, tgt_base])
        return :validated if VALIDATED_RULES.any? { |rule| rule[:from] == src && rule[:to] == tgt_base }

        :unsupported
      end

      # Returns the validated rule for a source→target pair, or nil.
      # The source type is normalized the same way cast_safety does it.
      def self.validated_rule(source_pg_type, target_type_str)
        src = source_pg_type.to_s.strip
        _, tgt_base = normalize_target(target_type_str)
        VALIDATED_RULES.find { |rule| rule[:from] == src && rule[:to] == tgt_base }
      end

      # Returns true if a target type string refers to a known type.
      def self.known_target?(type_str)
        _, base = normalize_target(type_str)
        SPEC_TO_PG.value?(base) || CHAR_TYPES.include?(base)
      end

      # Extracts the length parameter from a varchar(N) / char(N) type string.
      # Returns nil if not parameterized (or if type_str is nil).
      def self.extract_length(type_str)
        return nil unless type_str

        m = type_str.to_s.match(/\((\d+)/)
        m ? m[1].to_i : nil
      end

      # Splits a spec type string into [pg_name, base]:
      #   pg_name — the pg internal name from SPEC_TO_PG, or nil when the type
      #             is char/varchar or not in the table
      #   base    — pg_name when known, "varchar"/"bpchar" for the char family,
      #             otherwise the lower-cased text before any "(".
      # Never raises; an empty or nil input yields [nil, ""].
      private_class_method def self.normalize_target(type_str) # rubocop:disable Metrics/MethodLength
        s = type_str.to_s.strip.downcase
        # Regex slice instead of split("(").first, which is nil for "".
        base = s[/\A[^(]*/].strip

        # Parameterized varchar/char: keep base separate
        if s.start_with?("character varying", "varchar")
          return [nil, "varchar"]
        end
        if s.start_with?("character(", "char(", "bpchar")
          return [nil, "bpchar"]
        end

        pg = SPEC_TO_PG[s] || SPEC_TO_PG[base]
        [pg, pg || base]
      end
    end
  end
end
|