pcrd 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +24 -0
  3. data/LICENSE +21 -0
  4. data/README.md +614 -0
  5. data/bin/pcrd +7 -0
  6. data/lib/pcrd/advisory_lock.rb +50 -0
  7. data/lib/pcrd/apply/engine.rb +184 -0
  8. data/lib/pcrd/apply/worker.rb +97 -0
  9. data/lib/pcrd/backfill/batch.rb +158 -0
  10. data/lib/pcrd/backfill/engine.rb +153 -0
  11. data/lib/pcrd/checkpoint/store.rb +217 -0
  12. data/lib/pcrd/cli.rb +274 -0
  13. data/lib/pcrd/commands/analyze.rb +125 -0
  14. data/lib/pcrd/commands/cleanup.rb +112 -0
  15. data/lib/pcrd/commands/demo.rb +152 -0
  16. data/lib/pcrd/commands/readiness.rb +30 -0
  17. data/lib/pcrd/commands/status.rb +129 -0
  18. data/lib/pcrd/commands/verify.rb +172 -0
  19. data/lib/pcrd/config/add_column.rb +7 -0
  20. data/lib/pcrd/config/analyze_config.rb +8 -0
  21. data/lib/pcrd/config/column_spec.rb +10 -0
  22. data/lib/pcrd/config/connection.rb +7 -0
  23. data/lib/pcrd/config/cutover_config.rb +7 -0
  24. data/lib/pcrd/config/load_error.rb +7 -0
  25. data/lib/pcrd/config/loader.rb +158 -0
  26. data/lib/pcrd/config/migrate_config.rb +21 -0
  27. data/lib/pcrd/config/root.rb +9 -0
  28. data/lib/pcrd/config/schema.rb +62 -0
  29. data/lib/pcrd/config/table.rb +9 -0
  30. data/lib/pcrd/config/verify_config.rb +7 -0
  31. data/lib/pcrd/config.rb +7 -0
  32. data/lib/pcrd/connection/client.rb +129 -0
  33. data/lib/pcrd/connection/error.rb +7 -0
  34. data/lib/pcrd/connection/replication.rb +108 -0
  35. data/lib/pcrd/cutover/orchestrator.rb +108 -0
  36. data/lib/pcrd/cutover/sequences.rb +138 -0
  37. data/lib/pcrd/demo/generator.rb +214 -0
  38. data/lib/pcrd/demo/schema.rb +154 -0
  39. data/lib/pcrd/error.rb +12 -0
  40. data/lib/pcrd/migration/orchestrator.rb +272 -0
  41. data/lib/pcrd/monitor/lag.rb +107 -0
  42. data/lib/pcrd/options.rb +15 -0
  43. data/lib/pcrd/output/analyze_printer.rb +173 -0
  44. data/lib/pcrd/output/cutover_printer.rb +128 -0
  45. data/lib/pcrd/output/preflight_printer.rb +119 -0
  46. data/lib/pcrd/output/readiness_printer.rb +72 -0
  47. data/lib/pcrd/preflight.rb +331 -0
  48. data/lib/pcrd/readiness/manifest.rb +201 -0
  49. data/lib/pcrd/replication/consumer.rb +235 -0
  50. data/lib/pcrd/replication/error.rb +10 -0
  51. data/lib/pcrd/replication/pgoutput/messages.rb +68 -0
  52. data/lib/pcrd/replication/pgoutput/parser.rb +316 -0
  53. data/lib/pcrd/reporter/console.rb +46 -0
  54. data/lib/pcrd/reporter/null.rb +14 -0
  55. data/lib/pcrd/schema/column.rb +59 -0
  56. data/lib/pcrd/schema/ddl.rb +71 -0
  57. data/lib/pcrd/schema/diff_entry.rb +36 -0
  58. data/lib/pcrd/schema/differ.rb +175 -0
  59. data/lib/pcrd/schema/object_reader.rb +187 -0
  60. data/lib/pcrd/schema/packer.rb +90 -0
  61. data/lib/pcrd/schema/reader.rb +118 -0
  62. data/lib/pcrd/schema/setup.rb +143 -0
  63. data/lib/pcrd/schema/setup_error.rb +9 -0
  64. data/lib/pcrd/schema/table_not_found.rb +8 -0
  65. data/lib/pcrd/schema/type_registry.rb +116 -0
  66. data/lib/pcrd/sql.rb +55 -0
  67. data/lib/pcrd/transform/row_transformer.rb +69 -0
  68. data/lib/pcrd/transform/type_map.rb +209 -0
  69. data/lib/pcrd/transform/validator.rb +106 -0
  70. data/lib/pcrd/version.rb +5 -0
  71. data/lib/pcrd.rb +11 -0
  72. metadata +231 -0
@@ -0,0 +1,143 @@
1
+ # frozen_string_literal: true
2
+
3
module Pcrd
  module Schema
    # Creates target tables from the migration spec and manages the publication
    # and replication slot on the source.
    # Called at the start of `pcrd migrate` (after preflight passes).
    #
    # In the full streaming flow (Phase 9+), Setup also creates the publication
    # and replication slot on source. For --backfill-only those are skipped.
    class Setup
      # Schema used when qualifying configured table names. Tables are rendered
      # through Sql.quote_table without a per-table schema, so publication
      # membership is compared against this schema as well.
      DEFAULT_SCHEMA = "public"

      # source_pool: source connection; this class calls #exec(sql, params),
      #              #exec_sql(sql) and #quote_ident(name) on it.
      # target_pool: target connection; this class calls #exec_sql(sql) on it and
      #              hands it to Reader.
      # config:      migration config; only config.migrate.tables is read here.
      def initialize(source_pool:, target_pool:, config:)
        @source_pool = source_pool
        @target_pool = target_pool
        @config = config
      end

      # Ensures the publication exists and creates the replication slot for a
      # fresh migration. Returns the slot's starting LSN as a "X/Y" string — pass
      # this to the consumer so streaming begins from a point that covers all of
      # backfill.
      #
      # A leftover publication from a partial prior run is reused if it covers
      # exactly the configured tables (it is just a definition). A leftover
      # slot is NOT reused: its WAL position is unknown relative to backfill, so
      # we refuse and point the operator at --resume or `pcrd cleanup`.
      #
      # Raises SetupError if the slot already exists, if the publication exists
      # with a different table set, or if no tables are configured.
      def create_publication_and_slot(pub_name:, slot_name:)
        ensure_publication(pub_name)

        if slot_exists?(slot_name)
          raise SetupError, "Replication slot '#{slot_name}' already exists. Resume the existing " \
                            "migration with --resume, or remove it with `pcrd cleanup` to start over."
        end

        result = @source_pool.exec(
          "SELECT lsn FROM pg_create_logical_replication_slot($1, 'pgoutput')",
          [slot_name]
        )
        result[0]["lsn"]
      end

      # Validates that a --resume run has the slot and publication it needs.
      # Raises SetupError with a clear message if either is missing.
      def validate_resumable!(pub_name:, slot_name:)
        unless slot_exists?(slot_name)
          raise SetupError, "Cannot resume: replication slot '#{slot_name}' does not exist on the source. " \
                            "Start a fresh migration (without --resume)."
        end

        unless publication_exists?(pub_name)
          raise SetupError, "Cannot resume: publication '#{pub_name}' does not exist on the source. " \
                            "Start a fresh migration (without --resume)."
        end
      end

      # Drops the publication and replication slot (cleanup phase).
      # Both statements are no-ops when the object is already absent.
      def drop_publication_and_slot(pub_name:, slot_name:)
        @source_pool.exec_sql(
          "DROP PUBLICATION IF EXISTS #{@source_pool.quote_ident(pub_name)}"
        )
        # pg_drop_replication_slot has no IF EXISTS form, hence the EXISTS guard.
        @source_pool.exec(
          "SELECT pg_drop_replication_slot($1) WHERE EXISTS (" \
          "  SELECT 1 FROM pg_replication_slots WHERE slot_name = $1)",
          [slot_name]
        )
      end

      # Creates all target tables and returns a Hash<table_name, ddl_string>.
      # Raises SetupError if a target table already exists, unless
      # force_overwrite is true, in which case the table is dropped (CASCADE)
      # and recreated.
      def create_target_tables(force_overwrite: false)
        source_reader = Reader.new(@source_pool)
        # One target reader serves every table; it was previously rebuilt per table.
        target_reader = Reader.new(@target_pool)
        ddls = {}

        @config.migrate.tables.each do |table_config|
          name = table_config.name
          source_cols = source_reader.read(name)
          pk_cols = source_reader.primary_key_columns(name)

          ddl = DDL.generate(
            source_columns: source_cols,
            table_config: table_config,
            primary_key_columns: pk_cols
          )

          if target_reader.table_exists?(name)
            unless force_overwrite
              raise SetupError, "Table '#{name}' already exists on target. " \
                                "Pass --force-overwrite to drop and recreate."
            end

            @target_pool.exec_sql("DROP TABLE IF EXISTS #{Sql.quote_table(name)} CASCADE")
          end

          @target_pool.exec_sql("#{ddl};")
          ddls[name] = ddl
        end

        ddls
      end

      private

      # Creates the publication if absent; reuses it if it already covers exactly
      # the configured tables; raises if it exists but covers a different set.
      #
      # Membership is compared on schema-qualified names so that a publication
      # covering a same-named table in another schema is not mistaken for a match.
      def ensure_publication(pub_name)
        table_names = @config.migrate.tables.map(&:name)
        configured = table_names.map { |name| "#{DEFAULT_SCHEMA}.#{name}" }.sort

        if publication_exists?(pub_name)
          existing = publication_tables(pub_name).sort
          return if existing == configured

          raise SetupError, "Publication '#{pub_name}' already exists but covers #{existing.inspect}, " \
                            "not the configured tables #{configured.inspect}. " \
                            "Drop it with `pcrd cleanup` or reconcile the config."
        end

        # "FOR TABLE" with an empty list is a SQL syntax error; fail with a clear message instead.
        if table_names.empty?
          raise SetupError, "Cannot create publication '#{pub_name}': no tables are configured for migration."
        end

        table_list = table_names.map { |name| Sql.quote_table(name, schema: DEFAULT_SCHEMA) }.join(", ")
        @source_pool.exec_sql(
          "CREATE PUBLICATION #{@source_pool.quote_ident(pub_name)} FOR TABLE #{table_list}"
        )
      end

      # True if a publication with this name exists on the source.
      def publication_exists?(pub_name)
        @source_pool.exec(
          "SELECT 1 FROM pg_publication WHERE pubname = $1", [pub_name]
        ).ntuples.positive?
      end

      # Returns the publication's member tables as "schema.table" strings.
      def publication_tables(pub_name)
        @source_pool.exec(
          "SELECT schemaname || '.' || tablename FROM pg_publication_tables WHERE pubname = $1", [pub_name]
        ).column_values(0)
      end

      # True if a replication slot with this name exists on the source.
      def slot_exists?(slot_name)
        @source_pool.exec(
          "SELECT 1 FROM pg_replication_slots WHERE slot_name = $1", [slot_name]
        ).ntuples.positive?
      end
    end
  end
end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
module Pcrd
  module Schema
    # Signals that replication or target setup cannot safely continue: a
    # replication slot already exists, a publication covers the wrong tables,
    # or a target table is already present.
    class SetupError < Pcrd::Error
    end
  end
end
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
module Pcrd
  module Schema
    # Signals that a table named in the configuration does not exist on the
    # source database.
    class TableNotFound < Pcrd::Error
    end
  end
end
@@ -0,0 +1,116 @@
1
+ # frozen_string_literal: true
2
+
3
module Pcrd
  module Schema
    # Maps PostgreSQL type name strings (as they appear in migration specs)
    # to the physical storage properties needed for padding analysis and
    # synthetic column construction.
    #
    # Used when building target Schema::Column objects from a migration spec
    # without a real target DB connection.
    #
    # Alignment and size values mirror pg_type.typalign / pg_type.typlen.
    module TypeRegistry
      # canonical_name: type name to report for the column
      # alignment:      required byte alignment (1, 2, 4 or 8)
      # fixed_size:     size in bytes, or nil for variable-length (varlena) types
      TypeInfo = Data.define(:canonical_name, :alignment, :fixed_size)

      # Fixed-size types with exact mappings.
      FIXED = {
        # 8-byte aligned
        "bigint" => TypeInfo.new(canonical_name: "bigint", alignment: 8, fixed_size: 8),
        "int8" => TypeInfo.new(canonical_name: "bigint", alignment: 8, fixed_size: 8),
        "double precision" => TypeInfo.new(canonical_name: "double precision", alignment: 8, fixed_size: 8),
        "float8" => TypeInfo.new(canonical_name: "double precision", alignment: 8, fixed_size: 8),
        "timestamp" => TypeInfo.new(canonical_name: "timestamp", alignment: 8, fixed_size: 8),
        "timestamp without time zone" => TypeInfo.new(canonical_name: "timestamp", alignment: 8, fixed_size: 8),
        "timestamptz" => TypeInfo.new(canonical_name: "timestamptz", alignment: 8, fixed_size: 8),
        "timestamp with time zone" => TypeInfo.new(canonical_name: "timestamptz", alignment: 8, fixed_size: 8),
        "interval" => TypeInfo.new(canonical_name: "interval", alignment: 8, fixed_size: 16),
        "money" => TypeInfo.new(canonical_name: "money", alignment: 8, fixed_size: 8),
        # time is stored as a 64-bit value and is double-aligned (typalign 'd'), not 4.
        "time" => TypeInfo.new(canonical_name: "time", alignment: 8, fixed_size: 8),
        "time without time zone" => TypeInfo.new(canonical_name: "time", alignment: 8, fixed_size: 8),
        "timetz" => TypeInfo.new(canonical_name: "timetz", alignment: 8, fixed_size: 12),
        "time with time zone" => TypeInfo.new(canonical_name: "timetz", alignment: 8, fixed_size: 12),
        # 4-byte aligned
        "integer" => TypeInfo.new(canonical_name: "integer", alignment: 4, fixed_size: 4),
        "int4" => TypeInfo.new(canonical_name: "integer", alignment: 4, fixed_size: 4),
        "int" => TypeInfo.new(canonical_name: "integer", alignment: 4, fixed_size: 4),
        "real" => TypeInfo.new(canonical_name: "real", alignment: 4, fixed_size: 4),
        "float4" => TypeInfo.new(canonical_name: "real", alignment: 4, fixed_size: 4),
        "date" => TypeInfo.new(canonical_name: "date", alignment: 4, fixed_size: 4),
        "oid" => TypeInfo.new(canonical_name: "oid", alignment: 4, fixed_size: 4),
        # macaddr is a fixed 6-byte type, not a varlena.
        "macaddr" => TypeInfo.new(canonical_name: "macaddr", alignment: 4, fixed_size: 6),
        # 2-byte aligned
        "smallint" => TypeInfo.new(canonical_name: "smallint", alignment: 2, fixed_size: 2),
        "int2" => TypeInfo.new(canonical_name: "smallint", alignment: 2, fixed_size: 2),
        # 1-byte aligned
        "boolean" => TypeInfo.new(canonical_name: "boolean", alignment: 1, fixed_size: 1),
        "bool" => TypeInfo.new(canonical_name: "boolean", alignment: 1, fixed_size: 1),
        "\"char\"" => TypeInfo.new(canonical_name: "\"char\"", alignment: 1, fixed_size: 1),
        "uuid" => TypeInfo.new(canonical_name: "uuid", alignment: 1, fixed_size: 16),
      }.freeze

      # Variable-length types (varlena): 4-byte aligned header, variable content.
      VARIABLE = %w[
        text varchar character\ varying bytea json jsonb xml
        numeric decimal cidr inet tsvector tsquery
        character char
      ].freeze

      # Prefixes that indicate a parameterized variable-length type,
      # e.g. "varchar(255)", "numeric(10,2)", "char(2)".
      VARIABLE_PREFIXES = %w[
        varchar character\ varying numeric decimal char character
        bit bit\ varying varbit
      ].freeze

      # Matches a single-integer precision modifier such as the "(3)" in
      # "timestamp(3)" or "timestamp(3) with time zone".
      PRECISION_MODIFIER = /\s*\(\s*\d+\s*\)/

      # Returns a TypeInfo for the given type string, or a safe variable-length
      # default if the type is unknown. Never raises.
      #
      # Exact fixed-size names return the registry entry (with its canonical
      # name). Precision-qualified fixed-size names ("timestamp(3)") get the
      # base type's alignment and size but keep type_str as canonical_name,
      # like every other parameterized type. Everything else is reported as
      # variable-length with 4-byte alignment and type_str as canonical_name.
      def self.lookup(type_str)
        normalized = type_str.to_s.strip.downcase

        return FIXED[normalized] if FIXED.key?(normalized)

        fixed = fixed_without_precision(normalized)
        return fixed.with(canonical_name: type_str) if fixed

        # Known varlena types and unknown types share the same result:
        # variable-length is the safest assumption for padding analysis.
        TypeInfo.new(canonical_name: type_str, alignment: 4, fixed_size: nil)
      end

      # Returns true if type_str names a type this registry recognizes
      # (fixed-size, precision-qualified fixed-size, or variable-length).
      def self.known?(type_str)
        normalized = type_str.to_s.strip.downcase
        return true if FIXED.key?(normalized) || fixed_without_precision(normalized)

        VARIABLE_PREFIXES.any? { |prefix| normalized.start_with?(prefix) } ||
          VARIABLE.include?(normalized)
      end

      # Returns the FIXED entry for a precision-qualified name, or nil when the
      # name carries no precision modifier or its base is not a fixed-size type.
      def self.fixed_without_precision(normalized)
        stripped = normalized.sub(PRECISION_MODIFIER, "")
        return nil if stripped == normalized

        FIXED[stripped]
      end
      private_class_method :fixed_without_precision
    end
  end
end
data/lib/pcrd/sql.rb ADDED
@@ -0,0 +1,55 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "set"
4
+
5
module Pcrd
  # Centralized SQL identifier rendering. Every place that builds SQL (DDL,
  # setup, apply, verify, validation) goes through here so quoting and schema
  # qualification are consistent instead of three different conventions —
  # some of which interpolated identifiers raw and broke on mixed-case,
  # reserved-word, or non-public names.
  #
  # Quoting follows the spirit of PostgreSQL's quote_ident(): an identifier
  # that is a safe lowercase word and not a reserved keyword is emitted bare
  # (so normal DDL stays readable), otherwise it is double-quoted with internal
  # quotes doubled.
  #
  # RESERVED must cover every keyword PostgreSQL lists as "reserved" or
  # "reserved (can be function or type)": a keyword missing from the set is
  # emitted bare and produces a broken statement. Extra entries are harmless —
  # they only cause an unnecessary quote.
  module Sql
    # Lowercase word characters only; anything else is always quoted.
    SAFE_IDENT = /\A[a-z_][a-z0-9_$]*\z/

    RESERVED = %w[
      all analyse analyze and any array as asc asymmetric authorization
      between binary both case cast check collate collation column
      concurrently constraint create cross current_catalog current_date
      current_role current_schema current_time current_timestamp current_user
      default deferrable desc distinct do else end except false fetch for
      foreign freeze from full grant group having ilike in initially inner
      intersect into is isnull join lateral leading left like limit localtime
      localtimestamp natural not notnull null offset on only or order outer
      overlaps placing primary references returning right select session_user
      similar some symmetric system_user table tablesample then to trailing
      true union unique user using variadic verbose when where window with
    ].to_set.freeze

    module_function

    # Quotes an identifier only when PostgreSQL would require it.
    # Accepts anything responding to #to_s; returns a String.
    def quote_ident(name)
      str = name.to_s
      return str if str.match?(SAFE_IDENT) && !RESERVED.include?(str)

      # Embedded double quotes are escaped by doubling them.
      %("#{str.gsub('"', '""')}")
    end

    # Fully-qualified, quoted "schema.table".
    def quote_table(name, schema: "public")
      "#{quote_ident(schema)}.#{quote_ident(name)}"
    end

    # Comma-joined list of quoted column identifiers.
    def quote_columns(names)
      names.map { |n| quote_ident(n) }.join(", ")
    end
  end
end
@@ -0,0 +1,69 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "set"
4
+
5
module Pcrd
  module Transform
    # Reshapes one source row hash according to a Config::Table migration spec.
    #
    # Only structural changes are applied: dropped columns are left out, renamed
    # columns get their new key, and everything else is copied through. Values
    # are never converted in Ruby — PostgreSQL coerces them on INSERT/COPY, so
    # the strings read from the source are forwarded untouched.
    #
    # Columns introduced via add_columns never appear in the output. Leaving
    # them out of the INSERT column list lets the target database fill in their
    # DEFAULT expressions.
    #
    # Usage:
    #   transformer = RowTransformer.new(table_config, source_columns)
    #   target_row  = transformer.transform(source_row_hash)
    #   columns     = transformer.target_column_names
    class RowTransformer
      # table_config:   responds to #columns (name => spec pairs, or nil); each
      #                 spec responds to #drop and #rename.
      # source_columns: ordered collection whose elements respond to #name.
      def initialize(table_config, source_columns)
        @source_names = source_columns.map(&:name)
        @drops = build_drop_set(table_config)
        @renames = build_rename_map(table_config)
        @target_names = build_target_names
      end

      # Builds {target_column_name => value} for every surviving column, in
      # source column order. Values are taken from row_hash as-is (typically
      # String or nil, whatever the pg gem returned).
      def transform(row_hash)
        @source_names_kept.zip(@target_names).to_h do |source_name, target_name|
          [target_name, row_hash[source_name]]
        end
      end

      # Target column names in the order #transform emits them.
      # Hand this to the backfill engine when building INSERT/COPY statements.
      def target_column_names
        @target_names
      end

      # Source column names that were not dropped, in source order.
      def source_column_names_kept
        @source_names_kept
      end

      private

      # Names (as Strings) of the columns whose spec marks them as dropped.
      def build_drop_set(config)
        column_specs(config).filter_map { |name, spec| name.to_s if spec.drop }.to_set
      end

      # Maps original column name (String) to its new name for renamed columns.
      def build_rename_map(config)
        column_specs(config).filter_map { |name, spec| [name.to_s, spec.rename] if spec.rename }.to_h
      end

      # Records the surviving source names, then returns their target names.
      def build_target_names
        @source_names_kept = @source_names.reject { |name| @drops.include?(name) }
        @source_names_kept.map { |name| @renames.fetch(name, name) }
      end

      # Column specs from the config, or an empty hash when none are configured.
      def column_specs(config)
        config.columns || {}
      end
    end
  end
end
@@ -0,0 +1,209 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "set"
4
+
5
module Pcrd
  module Transform
    # Registry of known PostgreSQL type transitions and their safety classification.
    #
    # Works with pg's internal type_name values (int4, int8, bool, etc.) for the
    # source side, and normalizes user-facing spec strings (bigint, timestamptz)
    # to the same internal names for matching.
    #
    # Safety levels:
    #   :no_op       — source and target are the same type; nothing to do
    #   :always_safe — widening cast; no possible data loss; no validation needed
    #   :validated   — cast may lose data; Validator must run a pre-migration check
    #   :unsupported — pcrd cannot handle this cast; user must provide a custom transform
    module TypeMap
      # Maps user-facing type strings in migration specs to pg internal type names.
      SPEC_TO_PG = {
        "bigint" => "int8",
        "int8" => "int8",
        "integer" => "int4",
        "int4" => "int4",
        "int" => "int4",
        "smallint" => "int2",
        "int2" => "int2",
        "real" => "float4",
        "float4" => "float4",
        "double precision" => "float8",
        "float8" => "float8",
        "boolean" => "bool",
        "bool" => "bool",
        "text" => "text",
        "date" => "date",
        "timestamp" => "timestamp",
        "timestamp without time zone" => "timestamp",
        "timestamptz" => "timestamptz",
        "timestamp with time zone" => "timestamptz",
        "time" => "time",
        "time without time zone" => "time",
        "timetz" => "timetz",
        "time with time zone" => "timetz",
        "numeric" => "numeric",
        "decimal" => "numeric",
        "money" => "money",
        "uuid" => "uuid",
        "json" => "json",
        "jsonb" => "jsonb",
        "bytea" => "bytea",
        "oid" => "oid",
      }.freeze

      # pg type names of the length-parameterizable character types.
      CHAR_TYPES = %w[bpchar varchar].freeze

      # Always-safe casts: pure widening, no possible data loss.
      # Keys are [pg_source_type, pg_target_type].
      #
      # int4 → float4 and int8 → float8 are deliberately NOT listed: the float
      # mantissa is narrower than the integer, so large values are rounded.
      # They are covered by warn-only entries in VALIDATED_RULES instead.
      ALWAYS_SAFE_PAIRS = Set.new([
        %w[int2 int4],
        %w[int2 int8],
        %w[int4 int8],
        %w[int2 float4],
        %w[int2 float8],
        %w[int4 float8],
        %w[float4 float8],
        %w[int2 numeric],
        %w[int4 numeric],
        %w[int8 numeric],
        %w[float4 numeric],
        %w[float8 numeric],
        %w[date timestamp],
        %w[date timestamptz],
        %w[timestamp timestamptz],
        %w[bpchar text],        # char(n) → text
        %w[varchar text],       # varchar(n) → text
        %w[bpchar varchar],     # char(n) → unbounded varchar; a length-limited target is :validated
        %w[name text],
        %w[json jsonb],
      ]).freeze

      # Validated casts: may lose data; Validator generates SQL to check.
      # Each rule has :from, :to, :description, :check_expr and :warn_only.
      # :check_expr is a Proc (column → SQL fragment matching offending rows),
      # the symbol :varchar_length_check, or nil for warn-only rules.
      # Compound fragments are parenthesized so they can be embedded safely in
      # a larger boolean expression.
      VALIDATED_RULES = [
        {
          from: "int8", to: "int4",
          description: "values must fit in integer range [-2,147,483,648 … 2,147,483,647]",
          check_expr: ->(col) { "#{col} NOT BETWEEN -2147483648 AND 2147483647" },
          warn_only: false
        },
        {
          from: "int8", to: "int2",
          description: "values must fit in smallint range [-32,768 … 32,767]",
          check_expr: ->(col) { "#{col} NOT BETWEEN -32768 AND 32767" },
          warn_only: false
        },
        {
          from: "int4", to: "int2",
          description: "values must fit in smallint range [-32,768 … 32,767]",
          check_expr: ->(col) { "#{col} NOT BETWEEN -32768 AND 32767" },
          warn_only: false
        },
        {
          from: "int4", to: "float4",
          description: "integers beyond ±16,777,216 may be rounded (integer → real)",
          check_expr: nil,
          warn_only: true
        },
        {
          from: "int8", to: "float8",
          description: "integers beyond ±9,007,199,254,740,992 may be rounded (bigint → double precision)",
          check_expr: nil,
          warn_only: true
        },
        {
          from: "float8", to: "float4",
          description: "precision will be reduced (double precision → real); some values may differ",
          check_expr: nil,
          warn_only: true
        },
        {
          from: "timestamptz", to: "timestamp",
          description: "timezone information will be discarded",
          check_expr: nil,
          warn_only: true
        },
        {
          from: "numeric", to: "int8",
          description: "fractional parts will be truncated; values must be whole numbers that fit in bigint range",
          check_expr: lambda { |col|
            "(#{col} NOT BETWEEN -9223372036854775808 AND 9223372036854775807 OR #{col} <> floor(#{col}))"
          },
          warn_only: false
        },
        {
          from: "numeric", to: "int4",
          description: "fractional parts truncated; values must fit in integer range",
          check_expr: ->(col) { "(floor(#{col}) NOT BETWEEN -2147483648 AND 2147483647 OR #{col} <> floor(#{col}))" },
          warn_only: false
        },
        # Character types → varchar(n) / char(n): length check — handled separately
        # via :varchar_length_check.
        { from: "text", to: "varchar", description: "all values must fit within target length",
          check_expr: :varchar_length_check, warn_only: false },
        { from: "varchar", to: "varchar", description: "all values must fit within target length",
          check_expr: :varchar_length_check, warn_only: false },
        { from: "bpchar", to: "varchar", description: "all values must fit within target length",
          check_expr: :varchar_length_check, warn_only: false },
        { from: "varchar", to: "bpchar", description: "all values must fit within target length",
          check_expr: :varchar_length_check, warn_only: false },
        { from: "bpchar", to: "bpchar", description: "all values must fit within target length",
          check_expr: :varchar_length_check, warn_only: false },
        { from: "text", to: "bpchar", description: "all values must fit within target length",
          check_expr: :varchar_length_check, warn_only: false },
      ].freeze

      # Returns the safety classification for a source→target type transition.
      #
      # source_pg_type:  pg internal type name from Schema::Column#type_name
      # target_type_str: type string from the migration spec (e.g. "bigint", "varchar(255)")
      #
      # Returns one of :no_op, :always_safe, :validated, :unsupported.
      def self.cast_safety(source_pg_type, target_type_str)
        src = source_pg_type.to_s.strip
        _, tgt_base = normalize_target(target_type_str)

        # Character → character is decided by the target length first, so a
        # length-limited target can never be reported as safe (char(10) →
        # varchar(5) would otherwise match ALWAYS_SAFE_PAIRS).
        if CHAR_TYPES.include?(src) && CHAR_TYPES.include?(tgt_base)
          return :validated if extract_length(target_type_str)

          return src == tgt_base ? :no_op : :always_safe
        end

        # Same base type. NOTE: this includes numeric → numeric(p,s); a narrower
        # precision/scale is not validated here.
        return :no_op if src == tgt_base

        return :always_safe if ALWAYS_SAFE_PAIRS.include?([src, tgt_base])
        return :validated if find_rule(src, tgt_base)

        :unsupported
      end

      # Returns the validated rule for a source→target pair, or nil.
      def self.validated_rule(source_pg_type, target_type_str)
        _, tgt_base = normalize_target(target_type_str)
        find_rule(source_pg_type.to_s.strip, tgt_base)
      end

      # Returns true if a target type string refers to a known type.
      def self.known_target?(type_str)
        _, base = normalize_target(type_str)
        SPEC_TO_PG.value?(base) || CHAR_TYPES.include?(base)
      end

      # Extracts the length parameter from a varchar(N) / char(N) type string.
      # Returns nil if not parameterized.
      def self.extract_length(type_str)
        return nil unless type_str

        m = type_str.to_s.match(/\((\d+)/)
        m ? m[1].to_i : nil
      end

      # First VALIDATED_RULES entry for the given pg source/target names, or nil.
      private_class_method def self.find_rule(src, tgt_base)
        VALIDATED_RULES.find { |rule| rule[:from] == src && rule[:to] == tgt_base }
      end

      # Normalizes a spec type string to [pg_name_or_nil, base_name].
      # Character types return [nil, "varchar"] / [nil, "bpchar"]; types found in
      # SPEC_TO_PG return [pg, pg]; anything else returns [nil, lowercased base].
      private_class_method def self.normalize_target(type_str)
        s = type_str.to_s.strip.downcase
        # split on an empty string yields [], so guard against nil.
        base = s.split("(").first.to_s.strip

        # Parameterized varchar/char: keep base separate
        return [nil, "varchar"] if s.start_with?("character varying", "varchar")
        return [nil, "bpchar"] if s.start_with?("character(", "char(", "bpchar")

        pg = SPEC_TO_PG[s] || SPEC_TO_PG[base]
        [pg, pg || base]
      end
    end
  end
end