pcrd 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72):
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +24 -0
  3. data/LICENSE +21 -0
  4. data/README.md +614 -0
  5. data/bin/pcrd +7 -0
  6. data/lib/pcrd/advisory_lock.rb +50 -0
  7. data/lib/pcrd/apply/engine.rb +184 -0
  8. data/lib/pcrd/apply/worker.rb +97 -0
  9. data/lib/pcrd/backfill/batch.rb +158 -0
  10. data/lib/pcrd/backfill/engine.rb +153 -0
  11. data/lib/pcrd/checkpoint/store.rb +217 -0
  12. data/lib/pcrd/cli.rb +274 -0
  13. data/lib/pcrd/commands/analyze.rb +125 -0
  14. data/lib/pcrd/commands/cleanup.rb +112 -0
  15. data/lib/pcrd/commands/demo.rb +152 -0
  16. data/lib/pcrd/commands/readiness.rb +30 -0
  17. data/lib/pcrd/commands/status.rb +129 -0
  18. data/lib/pcrd/commands/verify.rb +172 -0
  19. data/lib/pcrd/config/add_column.rb +7 -0
  20. data/lib/pcrd/config/analyze_config.rb +8 -0
  21. data/lib/pcrd/config/column_spec.rb +10 -0
  22. data/lib/pcrd/config/connection.rb +7 -0
  23. data/lib/pcrd/config/cutover_config.rb +7 -0
  24. data/lib/pcrd/config/load_error.rb +7 -0
  25. data/lib/pcrd/config/loader.rb +158 -0
  26. data/lib/pcrd/config/migrate_config.rb +21 -0
  27. data/lib/pcrd/config/root.rb +9 -0
  28. data/lib/pcrd/config/schema.rb +62 -0
  29. data/lib/pcrd/config/table.rb +9 -0
  30. data/lib/pcrd/config/verify_config.rb +7 -0
  31. data/lib/pcrd/config.rb +7 -0
  32. data/lib/pcrd/connection/client.rb +129 -0
  33. data/lib/pcrd/connection/error.rb +7 -0
  34. data/lib/pcrd/connection/replication.rb +108 -0
  35. data/lib/pcrd/cutover/orchestrator.rb +108 -0
  36. data/lib/pcrd/cutover/sequences.rb +138 -0
  37. data/lib/pcrd/demo/generator.rb +214 -0
  38. data/lib/pcrd/demo/schema.rb +154 -0
  39. data/lib/pcrd/error.rb +12 -0
  40. data/lib/pcrd/migration/orchestrator.rb +272 -0
  41. data/lib/pcrd/monitor/lag.rb +107 -0
  42. data/lib/pcrd/options.rb +15 -0
  43. data/lib/pcrd/output/analyze_printer.rb +173 -0
  44. data/lib/pcrd/output/cutover_printer.rb +128 -0
  45. data/lib/pcrd/output/preflight_printer.rb +119 -0
  46. data/lib/pcrd/output/readiness_printer.rb +72 -0
  47. data/lib/pcrd/preflight.rb +331 -0
  48. data/lib/pcrd/readiness/manifest.rb +201 -0
  49. data/lib/pcrd/replication/consumer.rb +235 -0
  50. data/lib/pcrd/replication/error.rb +10 -0
  51. data/lib/pcrd/replication/pgoutput/messages.rb +68 -0
  52. data/lib/pcrd/replication/pgoutput/parser.rb +316 -0
  53. data/lib/pcrd/reporter/console.rb +46 -0
  54. data/lib/pcrd/reporter/null.rb +14 -0
  55. data/lib/pcrd/schema/column.rb +59 -0
  56. data/lib/pcrd/schema/ddl.rb +71 -0
  57. data/lib/pcrd/schema/diff_entry.rb +36 -0
  58. data/lib/pcrd/schema/differ.rb +175 -0
  59. data/lib/pcrd/schema/object_reader.rb +187 -0
  60. data/lib/pcrd/schema/packer.rb +90 -0
  61. data/lib/pcrd/schema/reader.rb +118 -0
  62. data/lib/pcrd/schema/setup.rb +143 -0
  63. data/lib/pcrd/schema/setup_error.rb +9 -0
  64. data/lib/pcrd/schema/table_not_found.rb +8 -0
  65. data/lib/pcrd/schema/type_registry.rb +116 -0
  66. data/lib/pcrd/sql.rb +55 -0
  67. data/lib/pcrd/transform/row_transformer.rb +69 -0
  68. data/lib/pcrd/transform/type_map.rb +209 -0
  69. data/lib/pcrd/transform/validator.rb +106 -0
  70. data/lib/pcrd/version.rb +5 -0
  71. data/lib/pcrd.rb +11 -0
  72. metadata +231 -0
@@ -0,0 +1,175 @@
# frozen_string_literal: true

require "set"

module Pcrd
  module Schema
    # Produces the column-level diff between a source table and its target,
    # optionally steered by a migration spec (Config::Table).
    #
    # The differ runs in one of two modes:
    #
    # Synthesis mode (target_columns: nil)
    #   No target exists yet, so the target schema is derived by applying the
    #   migration spec to the source columns. Useful for previewing the result
    #   of a migration before it has run.
    #
    # Real-target mode (target_columns: [...])
    #   The target schema was read from a live database. The spec only supplies
    #   the source→target column name mapping; with no spec, columns are paired
    #   up by name.
    class Differ
      # Builds the diff.
      #
      # source_columns: Array<Schema::Column>
      # table_config:   Config::Table or nil
      # target_columns: Array<Schema::Column>, or nil to synthesize the target
      #
      # Returns Array<DiffEntry>: one entry per source column (in source order)
      # followed by the added columns.
      def diff(source_columns:, table_config: nil, target_columns: nil)
        return synthesize_diff(source_columns, table_config) if target_columns.nil?

        real_diff(source_columns, table_config, target_columns)
      end

      # Collects the target-side columns of a diff (for padding analysis):
      # dropped entries and entries without a target column are skipped.
      def target_columns_from_diff(entries)
        entries.each_with_object([]) do |entry, kept|
          next if entry.dropped?

          column = entry.target_column
          kept << column unless column.nil?
        end
      end

      private

      # ---------------------------------------------------------------------
      # Synthesis path: derive target columns from source + spec
      # ---------------------------------------------------------------------

      # Walks the source columns, applying each column's spec (drop / rename /
      # retype), then appends one :added entry per spec-declared new column.
      def synthesize_diff(source_columns, table_config)
        specs = table_config&.columns || {}

        carried = source_columns.map do |source|
          spec = specs[source.name]

          if spec&.drop
            DiffEntry.new(status: :dropped, source_column: source, target_column: nil)
          else
            synthesized = synthesize_column(source, spec)
            DiffEntry.new(
              status: compute_status(source, synthesized, spec),
              source_column: source,
              target_column: synthesized
            )
          end
        end

        # Added columns always trail the carried-over ones.
        added = (table_config&.add_columns || []).map do |addition|
          DiffEntry.new(status: :added, source_column: nil, target_column: build_added_column(addition))
        end

        carried + added
      end

      # Builds the target-side Column for a source column. The spec may rename
      # it and/or give it a new type; everything else is copied from the source.
      def synthesize_column(source_col, col_spec)
        target_name = col_spec&.rename || source_col.name
        requested_type = col_spec&.type

        type_attrs =
          if requested_type
            # Retyped: physical attributes come from the type registry.
            registry_entry = TypeRegistry.lookup(requested_type)
            {
              type_name: registry_entry.canonical_name,
              formatted_type: requested_type,
              alignment: registry_entry.alignment,
              fixed_size: registry_entry.fixed_size
            }
          else
            {
              type_name: source_col.type_name,
              formatted_type: source_col.formatted_type,
              alignment: source_col.alignment,
              fixed_size: source_col.fixed_size
            }
          end

        Column.new(
          attnum: source_col.attnum,
          name: target_name,
          nullable: source_col.nullable,
          default_expr: source_col.default_expr,
          **type_attrs
        )
      end

      # Builds the Column for a spec-declared addition. It has no attnum yet
      # and is always created nullable.
      def build_added_column(add_col)
        registry_entry = TypeRegistry.lookup(add_col.type)

        Column.new(
          attnum: nil,
          name: add_col.name,
          type_name: registry_entry.canonical_name,
          formatted_type: add_col.type,
          alignment: registry_entry.alignment,
          fixed_size: registry_entry.fixed_size,
          nullable: true,
          default_expr: add_col.default
        )
      end

      # Classifies a source/target pair. The type counts as changed when either
      # the type name or the (case-insensitive) formatted type differs. The
      # third argument (the column spec) is accepted but not consulted.
      def compute_status(src, target, _col_spec)
        retyped = src.type_name != target.type_name ||
                  src.formatted_type.downcase != target.formatted_type.downcase
        renamed = src.name != target.name

        case [retyped, renamed]
        when [true, true] then :type_and_renamed
        when [true, false] then :type_changed
        when [false, true] then :renamed
        else :unchanged
        end
      end

      # ---------------------------------------------------------------------
      # Real-target path: pair up live source and target columns
      # ---------------------------------------------------------------------

      # Matches each source column to the target column carrying its (possibly
      # renamed) name. Source columns that are spec-dropped or have no match
      # are reported as :dropped; target columns nobody matched are :added.
      def real_diff(source_columns, table_config, target_columns)
        specs = table_config&.columns || {}
        targets = target_columns.to_h { |column| [column.name, column] }
        claimed = Set.new

        entries = source_columns.map do |source|
          spec = specs[source.name]
          expected_name = spec&.rename || source.name
          # A spec-level drop wins even if a same-named target column exists.
          match = spec&.drop ? nil : targets[expected_name]

          if match
            claimed << match.name
            DiffEntry.new(
              status: compute_status(source, match, spec),
              source_column: source,
              target_column: match
            )
          else
            # Either dropped by the spec, or expected on the target but absent.
            DiffEntry.new(status: :dropped, source_column: source, target_column: nil)
          end
        end

        # Whatever is left on the target was not derived from any source column.
        unclaimed = target_columns.reject { |column| claimed.include?(column.name) }
        entries + unclaimed.map do |column|
          DiffEntry.new(status: :added, source_column: nil, target_column: column)
        end
      end
    end
  end
end
@@ -0,0 +1,187 @@
# frozen_string_literal: true

module Pcrd
  module Schema
    # Discovers the "secondary" schema objects that the load DDL leaves out on
    # purpose (Schema::DDL creates only the table + primary key). The
    # target-readiness manifest uses this to report and regenerate everything
    # that has to exist on the target before cutover.
    #
    # The reader is cluster-agnostic: aim it at the source to discover objects,
    # or at the target to see what is already in place.
    class ObjectReader
      Index = Data.define(:name, :definition, :unique, :columns)
      Constraint = Data.define(:name, :kind, :definition, :columns) # kind: f|u|c
      IdentityColumn = Data.define(:column, :kind) # kind: "identity" | "serial"
      Grant = Data.define(:grantee, :privileges)

      OWNER_SQL = <<~SQL.freeze
        SELECT pg_get_userbyid(c.relowner) AS owner
        FROM pg_class c
        JOIN pg_namespace n ON n.oid = c.relnamespace
        WHERE c.relname = $1 AND n.nspname = $2
      SQL

      GRANTS_SQL = <<~SQL.freeze
        SELECT CASE WHEN g.grantee = 0 THEN 'PUBLIC'
        ELSE pg_get_userbyid(g.grantee) END AS grantee,
        g.privilege_type
        FROM pg_class c
        JOIN pg_namespace n ON n.oid = c.relnamespace
        CROSS JOIN LATERAL aclexplode(c.relacl) AS g
        WHERE c.relname = $1 AND n.nspname = $2
      SQL

      TABLE_COMMENT_SQL = <<~SQL.freeze
        SELECT obj_description(c.oid, 'pg_class') AS comment
        FROM pg_class c
        JOIN pg_namespace n ON n.oid = c.relnamespace
        WHERE c.relname = $1 AND n.nspname = $2
      SQL

      COLUMN_COMMENTS_SQL = <<~SQL.freeze
        SELECT a.attname, col_description(a.attrelid, a.attnum) AS comment
        FROM pg_attribute a
        JOIN pg_class c ON c.oid = a.attrelid
        JOIN pg_namespace n ON n.oid = c.relnamespace
        WHERE c.relname = $1 AND n.nspname = $2
        AND a.attnum > 0 AND NOT a.attisdropped
        AND col_description(a.attrelid, a.attnum) IS NOT NULL
        ORDER BY a.attnum
      SQL

      INDEXES_SQL = <<~SQL.freeze
        SELECT i.relname AS index_name,
        pg_get_indexdef(ix.indexrelid) AS definition,
        ix.indisunique,
        array_to_string(ARRAY(
        SELECT a.attname
        FROM unnest(ix.indkey) WITH ORDINALITY AS k(attnum, ord)
        JOIN pg_attribute a ON a.attrelid = ix.indrelid AND a.attnum = k.attnum
        WHERE k.attnum <> 0
        ORDER BY k.ord
        ), ',') AS columns
        FROM pg_index ix
        JOIN pg_class i ON i.oid = ix.indexrelid
        JOIN pg_class t ON t.oid = ix.indrelid
        JOIN pg_namespace n ON n.oid = t.relnamespace
        WHERE t.relname = $1 AND n.nspname = $2
        AND NOT ix.indisprimary
        AND ix.indexrelid NOT IN (
        SELECT conindid FROM pg_constraint
        WHERE conrelid = t.oid AND conindid <> 0
        )
        ORDER BY i.relname
      SQL

      CONSTRAINTS_SQL = <<~SQL.freeze
        SELECT c.conname,
        c.contype,
        pg_get_constraintdef(c.oid) AS definition,
        array_to_string(ARRAY(
        SELECT a.attname
        FROM unnest(c.conkey) WITH ORDINALITY AS k(attnum, ord)
        JOIN pg_attribute a ON a.attrelid = c.conrelid AND a.attnum = k.attnum
        ORDER BY k.ord
        ), ',') AS columns
        FROM pg_constraint c
        JOIN pg_class t ON t.oid = c.conrelid
        JOIN pg_namespace n ON n.oid = t.relnamespace
        WHERE t.relname = $1 AND n.nspname = $2
        AND c.contype IN ('f', 'u', 'c')
        ORDER BY c.conname
      SQL

      IDENTITY_SQL = <<~SQL.freeze
        SELECT a.attname, a.attidentity
        FROM pg_attribute a
        JOIN pg_class t ON t.oid = a.attrelid
        JOIN pg_namespace n ON n.oid = t.relnamespace
        LEFT JOIN pg_attrdef ad ON ad.adrelid = a.attrelid AND ad.adnum = a.attnum
        WHERE t.relname = $1 AND n.nspname = $2
        AND a.attnum > 0 AND NOT a.attisdropped
        AND (a.attidentity IN ('a', 'd')
        OR pg_get_expr(ad.adbin, ad.adrelid) LIKE 'nextval(%')
        ORDER BY a.attnum
      SQL

      # pool: any object whose #exec(sql, params) runs a parameterized query
      # and returns an enumerable result of string-keyed rows.
      def initialize(pool)
        @pool = pool
      end

      # Indexes other than the primary key that do not back a constraint
      # (constraint-backed ones show up under #constraints, so they are not
      # counted twice). Returns Array<Index>.
      def indexes(table_name, schema_name: "public")
        rows = @pool.exec(INDEXES_SQL, [table_name, schema_name])
        rows.map { |row| build_index(row) }
      end

      # Foreign-key, unique, and check constraints; the primary key is not
      # included. Returns Array<Constraint>.
      def constraints(table_name, schema_name: "public")
        rows = @pool.exec(CONSTRAINTS_SQL, [table_name, schema_name])
        rows.map { |row| build_constraint(row) }
      end

      # Columns that are either identity columns (GENERATED ... AS IDENTITY)
      # or serial-style (default starts with nextval). Returns
      # Array<IdentityColumn>.
      def identity_columns(table_name, schema_name: "public")
        rows = @pool.exec(IDENTITY_SQL, [table_name, schema_name])
        rows.map do |row|
          # An empty attidentity means the row matched via its nextval default.
          flavor = row["attidentity"].to_s.empty? ? "serial" : "identity"
          IdentityColumn.new(column: row["attname"], kind: flavor)
        end
      end

      # Name of the role owning the table, or nil when the query finds no row.
      def owner(table_name, schema_name: "public")
        result = @pool.exec(OWNER_SQL, [table_name, schema_name])
        return nil if result.ntuples.zero?

        result[0]["owner"]
      end

      # Explicitly granted table privileges, grouped into one Grant per grantee
      # with the privilege list sorted. Rows granted to the table owner are
      # skipped because the owner already holds every privilege.
      def grants(table_name, schema_name: "public")
        owner_name = owner(table_name, schema_name: schema_name)
        rows = @pool.exec(GRANTS_SQL, [table_name, schema_name])

        rows
          .reject { |row| row["grantee"] == owner_name }
          .group_by { |row| row["grantee"] }
          .map do |grantee, granted_rows|
            privileges = granted_rows.map { |row| row["privilege_type"] }
            Grant.new(grantee: grantee, privileges: privileges.sort)
          end
      end

      # The table-level COMMENT, or nil.
      def table_comment(table_name, schema_name: "public")
        result = @pool.exec(TABLE_COMMENT_SQL, [table_name, schema_name])
        return nil if result.ntuples.zero?

        result[0]["comment"]
      end

      # Hash<column_name, comment> covering only the columns that carry a
      # COMMENT.
      def column_comments(table_name, schema_name: "public")
        @pool
          .exec(COLUMN_COMMENTS_SQL, [table_name, schema_name])
          .to_h { |row| [row["attname"], row["comment"]] }
      end

      private

      # Converts one INDEXES_SQL row into an Index value object.
      def build_index(row)
        Index.new(
          name: row["index_name"],
          definition: row["definition"],
          unique: row["indisunique"] == "t",
          columns: split_list(row["columns"])
        )
      end

      # Converts one CONSTRAINTS_SQL row into a Constraint value object.
      def build_constraint(row)
        Constraint.new(
          name: row["conname"],
          kind: row["contype"],
          definition: row["definition"],
          columns: split_list(row["columns"])
        )
      end

      # Splits the comma-joined column list produced by the queries; nil and
      # "" both become an empty array.
      def split_list(str)
        return [] if str.to_s.empty?

        str.split(",")
      end
    end
  end
end
@@ -0,0 +1,90 @@
# frozen_string_literal: true

module Pcrd
  module Schema
    # Analyzes column alignment and computes a column ordering that reduces
    # padding waste.
    #
    # PostgreSQL stores columns in definition order. Each column must start at an
    # address aligned to its type's natural alignment boundary. When a small-aligned
    # column (e.g. bool, 1 byte) precedes a large-aligned column (e.g. timestamp,
    # 8 bytes), PostgreSQL inserts padding bytes to satisfy the alignment requirement
    # of the larger type. Ordering columns largest-alignment-first removes that
    # padding whenever every fixed size is a multiple of its own alignment.
    #
    # A few fixed-size types are "ragged": their length is not a multiple of their
    # alignment (e.g. timetz: 12 bytes, 8-byte aligned). A plain
    # alignment-descending sort can make such layouts *worse*, so #optimize sorts
    # ragged columns last within their tier and never returns an ordering that is
    # larger than the one it was given.
    #
    # Variable-length columns (text, varchar, numeric, etc.) are modelled as a
    # 4-byte aligned varlena header. Their actual content length is not
    # predictable, so only the header is counted and they are placed last.
    class Packer
      # A single entry in a layout: the column, the offset it starts at, and the
      # padding bytes inserted before it to satisfy its alignment requirement.
      LayoutEntry = Data.define(:column, :offset, :padding_before)

      # Size and alignment assumed for a variable-length column (header only).
      VARLENA_HEADER_BYTES = 4

      # Returns the columns reordered to reduce padding: fixed-size columns by
      # descending alignment (8 → 4 → 2 → 1), then the variable-length columns.
      #
      # Within an alignment tier, columns whose size is a multiple of the
      # alignment come first, then larger sizes before smaller ones, and finally
      # the original position breaks ties. Variable-length columns keep their
      # original relative order.
      #
      # If that candidate ordering would still occupy more bytes than the input
      # ordering, a copy of the input ordering is returned instead, so the result
      # is never worse than what the caller already has.
      def optimize(columns)
        fixed = columns.select(&:fixed?)
        variable = columns.select(&:variable?)
        sorted_fixed = fixed.sort_by.with_index do |col, index|
          [-col.alignment, ragged?(col) ? 1 : 0, -col.fixed_size, index]
        end
        candidate = sorted_fixed + variable

        # Only compare like with like: fall back when the candidate holds the
        # same number of columns as the input yet needs more space.
        if candidate.size == columns.count && estimated_row_size(candidate) > estimated_row_size(columns)
          return columns.to_a.dup
        end

        candidate
      end

      # Computes the per-column layout (offset and padding before each column)
      # for the columns in the given order. Returns Array<LayoutEntry>.
      def layout(columns)
        offset = 0

        columns.map do |col|
          align = col.fixed? ? col.alignment : VARLENA_HEADER_BYTES
          padding = padding_needed(offset, align)
          entry = LayoutEntry.new(column: col, offset: offset + padding, padding_before: padding)
          offset += padding + occupied_bytes(col)
          entry
        end
      end

      # Estimated bytes consumed by fixed-length columns plus alignment padding.
      # Variable-length columns contribute 4 bytes each (header only).
      def estimated_row_size(columns)
        layout_size(layout(columns))
      end

      # Total wasted padding bytes across all columns.
      def total_padding(columns)
        layout(columns).sum(&:padding_before)
      end

      # Returns a report hash comparing the current layout with the optimized one.
      #
      # Keys: :current_columns, :optimal_columns, :current_layout,
      # :optimal_layout, :current_size, :optimal_size, :saved_bytes,
      # :savings_pct (one decimal place; 0.0 for an empty column list) and
      # :already_optimal (true when nothing would be saved).
      def report(columns)
        optimal_order = optimize(columns)

        # Each layout is computed once and reused for both the size and the report.
        current_layout = layout(columns)
        optimal_layout = layout(optimal_order)
        current_size = layout_size(current_layout)
        optimal_size = layout_size(optimal_layout)
        saved_bytes = current_size - optimal_size
        pct = current_size.positive? ? (saved_bytes.to_f / current_size * 100).round(1) : 0.0

        {
          current_columns: columns,
          optimal_columns: optimal_order,
          current_layout: current_layout,
          optimal_layout: optimal_layout,
          current_size: current_size,
          optimal_size: optimal_size,
          saved_bytes: saved_bytes,
          savings_pct: pct,
          already_optimal: saved_bytes.zero?
        }
      end

      private

      # Bytes needed to advance offset to the next multiple of alignment.
      def padding_needed(offset, alignment)
        (alignment - (offset % alignment)) % alignment
      end

      # Bytes a column itself occupies in the estimate (header only for varlena).
      def occupied_bytes(col)
        col.fixed? ? col.fixed_size : VARLENA_HEADER_BYTES
      end

      # Sum of padding and column bytes over an already computed layout.
      def layout_size(entries)
        entries.sum { |entry| entry.padding_before + occupied_bytes(entry.column) }
      end

      # True when a fixed-size column's length is not a multiple of its
      # alignment, i.e. it leaves the next same-aligned column misaligned.
      def ragged?(col)
        col.alignment > 1 && (col.fixed_size % col.alignment) != 0
      end
    end
  end
end
@@ -0,0 +1,118 @@
# frozen_string_literal: true

module Pcrd
  module Schema
    # Reads table metadata (columns, primary key, replica identity, row-count
    # estimate) from the PostgreSQL system catalogs through the given pool.
    class Reader
      # pg_type.typalign codes mapped to their alignment in bytes.
      TYPALIGN_BYTES = { "c" => 1, "s" => 2, "i" => 4, "d" => 8 }.freeze

      COLUMNS_QUERY = <<~SQL.freeze
        SELECT
        a.attnum,
        a.attname,
        t.typname,
        format_type(a.atttypid, a.atttypmod) AS formatted_type,
        t.typalign,
        t.typlen,
        NOT a.attnotnull AS nullable,
        pg_get_expr(d.adbin, d.adrelid) AS default_expr
        FROM pg_attribute a
        JOIN pg_class c ON c.oid = a.attrelid
        JOIN pg_namespace n ON n.oid = c.relnamespace
        JOIN pg_type t ON t.oid = a.atttypid
        LEFT JOIN pg_attrdef d
        ON d.adrelid = a.attrelid AND d.adnum = a.attnum
        WHERE c.relname = $1
        AND n.nspname = $2
        AND a.attnum > 0
        AND NOT a.attisdropped
        ORDER BY a.attnum
      SQL

      # pool: any object whose #exec(sql, params) runs a parameterized query and
      # returns a result responding to #ntuples, #[], #map and #column_values.
      def initialize(pool)
        @pool = pool
      end

      # Returns the table's live (non-dropped, user) columns as
      # Array<Schema::Column>, ordered by attnum.
      #
      # Raises TableNotFound when the query returns no rows.
      def read(table_name, schema_name: "public")
        rows = @pool.exec(COLUMNS_QUERY, [table_name, schema_name])
        raise TableNotFound, "Table #{schema_name}.#{table_name} not found" if rows.ntuples.zero?

        rows.map { |row| build_column(row) }
      end

      # True when an ordinary table (relkind 'r') with this name exists in the
      # schema.
      def table_exists?(table_name, schema_name: "public")
        result = @pool.exec(<<~SQL, [table_name, schema_name])
          SELECT 1
          FROM pg_class c
          JOIN pg_namespace n ON n.oid = c.relnamespace
          WHERE c.relname = $1
          AND n.nspname = $2
          AND c.relkind = 'r'
        SQL
        result.ntuples.positive?
      end

      # Returns an array of column names that form the primary key, in key order.
      #
      # NOTE(review): pg_index.indkey also lists INCLUDE (non-key) columns of a
      # covering index, so a primary key declared with INCLUDE would presumably
      # report those columns too — confirm whether callers need them filtered.
      def primary_key_columns(table_name, schema_name: "public")
        result = @pool.exec(<<~SQL, [table_name, schema_name])
          SELECT a.attname
          FROM pg_index i
          JOIN pg_attribute a ON a.attrelid = i.indrelid
          AND a.attnum = ANY(i.indkey)
          JOIN pg_class c ON c.oid = i.indrelid
          JOIN pg_namespace n ON n.oid = c.relnamespace
          WHERE c.relname = $1
          AND n.nspname = $2
          AND i.indisprimary
          ORDER BY array_position(i.indkey, a.attnum)
        SQL
        result.column_values(0)
      end

      # Returns the table's replica identity setting as a single char:
      #   'd' default (primary key)   'n' nothing
      #   'f' full (whole row)        'i' a specific unique index
      # Returns nil if the table is not found. This governs whether UPDATE/DELETE
      # WAL records carry the old-row key columns the apply engine needs.
      def replica_identity(table_name, schema_name: "public")
        result = @pool.exec(<<~SQL, [table_name, schema_name])
          SELECT c.relreplident
          FROM pg_class c
          JOIN pg_namespace n ON n.oid = c.relnamespace
          WHERE c.relname = $1
          AND n.nspname = $2
        SQL
        result.ntuples.positive? ? result[0]["relreplident"] : nil
      end

      # Returns the estimated live row count from pg_class statistics.
      #
      # Returns 0 when the table is not found. PostgreSQL reports reltuples as
      # -1 for a table that has never been vacuumed or analyzed; that "unknown"
      # marker is clamped to 0 so callers never receive a negative count.
      def estimated_row_count(table_name, schema_name: "public")
        result = @pool.exec(<<~SQL, [table_name, schema_name])
          SELECT c.reltuples::bigint
          FROM pg_class c
          JOIN pg_namespace n ON n.oid = c.relnamespace
          WHERE c.relname = $1
          AND n.nspname = $2
        SQL
        return 0 unless result.ntuples.positive?

        [result[0]["reltuples"].to_i, 0].max
      end

      private

      # Converts one COLUMNS_QUERY row (string values) into a Schema::Column.
      def build_column(row)
        typlen = row["typlen"].to_i
        fixed_size = typlen.positive? ? typlen : nil # -1 = varlena, -2 = C string

        Column.new(
          attnum: row["attnum"].to_i,
          name: row["attname"],
          type_name: row["typname"],
          formatted_type: row["formatted_type"],
          # Unrecognized typalign codes fall back to 4-byte alignment.
          alignment: TYPALIGN_BYTES.fetch(row["typalign"], 4),
          fixed_size: fixed_size,
          nullable: row["nullable"] == "t",
          default_expr: row["default_expr"]
        )
      end
    end
  end
end