pcrd 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +24 -0
- data/LICENSE +21 -0
- data/README.md +614 -0
- data/bin/pcrd +7 -0
- data/lib/pcrd/advisory_lock.rb +50 -0
- data/lib/pcrd/apply/engine.rb +184 -0
- data/lib/pcrd/apply/worker.rb +97 -0
- data/lib/pcrd/backfill/batch.rb +158 -0
- data/lib/pcrd/backfill/engine.rb +153 -0
- data/lib/pcrd/checkpoint/store.rb +217 -0
- data/lib/pcrd/cli.rb +274 -0
- data/lib/pcrd/commands/analyze.rb +125 -0
- data/lib/pcrd/commands/cleanup.rb +112 -0
- data/lib/pcrd/commands/demo.rb +152 -0
- data/lib/pcrd/commands/readiness.rb +30 -0
- data/lib/pcrd/commands/status.rb +129 -0
- data/lib/pcrd/commands/verify.rb +172 -0
- data/lib/pcrd/config/add_column.rb +7 -0
- data/lib/pcrd/config/analyze_config.rb +8 -0
- data/lib/pcrd/config/column_spec.rb +10 -0
- data/lib/pcrd/config/connection.rb +7 -0
- data/lib/pcrd/config/cutover_config.rb +7 -0
- data/lib/pcrd/config/load_error.rb +7 -0
- data/lib/pcrd/config/loader.rb +158 -0
- data/lib/pcrd/config/migrate_config.rb +21 -0
- data/lib/pcrd/config/root.rb +9 -0
- data/lib/pcrd/config/schema.rb +62 -0
- data/lib/pcrd/config/table.rb +9 -0
- data/lib/pcrd/config/verify_config.rb +7 -0
- data/lib/pcrd/config.rb +7 -0
- data/lib/pcrd/connection/client.rb +129 -0
- data/lib/pcrd/connection/error.rb +7 -0
- data/lib/pcrd/connection/replication.rb +108 -0
- data/lib/pcrd/cutover/orchestrator.rb +108 -0
- data/lib/pcrd/cutover/sequences.rb +138 -0
- data/lib/pcrd/demo/generator.rb +214 -0
- data/lib/pcrd/demo/schema.rb +154 -0
- data/lib/pcrd/error.rb +12 -0
- data/lib/pcrd/migration/orchestrator.rb +272 -0
- data/lib/pcrd/monitor/lag.rb +107 -0
- data/lib/pcrd/options.rb +15 -0
- data/lib/pcrd/output/analyze_printer.rb +173 -0
- data/lib/pcrd/output/cutover_printer.rb +128 -0
- data/lib/pcrd/output/preflight_printer.rb +119 -0
- data/lib/pcrd/output/readiness_printer.rb +72 -0
- data/lib/pcrd/preflight.rb +331 -0
- data/lib/pcrd/readiness/manifest.rb +201 -0
- data/lib/pcrd/replication/consumer.rb +235 -0
- data/lib/pcrd/replication/error.rb +10 -0
- data/lib/pcrd/replication/pgoutput/messages.rb +68 -0
- data/lib/pcrd/replication/pgoutput/parser.rb +316 -0
- data/lib/pcrd/reporter/console.rb +46 -0
- data/lib/pcrd/reporter/null.rb +14 -0
- data/lib/pcrd/schema/column.rb +59 -0
- data/lib/pcrd/schema/ddl.rb +71 -0
- data/lib/pcrd/schema/diff_entry.rb +36 -0
- data/lib/pcrd/schema/differ.rb +175 -0
- data/lib/pcrd/schema/object_reader.rb +187 -0
- data/lib/pcrd/schema/packer.rb +90 -0
- data/lib/pcrd/schema/reader.rb +118 -0
- data/lib/pcrd/schema/setup.rb +143 -0
- data/lib/pcrd/schema/setup_error.rb +9 -0
- data/lib/pcrd/schema/table_not_found.rb +8 -0
- data/lib/pcrd/schema/type_registry.rb +116 -0
- data/lib/pcrd/sql.rb +55 -0
- data/lib/pcrd/transform/row_transformer.rb +69 -0
- data/lib/pcrd/transform/type_map.rb +209 -0
- data/lib/pcrd/transform/validator.rb +106 -0
- data/lib/pcrd/version.rb +5 -0
- data/lib/pcrd.rb +11 -0
- metadata +231 -0
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "set"
|
|
4
|
+
|
|
5
|
+
module Pcrd
|
|
6
|
+
module Schema
|
|
7
|
+
# Computes the diff between source and target schemas, optionally guided
# by a migration spec (Config::Table).
#
# Two modes:
#
#   Synthesis mode (target_columns: nil)
#     The target schema is synthesized by applying the migration spec to the
#     source columns. Use this to preview what the target will look like before
#     the migration has run.
#
#   Real-target mode (target_columns: [...])
#     The target schema comes from a live database query. The spec is used
#     to understand source→target column name mappings; without a spec,
#     columns are matched by name.
class Differ
  # Returns Array<DiffEntry> in source-column order, added columns last.
  #
  #   source_columns: Array<Schema::Column>
  #   table_config:   Config::Table or nil
  #   target_columns: Array<Schema::Column> or nil (nil triggers synthesis)
  def diff(source_columns:, table_config: nil, target_columns: nil)
    if target_columns.nil?
      synthesize_diff(source_columns, table_config)
    else
      real_diff(source_columns, table_config, target_columns)
    end
  end

  # Extracts target-side columns from a diff for use in padding analysis.
  # Dropped entries and entries without a target column are skipped.
  def target_columns_from_diff(entries)
    entries.filter_map { |entry| entry.target_column unless entry.dropped? }
  end

  private

  # -----------------------------------------------------------------------
  # Synthesis path: build target columns from source + spec
  # -----------------------------------------------------------------------

  # Builds one entry per source column (dropped, or compared against its
  # synthesized target), followed by one :added entry per add_columns item.
  def synthesize_diff(source_columns, table_config)
    spec_columns = table_config&.columns || {}

    entries = source_columns.map do |src|
      col_spec = spec_columns[src.name]
      next dropped_entry(src) if col_spec&.drop

      target = synthesize_column(src, col_spec)
      DiffEntry.new(status: compute_status(src, target), source_column: src, target_column: target)
    end

    # Added columns come last.
    added = (table_config&.add_columns || []).map do |add_col|
      DiffEntry.new(status: :added, source_column: nil, target_column: build_added_column(add_col))
    end

    entries + added
  end

  # Applies a column spec (optional rename and/or type change) to a source
  # column. attnum, nullability and default are always carried over from the
  # source; only the type-related attributes depend on whether the spec
  # names a new type.
  def synthesize_column(source_col, col_spec)
    new_type = col_spec&.type

    type_attrs =
      if new_type
        info = TypeRegistry.lookup(new_type)
        {
          type_name: info.canonical_name,
          formatted_type: new_type,
          alignment: info.alignment,
          fixed_size: info.fixed_size
        }
      else
        {
          type_name: source_col.type_name,
          formatted_type: source_col.formatted_type,
          alignment: source_col.alignment,
          fixed_size: source_col.fixed_size
        }
      end

    Column.new(
      attnum: source_col.attnum,
      name: col_spec&.rename || source_col.name,
      nullable: source_col.nullable,
      default_expr: source_col.default_expr,
      **type_attrs
    )
  end

  # Builds the target column for an add_columns entry. Added columns have no
  # attnum yet and are always reported as nullable.
  def build_added_column(add_col)
    info = TypeRegistry.lookup(add_col.type)
    Column.new(
      attnum: nil,
      name: add_col.name,
      type_name: info.canonical_name,
      formatted_type: add_col.type,
      alignment: info.alignment,
      fixed_size: info.fixed_size,
      nullable: true,
      default_expr: add_col.default
    )
  end

  # Classifies a source/target pair as :unchanged, :renamed, :type_changed or
  # :type_and_renamed. Formatted types are compared case-insensitively.
  #
  # The third argument is accepted for backward compatibility only; the
  # classification never depended on the column spec.
  def compute_status(src, target, _col_spec = nil)
    type_changed = src.type_name != target.type_name ||
                   src.formatted_type.downcase != target.formatted_type.downcase
    name_changed = src.name != target.name

    if type_changed && name_changed
      :type_and_renamed
    elsif type_changed
      :type_changed
    elsif name_changed
      :renamed
    else
      :unchanged
    end
  end

  # Entry for a source column that has no counterpart on the target.
  def dropped_entry(src)
    DiffEntry.new(status: :dropped, source_column: src, target_column: nil)
  end

  # -----------------------------------------------------------------------
  # Real-target path: match live source and target columns
  # -----------------------------------------------------------------------

  # Matches each source column to a live target column by its (possibly
  # renamed) name. Target columns nobody matched are appended as :added.
  def real_diff(source_columns, table_config, target_columns)
    spec_columns    = table_config&.columns || {}
    target_by_name  = target_columns.to_h { |col| [col.name, col] }
    matched_targets = Set.new

    entries = source_columns.map do |src|
      col_spec = spec_columns[src.name]
      next dropped_entry(src) if col_spec&.drop

      tgt = target_by_name[col_spec&.rename || src.name]
      # Column expected on target but not found — treat as dropped.
      next dropped_entry(src) unless tgt

      matched_targets << tgt.name
      DiffEntry.new(status: compute_status(src, tgt), source_column: src, target_column: tgt)
    end

    # Columns present on target but not matched from source are additions.
    target_columns.each do |tgt|
      next if matched_targets.include?(tgt.name)

      entries << DiffEntry.new(status: :added, source_column: nil, target_column: tgt)
    end

    entries
  end
end
|
|
174
|
+
end
|
|
175
|
+
end
|
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Pcrd
|
|
4
|
+
module Schema
|
|
5
|
+
# Reads the "secondary" schema objects that the load DDL deliberately omits
# (Schema::DDL creates only the table + primary key). Used by the
# target-readiness manifest to report and regenerate what must exist on the
# target before cutover.
#
# Works against either cluster — point it at the source to discover objects,
# at the target to see what already exists.
#
# The pool passed to #initialize must respond to `exec(sql, params)` and
# return a result that can be iterated as rows keyed by column name and that
# responds to `ntuples` and `[]`.
class ObjectReader
  Index = Data.define(:name, :definition, :unique, :columns)
  Constraint = Data.define(:name, :kind, :definition, :columns) # kind: f|u|c
  IdentityColumn = Data.define(:column, :kind) # kind: "identity" | "serial"
  Grant = Data.define(:grantee, :privileges)

  def initialize(pool)
    @pool = pool
  end

  # Non-PK indexes that are not backing a unique/PK constraint (those are
  # reported under #constraints instead, to avoid double-counting).
  # Returns Array<Index>, ordered by index name.
  def indexes(table_name, schema_name: "public")
    @pool.exec(INDEXES_SQL, [table_name, schema_name]).map do |r|
      Index.new(
        name: r["index_name"],
        definition: r["definition"],
        unique: r["indisunique"] == "t", # boolean arrives as the text value "t"
        columns: split_list(r["columns"])
      )
    end
  end

  # Foreign-key, unique, and check constraints (not the primary key).
  # Returns Array<Constraint>, ordered by constraint name.
  def constraints(table_name, schema_name: "public")
    @pool.exec(CONSTRAINTS_SQL, [table_name, schema_name]).map do |r|
      Constraint.new(
        name: r["conname"],
        kind: r["contype"],
        definition: r["definition"],
        columns: split_list(r["columns"])
      )
    end
  end

  # Identity (GENERATED ... AS IDENTITY) and serial (nextval default) columns.
  # A row with an empty attidentity was selected by its nextval default, so it
  # is classified as "serial".
  def identity_columns(table_name, schema_name: "public")
    @pool.exec(IDENTITY_SQL, [table_name, schema_name]).map do |r|
      IdentityColumn.new(
        column: r["attname"],
        kind: r["attidentity"].to_s.empty? ? "serial" : "identity"
      )
    end
  end

  # The table's owner role name, or nil when the table is not found.
  def owner(table_name, schema_name: "public")
    row = @pool.exec(OWNER_SQL, [table_name, schema_name])
    row.ntuples.zero? ? nil : row[0]["owner"]
  end

  # Explicit table privileges (the owner's implicit grant is excluded), one
  # Grant per grantee with its sorted privilege list.
  #
  # aclexplode yields one row per (grantor, grantee, privilege), so a grantee
  # that received the same privilege from two different grantors shows up
  # twice; the list is de-duplicated so each privilege is reported once.
  def grants(table_name, schema_name: "public")
    owner_name = owner(table_name, schema_name: schema_name)
    by_grantee = Hash.new { |h, k| h[k] = [] }

    @pool.exec(GRANTS_SQL, [table_name, schema_name]).each do |r|
      next if r["grantee"] == owner_name # owner already has everything

      by_grantee[r["grantee"]] << r["privilege_type"]
    end

    by_grantee.map { |grantee, privs| Grant.new(grantee: grantee, privileges: privs.uniq.sort) }
  end

  # The table's COMMENT, or nil.
  def table_comment(table_name, schema_name: "public")
    row = @pool.exec(TABLE_COMMENT_SQL, [table_name, schema_name])
    row.ntuples.zero? ? nil : row[0]["comment"]
  end

  # Hash<column_name, comment> for columns that have a COMMENT.
  def column_comments(table_name, schema_name: "public")
    @pool.exec(COLUMN_COMMENTS_SQL, [table_name, schema_name])
         .each_with_object({}) { |r, h| h[r["attname"]] = r["comment"] }
  end

  private

  # Splits the comma-joined column list produced by the SQL below.
  # nil or an empty string yields an empty array.
  def split_list(str)
    str.to_s.empty? ? [] : str.split(",")
  end

  # NOTE: `private` above does not affect constants; the SQL constants below
  # remain readable from outside the class.

  OWNER_SQL = <<~SQL.freeze
    SELECT pg_get_userbyid(c.relowner) AS owner
    FROM pg_class c
    JOIN pg_namespace n ON n.oid = c.relnamespace
    WHERE c.relname = $1 AND n.nspname = $2
  SQL

  GRANTS_SQL = <<~SQL.freeze
    SELECT CASE WHEN g.grantee = 0 THEN 'PUBLIC'
                ELSE pg_get_userbyid(g.grantee) END AS grantee,
           g.privilege_type
    FROM pg_class c
    JOIN pg_namespace n ON n.oid = c.relnamespace
    CROSS JOIN LATERAL aclexplode(c.relacl) AS g
    WHERE c.relname = $1 AND n.nspname = $2
  SQL

  TABLE_COMMENT_SQL = <<~SQL.freeze
    SELECT obj_description(c.oid, 'pg_class') AS comment
    FROM pg_class c
    JOIN pg_namespace n ON n.oid = c.relnamespace
    WHERE c.relname = $1 AND n.nspname = $2
  SQL

  COLUMN_COMMENTS_SQL = <<~SQL.freeze
    SELECT a.attname, col_description(a.attrelid, a.attnum) AS comment
    FROM pg_attribute a
    JOIN pg_class c ON c.oid = a.attrelid
    JOIN pg_namespace n ON n.oid = c.relnamespace
    WHERE c.relname = $1 AND n.nspname = $2
      AND a.attnum > 0 AND NOT a.attisdropped
      AND col_description(a.attrelid, a.attnum) IS NOT NULL
    ORDER BY a.attnum
  SQL

  INDEXES_SQL = <<~SQL.freeze
    SELECT i.relname AS index_name,
           pg_get_indexdef(ix.indexrelid) AS definition,
           ix.indisunique,
           array_to_string(ARRAY(
             SELECT a.attname
             FROM unnest(ix.indkey) WITH ORDINALITY AS k(attnum, ord)
             JOIN pg_attribute a ON a.attrelid = ix.indrelid AND a.attnum = k.attnum
             WHERE k.attnum <> 0
             ORDER BY k.ord
           ), ',') AS columns
    FROM pg_index ix
    JOIN pg_class i ON i.oid = ix.indexrelid
    JOIN pg_class t ON t.oid = ix.indrelid
    JOIN pg_namespace n ON n.oid = t.relnamespace
    WHERE t.relname = $1 AND n.nspname = $2
      AND NOT ix.indisprimary
      AND ix.indexrelid NOT IN (
        SELECT conindid FROM pg_constraint
        WHERE conrelid = t.oid AND conindid <> 0
      )
    ORDER BY i.relname
  SQL

  CONSTRAINTS_SQL = <<~SQL.freeze
    SELECT c.conname,
           c.contype,
           pg_get_constraintdef(c.oid) AS definition,
           array_to_string(ARRAY(
             SELECT a.attname
             FROM unnest(c.conkey) WITH ORDINALITY AS k(attnum, ord)
             JOIN pg_attribute a ON a.attrelid = c.conrelid AND a.attnum = k.attnum
             ORDER BY k.ord
           ), ',') AS columns
    FROM pg_constraint c
    JOIN pg_class t ON t.oid = c.conrelid
    JOIN pg_namespace n ON n.oid = t.relnamespace
    WHERE t.relname = $1 AND n.nspname = $2
      AND c.contype IN ('f', 'u', 'c')
    ORDER BY c.conname
  SQL

  IDENTITY_SQL = <<~SQL.freeze
    SELECT a.attname, a.attidentity
    FROM pg_attribute a
    JOIN pg_class t ON t.oid = a.attrelid
    JOIN pg_namespace n ON n.oid = t.relnamespace
    LEFT JOIN pg_attrdef ad ON ad.adrelid = a.attrelid AND ad.adnum = a.attnum
    WHERE t.relname = $1 AND n.nspname = $2
      AND a.attnum > 0 AND NOT a.attisdropped
      AND (a.attidentity IN ('a', 'd')
           OR pg_get_expr(ad.adbin, ad.adrelid) LIKE 'nextval(%')
    ORDER BY a.attnum
  SQL
end
|
|
186
|
+
end
|
|
187
|
+
end
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Pcrd
|
|
4
|
+
module Schema
|
|
5
|
+
# Analyzes column alignment and computes optimal ordering to minimize padding waste.
#
# PostgreSQL stores columns in definition order. Each column must start at an
# address aligned to its type's natural alignment boundary. When a small-aligned
# column (e.g. bool, 1 byte) precedes a large-aligned column (e.g. timestamp,
# 8 bytes), PostgreSQL inserts padding bytes to satisfy the alignment requirement
# of the larger type. Reordering columns largest-alignment-first eliminates this
# waste entirely for fixed-size columns.
#
# Variable-length columns (text, varchar, numeric, etc.) have a 4-byte aligned
# varlena header. Their actual content length is not predictable, so we count only
# the header for padding estimates and place them last where they contribute no
# cross-column alignment overhead.
class Packer
  # A single entry in a layout: the column plus the padding bytes inserted
  # before it to satisfy its alignment requirement.
  LayoutEntry = Data.define(:column, :offset, :padding_before)

  # Alignment and counted size of a variable-length column's varlena header.
  VARLENA_ALIGNMENT = 4
  VARLENA_HEADER_SIZE = 4

  # Returns columns in optimal order: 8-byte → 4-byte → 2-byte → 1-byte → variable.
  # Within each alignment tier, larger fixed sizes come first; columns that tie
  # on both alignment and size keep their original relative order. Variable-length
  # columns follow in their original order.
  def optimize(columns)
    fixed = columns.select(&:fixed?)
    variable = columns.select(&:variable?)
    # The index is the final sort key so ties are broken by original position.
    sorted_fixed = fixed.sort_by.with_index { |c, i| [-c.alignment, -c.fixed_size, i] }
    sorted_fixed + variable
  end

  # Computes the per-column layout (offset and padding before each column).
  # Returns Array<LayoutEntry>.
  def layout(columns)
    offset = 0

    columns.map do |col|
      align = col.fixed? ? col.alignment : VARLENA_ALIGNMENT
      padding = padding_needed(offset, align)
      entry = LayoutEntry.new(column: col, offset: offset + padding, padding_before: padding)
      offset += padding + storage_size(col)
      entry
    end
  end

  # Estimated bytes consumed by fixed-length columns plus alignment padding.
  # Variable-length columns contribute 4 bytes each (header only).
  def estimated_row_size(columns)
    layout_size(layout(columns))
  end

  # Total wasted padding bytes across all columns.
  def total_padding(columns)
    layout(columns).sum(&:padding_before)
  end

  # Returns a report hash comparing current vs optimal layout.
  #
  # Keys: :current_columns, :optimal_columns, :current_layout, :optimal_layout,
  # :current_size, :optimal_size, :saved_bytes, :savings_pct (rounded to one
  # decimal, 0.0 for an empty column list) and :already_optimal.
  def report(columns)
    optimal_order = optimize(columns)

    # Each layout is computed once and reused for both the size and the report.
    current_layout = layout(columns)
    optimal_layout = layout(optimal_order)

    current_size = layout_size(current_layout)
    optimal_size = layout_size(optimal_layout)
    saved_bytes = current_size - optimal_size
    pct = current_size > 0 ? (saved_bytes.to_f / current_size * 100).round(1) : 0.0

    {
      current_columns: columns,
      optimal_columns: optimal_order,
      current_layout: current_layout,
      optimal_layout: optimal_layout,
      current_size: current_size,
      optimal_size: optimal_size,
      saved_bytes: saved_bytes,
      savings_pct: pct,
      already_optimal: saved_bytes.zero?
    }
  end

  private

  # Bytes counted for a column: its fixed size, or the header only for varlena.
  def storage_size(col)
    col.fixed? ? col.fixed_size : VARLENA_HEADER_SIZE
  end

  # Sum of padding and counted storage over an already-computed layout.
  def layout_size(entries)
    entries.sum { |e| e.padding_before + storage_size(e.column) }
  end

  # Bytes needed to advance offset to the next multiple of alignment
  # (0 when it is already aligned).
  def padding_needed(offset, alignment)
    (alignment - (offset % alignment)) % alignment
  end
end
|
|
89
|
+
end
|
|
90
|
+
end
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Pcrd
|
|
4
|
+
module Schema
|
|
5
|
+
# Reads table-level metadata (columns, primary key, replica identity, row
# estimate) from the PostgreSQL system catalogs through the given pool.
#
# The pool must respond to `exec(sql, params)`; the result is expected to
# respond to `ntuples`, `[]`, `map` and `column_values`.
class Reader
  # pg_type.typalign code → alignment in bytes.
  TYPALIGN_BYTES = { "c" => 1, "s" => 2, "i" => 4, "d" => 8 }.freeze

  def initialize(pool)
    @pool = pool
  end

  # Returns Array<Column> for the table, in attnum order.
  # Raises TableNotFound when the query yields no rows.
  def read(table_name, schema_name: "public")
    rows = @pool.exec(COLUMNS_QUERY, [table_name, schema_name])
    raise TableNotFound, "Table #{schema_name}.#{table_name} not found" if rows.ntuples.zero?

    rows.map { |row| build_column(row) }
  end

  # True when an ordinary table (relkind 'r') with this name exists in the schema.
  def table_exists?(table_name, schema_name: "public")
    result = @pool.exec(<<~SQL, [table_name, schema_name])
      SELECT 1
      FROM pg_class c
      JOIN pg_namespace n ON n.oid = c.relnamespace
      WHERE c.relname = $1
        AND n.nspname = $2
        AND c.relkind = 'r'
    SQL
    result.ntuples > 0
  end

  # Returns an array of column names that form the primary key, in key order.
  def primary_key_columns(table_name, schema_name: "public")
    result = @pool.exec(<<~SQL, [table_name, schema_name])
      SELECT a.attname
      FROM pg_index i
      JOIN pg_attribute a ON a.attrelid = i.indrelid
                         AND a.attnum = ANY(i.indkey)
      JOIN pg_class c ON c.oid = i.indrelid
      JOIN pg_namespace n ON n.oid = c.relnamespace
      WHERE c.relname = $1
        AND n.nspname = $2
        AND i.indisprimary
      ORDER BY array_position(i.indkey, a.attnum)
    SQL
    result.column_values(0)
  end

  # Returns the table's replica identity setting as a single char:
  #   'd' default (primary key)   'n' nothing
  #   'f' full (whole row)        'i' a specific unique index
  # Returns nil if the table is not found. This governs whether UPDATE/DELETE
  # WAL records carry the old-row key columns the apply engine needs.
  def replica_identity(table_name, schema_name: "public")
    result = @pool.exec(<<~SQL, [table_name, schema_name])
      SELECT c.relreplident
      FROM pg_class c
      JOIN pg_namespace n ON n.oid = c.relnamespace
      WHERE c.relname = $1
        AND n.nspname = $2
    SQL
    result.ntuples > 0 ? result[0]["relreplident"] : nil
  end

  # Returns the estimated live row count from pg_class statistics.
  #
  # Returns 0 when the table is not found. PostgreSQL 14+ stores
  # reltuples = -1 for a table that has never been vacuumed or analyzed
  # ("row count unknown"); that sentinel is reported as 0 as well, so callers
  # never see a negative row count.
  def estimated_row_count(table_name, schema_name: "public")
    result = @pool.exec(<<~SQL, [table_name, schema_name])
      SELECT c.reltuples::bigint
      FROM pg_class c
      JOIN pg_namespace n ON n.oid = c.relnamespace
      WHERE c.relname = $1
        AND n.nspname = $2
    SQL
    return 0 if result.ntuples.zero?

    [result[0]["reltuples"].to_i, 0].max
  end

  private

  # NOTE: `private` does not apply to constants; COLUMNS_QUERY remains
  # readable from outside the class.
  COLUMNS_QUERY = <<~SQL.freeze
    SELECT
      a.attnum,
      a.attname,
      t.typname,
      format_type(a.atttypid, a.atttypmod) AS formatted_type,
      t.typalign,
      t.typlen,
      NOT a.attnotnull AS nullable,
      pg_get_expr(d.adbin, d.adrelid) AS default_expr
    FROM pg_attribute a
    JOIN pg_class c ON c.oid = a.attrelid
    JOIN pg_namespace n ON n.oid = c.relnamespace
    JOIN pg_type t ON t.oid = a.atttypid
    LEFT JOIN pg_attrdef d
      ON d.adrelid = a.attrelid AND d.adnum = a.attnum
    WHERE c.relname = $1
      AND n.nspname = $2
      AND a.attnum > 0
      AND NOT a.attisdropped
    ORDER BY a.attnum
  SQL

  # Converts one COLUMNS_QUERY row into a Column. Values are parsed from
  # their text form: integers via to_i, booleans by comparing with "t".
  def build_column(row)
    typlen = row["typlen"].to_i
    fixed_size = typlen > 0 ? typlen : nil # -1 = varlena, -2 = C string

    Column.new(
      attnum: row["attnum"].to_i,
      name: row["attname"],
      type_name: row["typname"],
      formatted_type: row["formatted_type"],
      alignment: TYPALIGN_BYTES.fetch(row["typalign"], 4), # unknown code → 4 bytes
      fixed_size: fixed_size,
      nullable: row["nullable"] == "t",
      default_expr: row["default_expr"]
    )
  end
end
|
|
117
|
+
end
|
|
118
|
+
end
|