pcrd 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +24 -0
- data/LICENSE +21 -0
- data/README.md +614 -0
- data/bin/pcrd +7 -0
- data/lib/pcrd/advisory_lock.rb +50 -0
- data/lib/pcrd/apply/engine.rb +184 -0
- data/lib/pcrd/apply/worker.rb +97 -0
- data/lib/pcrd/backfill/batch.rb +158 -0
- data/lib/pcrd/backfill/engine.rb +153 -0
- data/lib/pcrd/checkpoint/store.rb +217 -0
- data/lib/pcrd/cli.rb +274 -0
- data/lib/pcrd/commands/analyze.rb +125 -0
- data/lib/pcrd/commands/cleanup.rb +112 -0
- data/lib/pcrd/commands/demo.rb +152 -0
- data/lib/pcrd/commands/readiness.rb +30 -0
- data/lib/pcrd/commands/status.rb +129 -0
- data/lib/pcrd/commands/verify.rb +172 -0
- data/lib/pcrd/config/add_column.rb +7 -0
- data/lib/pcrd/config/analyze_config.rb +8 -0
- data/lib/pcrd/config/column_spec.rb +10 -0
- data/lib/pcrd/config/connection.rb +7 -0
- data/lib/pcrd/config/cutover_config.rb +7 -0
- data/lib/pcrd/config/load_error.rb +7 -0
- data/lib/pcrd/config/loader.rb +158 -0
- data/lib/pcrd/config/migrate_config.rb +21 -0
- data/lib/pcrd/config/root.rb +9 -0
- data/lib/pcrd/config/schema.rb +62 -0
- data/lib/pcrd/config/table.rb +9 -0
- data/lib/pcrd/config/verify_config.rb +7 -0
- data/lib/pcrd/config.rb +7 -0
- data/lib/pcrd/connection/client.rb +129 -0
- data/lib/pcrd/connection/error.rb +7 -0
- data/lib/pcrd/connection/replication.rb +108 -0
- data/lib/pcrd/cutover/orchestrator.rb +108 -0
- data/lib/pcrd/cutover/sequences.rb +138 -0
- data/lib/pcrd/demo/generator.rb +214 -0
- data/lib/pcrd/demo/schema.rb +154 -0
- data/lib/pcrd/error.rb +12 -0
- data/lib/pcrd/migration/orchestrator.rb +272 -0
- data/lib/pcrd/monitor/lag.rb +107 -0
- data/lib/pcrd/options.rb +15 -0
- data/lib/pcrd/output/analyze_printer.rb +173 -0
- data/lib/pcrd/output/cutover_printer.rb +128 -0
- data/lib/pcrd/output/preflight_printer.rb +119 -0
- data/lib/pcrd/output/readiness_printer.rb +72 -0
- data/lib/pcrd/preflight.rb +331 -0
- data/lib/pcrd/readiness/manifest.rb +201 -0
- data/lib/pcrd/replication/consumer.rb +235 -0
- data/lib/pcrd/replication/error.rb +10 -0
- data/lib/pcrd/replication/pgoutput/messages.rb +68 -0
- data/lib/pcrd/replication/pgoutput/parser.rb +316 -0
- data/lib/pcrd/reporter/console.rb +46 -0
- data/lib/pcrd/reporter/null.rb +14 -0
- data/lib/pcrd/schema/column.rb +59 -0
- data/lib/pcrd/schema/ddl.rb +71 -0
- data/lib/pcrd/schema/diff_entry.rb +36 -0
- data/lib/pcrd/schema/differ.rb +175 -0
- data/lib/pcrd/schema/object_reader.rb +187 -0
- data/lib/pcrd/schema/packer.rb +90 -0
- data/lib/pcrd/schema/reader.rb +118 -0
- data/lib/pcrd/schema/setup.rb +143 -0
- data/lib/pcrd/schema/setup_error.rb +9 -0
- data/lib/pcrd/schema/table_not_found.rb +8 -0
- data/lib/pcrd/schema/type_registry.rb +116 -0
- data/lib/pcrd/sql.rb +55 -0
- data/lib/pcrd/transform/row_transformer.rb +69 -0
- data/lib/pcrd/transform/type_map.rb +209 -0
- data/lib/pcrd/transform/validator.rb +106 -0
- data/lib/pcrd/version.rb +5 -0
- data/lib/pcrd.rb +11 -0
- metadata +231 -0
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Pcrd
|
|
4
|
+
module Cutover
|
|
5
|
+
# Advances sequences on the target cluster to be safely ahead of the source.
|
|
6
|
+
#
|
|
7
|
+
# Called during cutover after writes on the source have stopped (maintenance
|
|
8
|
+
# mode active). At that point the source sequence is frozen and we can
|
|
9
|
+
# safely compute the correct target value.
|
|
10
|
+
#
|
|
11
|
+
# For each serial/bigserial/identity column in the migrated tables:
|
|
12
|
+
# 1. Read last_value + is_called from the source sequence
|
|
13
|
+
# 2. Read MAX(pk_col) from the source table (covers rows written with explicit
|
|
14
|
+
# ids above the sequence position; rolled-back nextval calls already show in last_value)
|
|
15
|
+
# 3. Take the maximum of both + safety_buffer
|
|
16
|
+
# 4. Call setval on the target sequence
|
|
17
|
+
#
|
|
18
|
+
# Returns an Array<SequenceResult> describing every setval performed.
|
|
19
|
+
class Sequences
|
|
20
|
+
# Immutable record of a single setval performed on the target.
#
# Members, in order: the table and column that own the sequence, the
# sequence name on the source and on the target, the source sequence's
# last_value, the MAX(column) found in the source table, the value passed
# to setval, and the safety buffer that was added.
SequenceResult = Data.define(
  *%i[
    table_name
    column_name
    source_seq_name
    target_seq_name
    source_last_value
    source_max_id
    target_value
    safety_buffer
  ]
)
|
|
30
|
+
|
|
31
|
+
# Binds the advancer to the two clusters involved in the cutover.
#
# @param source_pool [#exec, #quote_ident] connection used to read sequence
#   state and table maxima
# @param target_pool [#exec, #exec_sql, #quote_ident] connection on which
#   sequences are created and advanced
# @param safety_buffer [Integer] gap added on top of the value computed from
#   the source
def initialize(source_pool:, target_pool:, safety_buffer: 1_000)
  @buffer = safety_buffer
  @target = target_pool
  @source = source_pool
end
|
|
36
|
+
|
|
37
|
+
# Advances sequences for all serial/identity columns in the given tables.
|
|
38
|
+
# Returns Array<SequenceResult>.
|
|
39
|
+
# Walks the tables in order and advances every owned sequence found.
#
# @param table_names [Array<String>] tables whose sequences should be advanced
# @return [Array<SequenceResult>] one entry per sequence that was advanced;
#   sequences for which advance_one returned nil are left out
def advance(table_names)
  table_names.flat_map do |table|
    sequences_for_table(table).filter_map do |column, sequence|
      advance_one(table, column, sequence)
    end
  end
end
|
|
49
|
+
|
|
50
|
+
private
|
|
51
|
+
|
|
52
|
+
# Returns Hash<column_name, qualified_sequence_name> for all owned sequences.
|
|
53
|
+
# Handles both SERIAL/BIGSERIAL columns and GENERATED ... AS IDENTITY columns.
|
|
54
|
+
# Looks up the sequences owned by columns of +table_name+ on the source.
#
# The pg_depend filter accepts deptype 'a' and 'i', which is how the query
# picks up both SERIAL/BIGSERIAL and GENERATED ... AS IDENTITY columns.
#
# Only a table visible on the connection's search_path is considered, so the
# lookup resolves the same relation as the unqualified table reference that
# advance_one uses when it reads MAX(column).
#
# @param table_name [String] table name as it appears in pg_class.relname
# @return [Hash{String => String}] column name => "schema.sequence". Empty
#   when nothing is owned, and also (after a warning on stderr) when the
#   catalog query raises Connection::Error.
def sequences_for_table(table_name)
  rows = @source.exec(<<~SQL, [table_name])
    SELECT a.attname AS col_name,
           n.nspname || '.' || seq.relname AS seq_name
    FROM pg_depend d
    JOIN pg_class seq ON seq.oid = d.objid AND seq.relkind = 'S'
    JOIN pg_namespace n ON n.oid = seq.relnamespace
    JOIN pg_attribute a
      ON a.attrelid = d.refobjid
     AND a.attnum = d.refobjsubid
    JOIN pg_class c ON c.oid = a.attrelid AND c.relname = $1
    WHERE d.classid = 'pg_class'::regclass
      AND d.refclassid = 'pg_class'::regclass
      AND d.deptype IN ('a', 'i')
      AND pg_catalog.pg_table_is_visible(c.oid)
  SQL

  rows.each_with_object({}) do |row, by_column|
    by_column[row["col_name"]] = row["seq_name"]
  end
rescue Connection::Error => e
  # Stay best-effort, but say so: a silently skipped table would leave its
  # sequences un-advanced with no trace in the cutover output.
  warn " Warning: could not read sequences for #{table_name}: #{e.message}"
  {}
end
|
|
76
|
+
|
|
77
|
+
# Advances the target sequence that backs +table_name+.+col_name+.
#
# Steps:
#   1. read last_value / is_called from the source sequence;
#   2. read MAX(col_name) from the source table, which catches rows written
#      with explicit values above the sequence position (values consumed by
#      rolled-back transactions are already reflected in last_value);
#   3. take the larger of the two and add the safety buffer;
#   4. make sure the sequence exists on the target, then setval it.
#
# @param table_name [String] source table name
# @param col_name [String] column that owns the sequence
# @param source_seq_name [String] "schema.sequence" as found on the source
# @return [SequenceResult, nil] nil (after a warning on stderr) when a
#   Connection::Error interrupts any step
def advance_one(table_name, col_name, source_seq_name)
  # NOTE(review): the sequence name is interpolated unquoted; names that need
  # quoting (mixed case, special characters) would presumably fail here — confirm.
  seq_row = @source.exec("SELECT last_value, is_called FROM #{source_seq_name}")[0]
  source_last = seq_row["last_value"].to_i
  # Accept both the text form ("t") and a decoded boolean so the outcome does
  # not depend on whether the connection type-casts result values.
  is_called = [true, "t"].include?(seq_row["is_called"])
  # While is_called is false, last_value itself has not been handed out yet.
  effective_last = is_called ? source_last : source_last - 1

  quoted_col = @source.quote_ident(col_name)
  quoted_table = @source.quote_ident(table_name)
  max_row = @source.exec("SELECT COALESCE(MAX(#{quoted_col}), 0) AS m FROM #{quoted_table}")
  source_max = max_row[0]["m"].to_i

  target_value = [effective_last, source_max].max + @buffer

  # The target sequence is always placed in the public schema, whatever
  # schema it lived in on the source.
  target_seq_name = "public.#{source_seq_name.split('.').last}"

  # pcrd strips sequences from generated DDL by design; cutover creates them.
  ensure_target_sequence(table_name, col_name, target_seq_name)
  @target.exec("SELECT setval($1, $2)", [target_seq_name, target_value])

  SequenceResult.new(
    table_name: table_name,
    column_name: col_name,
    source_seq_name: source_seq_name,
    target_seq_name: target_seq_name,
    source_last_value: source_last,
    source_max_id: source_max,
    target_value: target_value,
    safety_buffer: @buffer
  )
rescue Connection::Error => e
  warn " Warning: could not advance sequence for #{table_name}.#{col_name}: #{e.message}"
  nil
end
|
|
118
|
+
|
|
119
|
+
# Creates +seq_name+ on the target and wires it to the column, unless a
# sequence with that qualified name is already present there.
#
# When created, the sequence becomes the column's default (nextval) and is
# marked as owned by the column.
#
# @param table_name [String] target table holding the column
# @param col_name [String] column that will draw from the sequence
# @param seq_name [String] schema-qualified sequence name
# @return [nil, Object] nil when the sequence already existed, otherwise
#   whatever exec_sql returns
def ensure_target_sequence(table_name, col_name, seq_name)
  lookup_sql = "SELECT 1 FROM pg_class c " \
               "JOIN pg_namespace n ON n.oid = c.relnamespace " \
               "WHERE n.nspname || '.' || c.relname = $1 AND c.relkind = 'S'"
  return if @target.exec(lookup_sql, [seq_name]).ntuples.positive?

  table_ref = @target.quote_ident(table_name)
  column_ref = @target.quote_ident(col_name)
  statements = [
    "CREATE SEQUENCE #{seq_name};",
    "ALTER TABLE #{table_ref} ALTER COLUMN #{column_ref} SET DEFAULT nextval('#{seq_name}');",
    "ALTER SEQUENCE #{seq_name} OWNED BY #{table_ref}.#{column_ref};"
  ]
  @target.exec_sql(statements.map { |statement| "#{statement}\n" }.join)
end
|
|
136
|
+
end
|
|
137
|
+
end
|
|
138
|
+
end
|
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Pcrd
|
|
4
|
+
module Demo
|
|
5
|
+
# Generates realistic-looking fake data for the demo schema.
|
|
6
|
+
# Uses no external dependencies — all data is synthesized from built-in arrays.
|
|
7
|
+
class Generator
|
|
8
|
+
BATCH_SIZE = 500
|
|
9
|
+
|
|
10
|
+
# Pool of given names sampled for generated users: 50 distinct entries, so
# every name is equally likely. (The list previously contained "Sharon"
# twice; the second occurrence is replaced in place to keep indices stable.)
FIRST_NAMES = %w[
  James Mary Robert Patricia John Jennifer Michael Linda William Barbara
  David Susan Richard Karen Joseph Lisa Thomas Betty Charles Margaret
  Christopher Sandra Daniel Ashley Paul Emily Mark Donna George Carol
  Steven Ruth Kenneth Sharon Edward Michelle Brian Cynthia Ronald Laura
  Anthony Kimberly Kevin Deborah Jason Rebecca Jeffrey Amy Gary Helen
].freeze
|
|
17
|
+
|
|
18
|
+
LAST_NAMES = %w[
|
|
19
|
+
Smith Johnson Williams Brown Jones Garcia Miller Davis Wilson Anderson
|
|
20
|
+
Taylor Thomas Hernandez Moore Martin Jackson Thompson White Lopez Lee
|
|
21
|
+
Gonzalez Harris Clark Lewis Robinson Walker Perez Hall Young Allen
|
|
22
|
+
Sanchez Wright King Scott Green Baker Adams Nelson Hill Ramirez Campbell
|
|
23
|
+
Mitchell Roberts Carter Phillips Evans Turner Torres Parker Collins Edwards
|
|
24
|
+
].freeze
|
|
25
|
+
|
|
26
|
+
EMAIL_DOMAINS = %w[gmail.com yahoo.com hotmail.com outlook.com icloud.com].freeze
|
|
27
|
+
|
|
28
|
+
CITIES_STATES = [
|
|
29
|
+
["New York", "NY"], ["Los Angeles", "CA"], ["Chicago", "IL"],
|
|
30
|
+
["Houston", "TX"], ["Phoenix", "AZ"], ["Philadelphia", "PA"],
|
|
31
|
+
["San Antonio", "TX"], ["San Diego", "CA"], ["Dallas", "TX"],
|
|
32
|
+
["San Jose", "CA"], ["Austin", "TX"], ["Jacksonville", "FL"],
|
|
33
|
+
["Fort Worth", "TX"], ["Columbus", "OH"], ["Charlotte", "NC"],
|
|
34
|
+
["San Francisco", "CA"], ["Indianapolis", "IN"], ["Seattle", "WA"],
|
|
35
|
+
["Denver", "CO"], ["Nashville", "TN"], ["Oklahoma City", "OK"],
|
|
36
|
+
["El Paso", "TX"], ["Boston", "MA"], ["Portland", "OR"],
|
|
37
|
+
["Las Vegas", "NV"], ["Memphis", "TN"], ["Louisville", "KY"],
|
|
38
|
+
["Baltimore", "MD"], ["Milwaukee", "WI"], ["Albuquerque", "NM"],
|
|
39
|
+
["Tucson", "AZ"], ["Fresno", "CA"], ["Sacramento", "CA"],
|
|
40
|
+
["Mesa", "AZ"], ["Kansas City", "MO"], ["Atlanta", "GA"],
|
|
41
|
+
["Omaha", "NE"], ["Colorado Springs","CO"],["Raleigh", "NC"],
|
|
42
|
+
["Long Beach", "CA"], ["Virginia Beach", "VA"], ["Minneapolis", "MN"],
|
|
43
|
+
].freeze
|
|
44
|
+
|
|
45
|
+
STREET_SUFFIXES = %w[St Ave Blvd Dr Rd Way Ln Ct Pl Ter Circle].freeze
|
|
46
|
+
STREET_NAMES = %w[
|
|
47
|
+
Oak Maple Pine Cedar Elm Main Park Lake Hill River View Forest Sunset
|
|
48
|
+
Highland Meadow Ridge Valley Spring Garden Grove Willow Cherry Apple
|
|
49
|
+
].freeze
|
|
50
|
+
|
|
51
|
+
DESCRIPTIONS = [
|
|
52
|
+
"Charming property in a desirable neighborhood.",
|
|
53
|
+
"Move-in ready home with modern upgrades throughout.",
|
|
54
|
+
"Spacious floor plan with abundant natural light.",
|
|
55
|
+
"Updated kitchen and baths, hardwood floors.",
|
|
56
|
+
"Corner lot with mature landscaping and privacy.",
|
|
57
|
+
"Open concept living with high-end finishes.",
|
|
58
|
+
"Well-maintained property close to top-rated schools.",
|
|
59
|
+
"Quiet cul-de-sac location, walking distance to parks.",
|
|
60
|
+
"Investor opportunity or perfect primary residence.",
|
|
61
|
+
"Stunning views and outdoor entertaining space.",
|
|
62
|
+
"Classic architecture with contemporary updates.",
|
|
63
|
+
"Energy-efficient with solar panels and smart features.",
|
|
64
|
+
].freeze
|
|
65
|
+
|
|
66
|
+
# Sets up a generator writing through +pool+.
#
# @param pool [#exec] connection used for the INSERT statements
# @param seed [Integer] seed for the private Random instance; the same seed
#   yields the same sequence of random draws
def initialize(pool, seed: 42)
  @rng = Random.new(seed)
  @pool = pool
end
|
|
70
|
+
|
|
71
|
+
# Generate users, agents, then listings in dependency order.
|
|
72
|
+
# Returns hash with row counts generated for each table.
|
|
73
|
+
# Populates users, then agents, then listings (dependency order).
#
# User and agent counts are derived from the listing count, rounding up:
# one user per 10 listings, kept within 50..500, and one agent per 20
# listings, kept within 10..100.
#
# @param listing_count [Integer] number of listings to insert
# @return [Hash{Symbol => Integer}] rows requested per table under the keys
#   :users, :agents and :listings
def generate(listing_count:)
  # fdiv keeps the fraction so ceil really rounds up; plain Integer division
  # truncates first, which made the ceil a no-op.
  user_count = listing_count.fdiv(10).ceil.clamp(50, 500)
  agent_count = listing_count.fdiv(20).ceil.clamp(10, 100)

  $stdout.puts " Generating #{user_count} users..."
  user_ids = insert_users(user_count)

  $stdout.puts " Generating #{agent_count} agents..."
  agent_ids = insert_agents(agent_count, user_ids: user_ids)

  $stdout.puts " Generating #{listing_count} listings..."
  insert_listings(listing_count, agent_ids: agent_ids)

  { users: user_count, agents: agent_count, listings: listing_count }
end
|
|
88
|
+
|
|
89
|
+
private
|
|
90
|
+
|
|
91
|
+
# Inserts +count+ synthetic users in slices of BATCH_SIZE rows.
#
# Each row carries is_admin "false", an e-mail built from the sampled names
# plus the row index, the names themselves and a creation timestamp drawn
# from the last five years.
#
# @param count [Integer] number of users to create
# @return [Array<Integer>] ids returned by the INSERT statements, in order
def insert_users(count)
  user_rows = rows_batch(count) do |index|
    given = FIRST_NAMES.sample(random: @rng)
    family = LAST_NAMES.sample(random: @rng)
    domain = EMAIL_DOMAINS.sample(random: @rng)
    address = "#{given.downcase}.#{family.downcase}#{index}@#{domain}"
    ["false", address, given, family, random_past_timestamp(years: 5)]
  end

  user_rows.each_slice(BATCH_SIZE).flat_map do |chunk|
    statement = "INSERT INTO users (is_admin, email, first_name, last_name, created_at) " \
                "VALUES #{placeholders(chunk, 5)} RETURNING id"
    @pool.exec(statement, chunk.flatten).column_values(0).map(&:to_i)
  end
end
|
|
114
|
+
|
|
115
|
+
# Inserts +count+ synthetic agents in slices of BATCH_SIZE rows.
#
# Every agent points at a randomly sampled user, gets a "LIC-" licence
# number, is marked active, and receives a commission rate formatted with
# four decimals, a hire date and a creation timestamp.
#
# @param count [Integer] number of agents to create
# @param user_ids [Array<Integer>] ids to sample the owning user from
# @return [Array<Integer>] ids returned by the INSERT statements, in order
def insert_agents(count, user_ids:)
  agent_rows = rows_batch(count) do |_index|
    hire_date = random_past_date(years: 10)
    owner_id = user_ids.sample(random: @rng)
    licence = "LIC-#{@rng.rand(100_000..999_999)}"
    commission = format("%.4f", @rng.rand(0.020..0.065))
    created = random_past_timestamp(years: 5)
    [owner_id, licence, "true", commission, hire_date, created]
  end

  agent_rows.each_slice(BATCH_SIZE).flat_map do |chunk|
    statement = "INSERT INTO agents (user_id, license_number, active, commission_rate, hired_at, created_at) " \
                "VALUES #{placeholders(chunk, 6)} RETURNING id"
    @pool.exec(statement, chunk.flatten).column_values(0).map(&:to_i)
  end
end
|
|
137
|
+
|
|
138
|
+
# Inserts +count+ synthetic listings in slices of BATCH_SIZE rows.
#
# Values are emitted in the column order of the INSERT statement below. The
# same timestamp is used for listed_at and created_at, and a listing can
# only be flagged as new construction when it was built after 2020.
#
# @param count [Integer] number of listings to create
# @param agent_ids [Array<Integer>] ids to sample the listing agent from
# @return [Object] whatever each_slice returns; callers in this file ignore it
def insert_listings(count, agent_ids:)
  column_list = %w[
    active list_price bedrooms has_garage listed_at square_feet
    is_featured price_per_sqft longitude year_built created_at
    bathrooms is_new_construction latitude description
    address_line1 city state_code zip_code agent_id
  ].join(", ")

  # The draws below must keep this exact order: the output for a given seed
  # depends on the sequence of calls made on @rng.
  listing_rows = rows_batch(count) do |_index|
    city, state = CITIES_STATES.sample(random: @rng)
    area = @rng.rand(500..7_500)
    asking = (@rng.rand(80_000..4_500_000) / 1000.0 * 1000).round(2)
    per_sqft = (asking / area).round(4)
    beds = @rng.rand(1..6)
    baths = @rng.rand(1..4)
    built = @rng.rand(1920..2024)
    latitude = @rng.rand(25.0..48.0).round(6)
    longitude = @rng.rand(-124.0..-67.0).round(6)
    listed = random_past_timestamp(years: 3)
    house_number = @rng.rand(1..9_999)
    street_name = STREET_NAMES.sample(random: @rng)
    street_suffix = STREET_SUFFIXES.sample(random: @rng)
    postal = format("%05d", @rng.rand(10_000..99_999))
    # The coin flip is only drawn for homes built after 2020.
    new_build = built > 2020 && @rng.rand < 0.5 ? "true" : "false"
    blurb = DESCRIPTIONS.sample(random: @rng)
    agent = agent_ids.sample(random: @rng)

    [
      "true", asking, beds, "false", listed, area,
      "false", per_sqft, longitude, built, listed,
      baths, new_build, latitude, blurb,
      "#{house_number} #{street_name} #{street_suffix}", city, state, postal, agent
    ]
  end

  listing_rows.each_slice(BATCH_SIZE) do |chunk|
    @pool.exec(
      "INSERT INTO listings ( #{column_list}) VALUES #{placeholders(chunk, 20)}",
      chunk.flatten
    )
  end
end
|
|
188
|
+
|
|
189
|
+
# Builds an array of rows by calling block(index) for each row.
|
|
190
|
+
# Collects the block's result for every index from 0 up to count - 1.
#
# @param count [Integer] number of rows to build
# @yieldparam index [Integer] zero-based row index
# @return [Array] one element per index, in index order
def rows_batch(count, &block)
  (0...count).map(&block)
end
|
|
193
|
+
|
|
194
|
+
# Builds $1,$2,$3,... placeholder groups for multi-row insert.
|
|
195
|
+
# Renders one parenthesised group of numbered bind parameters per row, with
# the numbering continuing from one row to the next.
#
# @param rows [Enumerable] only the number of rows matters, not their content
# @param cols_per_row [Integer] parameters in each group
# @return [String] e.g. "($1,$2),($3,$4)" for two rows of two columns
def placeholders(rows, cols_per_row)
  groups = rows.each_with_index.map do |_row, position|
    first_slot = position * cols_per_row + 1
    last_slot = first_slot + cols_per_row - 1
    slots = first_slot.upto(last_slot).map { |slot| "$#{slot}" }
    "(#{slots.join(',')})"
  end
  groups.join(",")
end
|
|
201
|
+
|
|
202
|
+
# Picks a moment between now and +years+ (counted as 365-day years) ago.
#
# @param years [Integer] size of the look-back window
# @return [String] the moment formatted as "YYYY-MM-DD HH:MM:SS"
def random_past_timestamp(years:)
  window = years * 365 * 24 * 3600
  elapsed = @rng.rand(0..window)
  Time.at(Time.now.to_i - elapsed).strftime("%Y-%m-%d %H:%M:%S")
end
|
|
207
|
+
|
|
208
|
+
# Picks a calendar day between today and +years+ (counted as 365-day years) ago.
#
# @param years [Integer] size of the look-back window
# @return [String] the day in ISO 8601 form ("YYYY-MM-DD")
def random_past_date(years:)
  window_in_days = years * 365
  Date.today.prev_day(@rng.rand(0..window_in_days)).iso8601
end
|
|
212
|
+
end
|
|
213
|
+
end
|
|
214
|
+
end
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Pcrd
|
|
4
|
+
module Demo
|
|
5
|
+
# DDL for the demo source database.
|
|
6
|
+
#
|
|
7
|
+
# The listings table is intentionally ordered to maximize padding waste —
|
|
8
|
+
# booleans and smallints are interleaved with 8-byte types (timestamps,
|
|
9
|
+
# double precision) throughout the column list. This creates a compelling
|
|
10
|
+
# before/after for `pcrd analyze`.
|
|
11
|
+
#
|
|
12
|
+
# The migration target will reorder columns optimally (bigint IDs,
|
|
13
|
+
# 8-byte types first, then 4-byte, then 2-byte, then 1-byte, then variable).
|
|
14
|
+
module Schema
|
|
15
|
+
DROP_SQL = <<~SQL.freeze
|
|
16
|
+
DROP TABLE IF EXISTS listings CASCADE;
|
|
17
|
+
DROP TABLE IF EXISTS agents CASCADE;
|
|
18
|
+
DROP TABLE IF EXISTS users CASCADE;
|
|
19
|
+
SQL
|
|
20
|
+
|
|
21
|
+
# LISTINGS_DDL (defined below) orders its columns deliberately poorly to demonstrate padding analysis:
|
|
22
|
+
# booleans and smallints scattered among 8-byte types waste ~22 bytes/row.
|
|
23
|
+
# LISTINGS_FK_DDL: FK from listings → agents; executed after both tables are created.
|
|
24
|
+
LISTINGS_FK_DDL = <<~SQL.freeze
|
|
25
|
+
ALTER TABLE listings
|
|
26
|
+
ADD CONSTRAINT listings_agent_fk
|
|
27
|
+
FOREIGN KEY (agent_id) REFERENCES agents(id);
|
|
28
|
+
SQL
|
|
29
|
+
|
|
30
|
+
LISTINGS_DDL = <<~SQL.freeze
|
|
31
|
+
CREATE TABLE listings (
|
|
32
|
+
id integer NOT NULL GENERATED ALWAYS AS IDENTITY,
|
|
33
|
+
active boolean NOT NULL DEFAULT true,
|
|
34
|
+
list_price numeric(10,2) NOT NULL,
|
|
35
|
+
bedrooms smallint,
|
|
36
|
+
has_garage boolean NOT NULL DEFAULT false,
|
|
37
|
+
listed_at timestamp NOT NULL DEFAULT now(),
|
|
38
|
+
square_feet integer,
|
|
39
|
+
is_featured boolean NOT NULL DEFAULT false,
|
|
40
|
+
price_per_sqft real,
|
|
41
|
+
longitude double precision,
|
|
42
|
+
year_built smallint,
|
|
43
|
+
created_at timestamp NOT NULL DEFAULT now(),
|
|
44
|
+
bathrooms smallint,
|
|
45
|
+
is_new_construction boolean NOT NULL DEFAULT false,
|
|
46
|
+
latitude double precision,
|
|
47
|
+
description text,
|
|
48
|
+
address_line1 varchar(255) NOT NULL,
|
|
49
|
+
city varchar(100) NOT NULL,
|
|
50
|
+
state_code char(2) NOT NULL,
|
|
51
|
+
zip_code varchar(10),
|
|
52
|
+
agent_id integer,
|
|
53
|
+
PRIMARY KEY (id)
|
|
54
|
+
);
|
|
55
|
+
SQL
|
|
56
|
+
|
|
57
|
+
USERS_DDL = <<~SQL.freeze
|
|
58
|
+
CREATE TABLE users (
|
|
59
|
+
id integer NOT NULL GENERATED ALWAYS AS IDENTITY,
|
|
60
|
+
is_admin boolean NOT NULL DEFAULT false,
|
|
61
|
+
email varchar(255) NOT NULL,
|
|
62
|
+
first_name varchar(100),
|
|
63
|
+
last_name varchar(100),
|
|
64
|
+
created_at timestamp NOT NULL DEFAULT now(),
|
|
65
|
+
PRIMARY KEY (id),
|
|
66
|
+
UNIQUE (email)
|
|
67
|
+
);
|
|
68
|
+
SQL
|
|
69
|
+
|
|
70
|
+
AGENTS_DDL = <<~SQL.freeze
|
|
71
|
+
CREATE TABLE agents (
|
|
72
|
+
id integer NOT NULL GENERATED ALWAYS AS IDENTITY,
|
|
73
|
+
user_id integer NOT NULL REFERENCES users(id),
|
|
74
|
+
license_number varchar(50),
|
|
75
|
+
active boolean NOT NULL DEFAULT true,
|
|
76
|
+
commission_rate numeric(5,4) NOT NULL DEFAULT 0.0300,
|
|
77
|
+
hired_at date,
|
|
78
|
+
created_at timestamp NOT NULL DEFAULT now(),
|
|
79
|
+
PRIMARY KEY (id)
|
|
80
|
+
);
|
|
81
|
+
SQL
|
|
82
|
+
|
|
83
|
+
# A sample pcrd.config.yml that works with the demo schema.
|
|
84
|
+
# Written to disk by `pcrd demo setup` if no config exists.
|
|
85
|
+
SAMPLE_CONFIG = <<~YAML.freeze
|
|
86
|
+
# pcrd.config.yml — generated by `pcrd demo setup`
|
|
87
|
+
#
|
|
88
|
+
# Source: the original database (running demo schema)
|
|
89
|
+
# Target: the new cluster with the improved schema
|
|
90
|
+
|
|
91
|
+
source:
|
|
92
|
+
host: localhost
|
|
93
|
+
port: 5433
|
|
94
|
+
database: pcrd_source
|
|
95
|
+
user: postgres
|
|
96
|
+
password: postgres
|
|
97
|
+
|
|
98
|
+
target:
|
|
99
|
+
host: localhost
|
|
100
|
+
port: 5434
|
|
101
|
+
database: pcrd_target
|
|
102
|
+
user: postgres
|
|
103
|
+
password: postgres
|
|
104
|
+
|
|
105
|
+
migrate:
|
|
106
|
+
tables:
|
|
107
|
+
- name: users
|
|
108
|
+
columns:
|
|
109
|
+
id:
|
|
110
|
+
type: bigint
|
|
111
|
+
|
|
112
|
+
- name: agents
|
|
113
|
+
columns:
|
|
114
|
+
id:
|
|
115
|
+
type: bigint
|
|
116
|
+
user_id:
|
|
117
|
+
type: bigint
|
|
118
|
+
commission_rate:
|
|
119
|
+
type: numeric(7,4)
|
|
120
|
+
|
|
121
|
+
- name: listings
|
|
122
|
+
optimize_column_order: true
|
|
123
|
+
columns:
|
|
124
|
+
id:
|
|
125
|
+
type: bigint
|
|
126
|
+
agent_id:
|
|
127
|
+
type: bigint
|
|
128
|
+
list_price:
|
|
129
|
+
type: numeric(18,4)
|
|
130
|
+
rename: list_price_precise
|
|
131
|
+
listed_at:
|
|
132
|
+
type: timestamptz
|
|
133
|
+
created_at:
|
|
134
|
+
type: timestamptz
|
|
135
|
+
add_columns:
|
|
136
|
+
- name: updated_at
|
|
137
|
+
type: timestamptz
|
|
138
|
+
default: "now()"
|
|
139
|
+
|
|
140
|
+
analyze:
|
|
141
|
+
tables:
|
|
142
|
+
- listings
|
|
143
|
+
- users
|
|
144
|
+
- agents
|
|
145
|
+
|
|
146
|
+
verify:
|
|
147
|
+
sample_size: 1000
|
|
148
|
+
|
|
149
|
+
cutover:
|
|
150
|
+
sequence_buffer: 1000
|
|
151
|
+
YAML
|
|
152
|
+
end
|
|
153
|
+
end
|
|
154
|
+
end
|
data/lib/pcrd/error.rb
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Pcrd
  # Root of the hierarchy for errors pcrd raises deliberately. Rescuing
  # Pcrd::Error at the CLI boundary lets expected failures be reported as
  # clean messages, while anything else (a real bug) still surfaces with its
  # backtrace.
  class Error < StandardError
  end

  # Signals that the config handed to a command is missing something the
  # command requires (e.g. no target connection, no tables).
  class ConfigError < Error
  end
end
|