pgslice 0.4.4 → 0.4.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +41 -126
- data/exe/pgslice +1 -1
- data/lib/pgslice.rb +13 -3
- data/lib/pgslice/cli.rb +32 -0
- data/lib/pgslice/cli/add_partitions.rb +130 -0
- data/lib/pgslice/cli/analyze.rb +13 -0
- data/lib/pgslice/cli/fill.rb +103 -0
- data/lib/pgslice/cli/prep.rb +97 -0
- data/lib/pgslice/cli/swap.rb +28 -0
- data/lib/pgslice/cli/unprep.rb +18 -0
- data/lib/pgslice/cli/unswap.rb +25 -0
- data/lib/pgslice/helpers.rb +176 -0
- data/lib/pgslice/table.rb +139 -23
- data/lib/pgslice/version.rb +1 -1
- metadata +11 -4
- data/lib/pgslice/client.rb +0 -584
- data/lib/pgslice/generic_table.rb +0 -89
@@ -0,0 +1,13 @@
|
|
1
|
+
module PgSlice
  class CLI
    desc "analyze TABLE", "Analyze tables"
    option :swapped, type: :boolean, default: false, desc: "Use swapped table"
    # Runs ANALYZE VERBOSE on each partition and then on the parent table.
    #
    # The parent is the named table itself when --swapped is given, otherwise
    # its intermediate table. Statements are run outside of a transaction.
    def analyze(table)
      table = create_table(table)

      parent_table =
        if options[:swapped]
          table
        else
          table.intermediate_table
        end

      # partitions first, parent last
      targets = parent_table.partitions + [parent_table]
      statements = targets.map do |target|
        "ANALYZE VERBOSE #{quote_table(target)};"
      end

      run_queries_without_transaction(statements)
    end
  end
end
|
@@ -0,0 +1,103 @@
|
|
1
|
+
module PgSlice
  class CLI
    desc "fill TABLE", "Fill the partitions in batches"
    option :batch_size, type: :numeric, default: 10000, desc: "Batch size"
    option :swapped, type: :boolean, default: false, desc: "Use swapped table"
    option :source_table, desc: "Source table"
    option :dest_table, desc: "Destination table"
    option :start, type: :numeric, desc: "Primary key to start"
    option :where, desc: "Conditions to filter"
    option :sleep, type: :numeric, desc: "Seconds to sleep between batches"
    # Copies rows from the source table into the destination table in
    # primary-key ranges of --batch-size, one INSERT ... SELECT per batch.
    #
    # Without --swapped the source is TABLE and the destination is its
    # intermediate table; with --swapped the source is the retired table and
    # the destination is TABLE. --source-table / --dest-table override either.
    #
    # Aborts (via the `abort` helper) when a table is missing, the batch size
    # is not positive, a declarative destination has no partitions, there is
    # no primary key, or the primary key is not numeric.
    def fill(table)
      batch_size = options[:batch_size]
      # a zero or negative batch size would never make progress in the loop below
      abort "Batch size must be greater than 0" unless batch_size > 0

      table = create_table(table)
      source_table = create_table(options[:source_table]) if options[:source_table]
      dest_table = create_table(options[:dest_table]) if options[:dest_table]

      if options[:swapped]
        source_table ||= table.retired_table
        dest_table ||= table
      else
        source_table ||= table
        dest_table ||= table.intermediate_table
      end

      assert_table(source_table)
      assert_table(dest_table)

      period, field, cast, _, declarative, _ = dest_table.fetch_settings(table.trigger_name)

      if period
        name_format = self.name_format(period)

        # the first and last partitions bound the time range that gets copied
        partitions = dest_table.partitions
        if partitions.any?
          starting_time = partition_date(partitions.first, name_format)
          ending_time = advance_date(partition_date(partitions.last, name_format), period, 1)
        end
      end

      if period && declarative
        # the primary key is looked up on the newest partition
        # (presumably because the declarative parent may not carry it - TODO confirm)
        schema_table = partitions.last
        # previously a missing partition surfaced as NoMethodError on nil
        abort "No partitions" unless schema_table
      else
        schema_table = table
      end

      primary_key = schema_table.primary_key[0]
      abort "No primary key" unless primary_key

      max_source_id = nil
      begin
        max_source_id = source_table.max_id(primary_key)
      rescue PG::UndefinedFunction
        abort "Only numeric primary keys are supported"
      end

      max_dest_id =
        if options[:start]
          options[:start]
        elsif options[:swapped]
          dest_table.max_id(primary_key, where: options[:where], below: max_source_id)
        else
          dest_table.max_id(primary_key, where: options[:where])
        end

      # nothing copied yet: begin just below the smallest matching source id
      if max_dest_id == 0 && !options[:swapped]
        min_source_id = source_table.min_id(primary_key, field, cast, starting_time, options[:where])
        max_dest_id = min_source_id - 1 if min_source_id
      end

      starting_id = max_dest_id
      fields = source_table.columns.map { |c| quote_ident(c) }.join(", ")

      i = 1
      batch_count = ((max_source_id - starting_id) / batch_size.to_f).ceil

      # also covers a start position beyond the last source id (negative count)
      if batch_count <= 0
        log_sql "/* nothing to fill */"
      end

      while starting_id < max_source_id
        where = "#{quote_ident(primary_key)} > #{starting_id} AND #{quote_ident(primary_key)} <= #{starting_id + batch_size}"
        if starting_time
          where << " AND #{quote_ident(field)} >= #{sql_date(starting_time, cast)} AND #{quote_ident(field)} < #{sql_date(ending_time, cast)}"
        end
        if options[:where]
          # parenthesized so a top-level OR in the user's condition cannot
          # escape the id range of this batch
          where << " AND (#{options[:where]})"
        end

        query = <<-SQL
/* #{i} of #{batch_count} */
INSERT INTO #{quote_table(dest_table)} (#{fields})
SELECT #{fields} FROM #{quote_table(source_table)}
WHERE #{where}
        SQL

        run_query(query)

        starting_id += batch_size
        i += 1

        # only pause when another batch will actually run
        if options[:sleep] && starting_id < max_source_id
          sleep(options[:sleep])
        end
      end
    end
  end
end
|
@@ -0,0 +1,97 @@
|
|
1
|
+
module PgSlice
  class CLI
    desc "prep TABLE [COLUMN] [PERIOD]", "Create an intermediate table for partitioning"
    option :partition, type: :boolean, default: true, desc: "Partition the table"
    option :trigger_based, type: :boolean, default: false, desc: "Use trigger-based partitioning"
    # Builds and runs (in one transaction) the statements that create the
    # intermediate table for TABLE.
    #
    # With partitioning enabled, COLUMN and PERIOD are required and the
    # settings are stored in a comment: on the table for declarative
    # partitioning, on the trigger for trigger-based partitioning.
    # With --no-partition a plain copy of the table structure is created.
    def prep(table, column=nil, period=nil)
      table = create_table(table)
      intermediate_table = table.intermediate_table
      trigger_name = table.trigger_name
      partitioned = options[:partition]

      if !partitioned
        abort "Usage: \"pgslice prep TABLE --no-partition\"" if column || period
        abort "Can't use --trigger-based and --no-partition" if options[:trigger_based]
      end

      assert_table(table)
      assert_no_table(intermediate_table)

      if partitioned
        abort "Usage: \"pgslice prep TABLE COLUMN PERIOD\"" unless column && period
        abort "Column not found: #{column}" unless table.columns.include?(column)
        abort "Invalid period: #{period}" unless SQL_FORMAT[period.to_sym]
      end

      # version summary
      # 1. trigger-based
      # 2. declarative, with indexes and foreign keys on child tables
      # 3. declarative, with indexes and foreign keys on parent table
      version =
        if options[:trigger_based] || server_version_num < 100000
          1
        else
          server_version_num < 110000 ? 2 : 3
        end

      declarative = version > 1
      statements = []

      if declarative && partitioned
        statements << <<-SQL
CREATE TABLE #{quote_table(intermediate_table)} (LIKE #{quote_table(table)} INCLUDING DEFAULTS INCLUDING CONSTRAINTS INCLUDING STORAGE INCLUDING COMMENTS) PARTITION BY RANGE (#{quote_ident(column)});
        SQL

        if version == 3
          table.index_defs.each do |index_def|
            statements << make_index_def(index_def, intermediate_table)
          end

          table.foreign_keys.each do |fk_def|
            statements << make_fk_def(fk_def, intermediate_table)
          end
        end

        # record the partitioning settings in a table comment
        cast = table.column_cast(column)
        statements << <<-SQL
COMMENT ON TABLE #{quote_table(intermediate_table)} is 'column:#{column},period:#{period},cast:#{cast},version:#{version}';
        SQL
      else
        statements << <<-SQL
CREATE TABLE #{quote_table(intermediate_table)} (LIKE #{quote_table(table)} INCLUDING ALL);
        SQL

        table.foreign_keys.each do |fk_def|
          statements << make_fk_def(fk_def, intermediate_table)
        end
      end

      if partitioned && !declarative
        # placeholder trigger function: inserts fail until partitions exist
        statements << <<-SQL
CREATE FUNCTION #{quote_ident(trigger_name)}()
RETURNS trigger AS $$
BEGIN
RAISE EXCEPTION 'Create partitions first.';
END;
$$ LANGUAGE plpgsql;
        SQL

        statements << <<-SQL
CREATE TRIGGER #{quote_ident(trigger_name)}
BEFORE INSERT ON #{quote_table(intermediate_table)}
FOR EACH ROW EXECUTE PROCEDURE #{quote_ident(trigger_name)}();
        SQL

        # record the partitioning settings in a trigger comment
        cast = table.column_cast(column)
        statements << <<-SQL
COMMENT ON TRIGGER #{quote_ident(trigger_name)} ON #{quote_table(intermediate_table)} is 'column:#{column},period:#{period},cast:#{cast}';
        SQL
      end

      run_queries(statements)
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module PgSlice
  class CLI
    desc "swap TABLE", "Swap the intermediate table with the original table"
    option :lock_timeout, default: "5s", desc: "Lock timeout"
    # Renames TABLE to its retired name and the intermediate table to TABLE,
    # then re-points the sequences reported by TABLE at the table now holding
    # that name. Everything runs in a single transaction.
    #
    # Aborts when TABLE or the intermediate table is missing, or when the
    # retired table already exists.
    def swap(table)
      table = create_table(table)
      intermediate_table = table.intermediate_table
      retired_table = table.retired_table

      assert_table(table)
      assert_table(intermediate_table)
      assert_no_table(retired_table)

      queries = [
        "ALTER TABLE #{quote_table(table)} RENAME TO #{quote_no_schema(retired_table)};",
        "ALTER TABLE #{quote_table(intermediate_table)} RENAME TO #{quote_no_schema(table)};"
      ]

      table.sequences.each do |sequence|
        queries << "ALTER SEQUENCE #{quote_ident(sequence["sequence_name"])} OWNED BY #{quote_table(table)}.#{quote_ident(sequence["related_column"])};"
      end

      # lock_timeout is only set on servers reporting version 9.3 or newer
      if server_version_num >= 90300
        # escape_literal quotes the value, so an option containing a quote
        # can no longer break (or alter) the statement
        queries.unshift("SET LOCAL lock_timeout = #{connection.escape_literal(options[:lock_timeout])};")
      end

      run_queries(queries)
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module PgSlice
  class CLI
    desc "unprep TABLE", "Undo prep"
    # Drops the intermediate table (with CASCADE) and, if it exists, the
    # trigger function named after TABLE, in a single transaction.
    # Aborts when the intermediate table does not exist.
    def unprep(table)
      table = create_table(table)
      intermediate_table = table.intermediate_table
      trigger_name = table.trigger_name

      assert_table(intermediate_table)

      drop_table = "DROP TABLE #{quote_table(intermediate_table)} CASCADE;"
      drop_function = "DROP FUNCTION IF EXISTS #{quote_ident(trigger_name)}();"

      run_queries([drop_table, drop_function])
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
module PgSlice
  class CLI
    desc "unswap TABLE", "Undo swap"
    # Reverses a swap: TABLE is renamed back to the intermediate name and the
    # retired table takes the name TABLE again; the sequences reported by
    # TABLE are then re-pointed at it. Runs in a single transaction.
    #
    # Aborts when TABLE or the retired table is missing, or when the
    # intermediate table already exists.
    def unswap(table)
      table = create_table(table)
      intermediate_table = table.intermediate_table
      retired_table = table.retired_table

      assert_table(table)
      assert_table(retired_table)
      assert_no_table(intermediate_table)

      statements = []
      statements << "ALTER TABLE #{quote_table(table)} RENAME TO #{quote_no_schema(intermediate_table)};"
      statements << "ALTER TABLE #{quote_table(retired_table)} RENAME TO #{quote_no_schema(table)};"

      table.sequences.each do |seq|
        owner = "#{quote_table(table)}.#{quote_ident(seq["related_column"])}"
        statements << "ALTER SEQUENCE #{quote_ident(seq["sequence_name"])} OWNED BY #{owner};"
      end

      run_queries(statements)
    end
  end
end
|
@@ -0,0 +1,176 @@
|
|
1
|
+
module PgSlice
  # Shared helpers for the CLI commands: output, database access, date
  # arithmetic for partition names, and SQL quoting/rewriting.
  #
  # Methods here call `options`, `say` and `error`, which are not defined in
  # this module - presumably supplied by the including Thor class.
  module Helpers
    # Date format string for each supported partition period.
    SQL_FORMAT = {
      day: "YYYYMMDD",
      month: "YYYYMM",
      year: "YYYY"
    }.freeze

    protected

    # output

    # Passes the message to `error`.
    def log(message = nil)
      error message
    end

    # Passes the SQL text (or a blank line when nil) to `say`.
    def log_sql(message = nil)
      say message
    end

    # Overrides Kernel#abort: raises Thor::Error instead of exiting directly.
    def abort(message)
      raise Thor::Error, message
    end

    # database connection

    # Memoized PG connection built from --url or PGSLICE_URL.
    #
    # The "schema" query parameter is removed from the URL and remembered in
    # @schema (default "public"); all other parameters are passed through.
    # Aborts on a missing/invalid URL or a failed connection.
    def connection
      @connection ||= begin
        url = options[:url] || ENV["PGSLICE_URL"]
        abort "Set PGSLICE_URL or use the --url option" unless url

        uri = URI.parse(url)
        params = CGI.parse(uri.query.to_s)
        # remove schema
        @schema = Array(params.delete("schema") || "public")[0]
        # assigning an empty string would leave a dangling "?" on the URL,
        # so clear the query entirely when no parameters remain
        uri.query = params.any? ? URI.encode_www_form(params) : nil

        ENV["PGCONNECT_TIMEOUT"] ||= "1"
        PG::Connection.new(uri.to_s)
      end
    rescue PG::ConnectionBad => e
      abort e.message
    rescue URI::InvalidURIError
      abort "Invalid url"
    end

    # Schema taken from the connection URL ("public" when not given).
    def schema
      connection # ensure called first
      @schema
    end

    # Runs a query with optional bind parameters and returns the rows as an array.
    def execute(query, params = [])
      connection.exec_params(query, params).to_a
    end

    # Logs and runs the queries inside a single transaction.
    def run_queries(queries)
      connection.transaction do
        execute("SET LOCAL client_min_messages TO warning") unless options[:dry_run]
        log_sql "BEGIN;"
        log_sql
        run_queries_without_transaction(queries)
        log_sql "COMMIT;"
      end
    end

    # Logs one query and, unless --dry-run is set, executes it.
    # Server errors are turned into an abort with the error class and message.
    def run_query(query)
      log_sql query
      unless options[:dry_run]
        begin
          execute(query)
        rescue PG::ServerError => e
          abort("#{e.class.name}: #{e.message}")
        end
      end
      log_sql
    end

    # Logs and runs each query in order, without wrapping them in a transaction.
    def run_queries_without_transaction(queries)
      queries.each do |query|
        run_query(query)
      end
    end

    # Server version as an integer (e.g. 100000); queries the server on every call.
    def server_version_num
      execute("SHOW server_version_num")[0]["server_version_num"].to_i
    end

    # helpers

    # Formats a date/time as a quoted SQL literal, followed by `::cast`
    # unless add_cast is false. timestamptz values include the time and "UTC".
    def sql_date(time, cast, add_cast = true)
      if cast == "timestamptz"
        fmt = "%Y-%m-%d %H:%M:%S UTC"
      else
        fmt = "%Y-%m-%d"
      end
      str = "'#{time.strftime(fmt)}'"
      add_cast ? "#{str}::#{cast}" : str
    end

    # strftime/strptime format of the partition name suffix for a period;
    # anything other than day or month is treated as year.
    def name_format(period)
      case period.to_sym
      when :day
        "%Y%m%d"
      when :month
        "%Y%m"
      else
        "%Y"
      end
    end

    # Parses the text after the last underscore of the partition name
    # using name_format.
    def partition_date(partition, name_format)
      DateTime.strptime(partition.name.split("_").last, name_format)
    end

    # Truncates a date to the start of its period (day, month, otherwise year).
    def round_date(date, period)
      date = date.to_date
      case period.to_sym
      when :day
        date
      when :month
        Date.new(date.year, date.month)
      else
        Date.new(date.year)
      end
    end

    # Aborts unless the table exists.
    def assert_table(table)
      abort "Table not found: #{table}" unless table.exists?
    end

    # Aborts if the table already exists.
    def assert_no_table(table)
      abort "Table already exists: #{table}" if table.exists?
    end

    # Moves a date forward by count periods (day, month, otherwise year).
    def advance_date(date, period, count = 1)
      date = date.to_date
      case period.to_sym
      when :day
        date.next_day(count)
      when :month
        date.next_month(count)
      else
        date.next_year(count)
      end
    end

    # Quotes a single SQL identifier.
    def quote_ident(value)
      PG::Connection.quote_ident(value)
    end

    # Quoted table reference as produced by the table object itself.
    def quote_table(table)
      table.quote_table
    end

    # Quoted table name without the schema part (for RENAME TO).
    def quote_no_schema(table)
      quote_ident(table.name)
    end

    # Builds a Table from "schema.name" or from a bare name combined with
    # the connection's schema.
    def create_table(name)
      if name.include?(".")
        schema, name = name.split(".", 2)
      else
        schema = self.schema
      end
      Table.new(schema, name)
    end

    # Rewrites an index definition so that it targets `table` and drops the
    # index name (letting the server pick one).
    def make_index_def(index_def, table)
      retargeted = index_def.sub(/ ON \S+ USING /, " ON #{quote_table(table)} USING ")
      # non-greedy: stop at the first " ON " after the index name so text
      # containing " ON " later in the definition (e.g. in a predicate) is kept
      retargeted.sub(/ INDEX .+? ON /, " INDEX ON ") + ";"
    end

    # Statement that adds the given foreign key definition to `table`.
    def make_fk_def(fk_def, table)
      "ALTER TABLE #{quote_table(table)} ADD #{fk_def};"
    end
  end
end
|