pgslice 0.4.4 → 0.4.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,13 @@
1
module PgSlice
  class CLI
    desc "analyze TABLE", "Analyze tables"
    option :swapped, type: :boolean, default: false, desc: "Use swapped table"
    # Runs ANALYZE VERBOSE on each partition of the parent table and then on
    # the parent itself, without wrapping the statements in a transaction.
    #
    # The parent is TABLE when --swapped is set; otherwise it is TABLE's
    # intermediate table.
    def analyze(table)
      base = create_table(table)

      parent =
        if options[:swapped]
          base
        else
          base.intermediate_table
        end

      # Partitions first, parent last.
      targets = parent.partitions + [parent]
      statements = targets.map { |target| "ANALYZE VERBOSE #{quote_table(target)};" }

      run_queries_without_transaction(statements)
    end
  end
end
@@ -0,0 +1,103 @@
1
module PgSlice
  class CLI
    desc "fill TABLE", "Fill the partitions in batches"
    option :batch_size, type: :numeric, default: 10000, desc: "Batch size"
    option :swapped, type: :boolean, default: false, desc: "Use swapped table"
    option :source_table, desc: "Source table"
    option :dest_table, desc: "Destination table"
    option :start, type: :numeric, desc: "Primary key to start"
    option :where, desc: "Conditions to filter"
    option :sleep, type: :numeric, desc: "Seconds to sleep between batches"
    # Copies rows from the source table into the destination table in
    # primary-key ranges of --batch-size, one INSERT ... SELECT per batch.
    #
    # Source/destination default to (table, intermediate table), or to
    # (retired table, table) when --swapped is set; --source-table and
    # --dest-table override either side.
    #
    # Aborts when either table is missing, when a declaratively partitioned
    # destination has no partitions yet, when no primary key is found, when
    # the primary key is not numeric, or when the batch size is not positive.
    def fill(table)
      table = create_table(table)
      source_table = create_table(options[:source_table]) if options[:source_table]
      dest_table = create_table(options[:dest_table]) if options[:dest_table]

      if options[:swapped]
        source_table ||= table.retired_table
        dest_table ||= table
      else
        source_table ||= table
        dest_table ||= table.intermediate_table
      end

      assert_table(source_table)
      assert_table(dest_table)

      period, field, cast, _, declarative, _ = dest_table.fetch_settings(table.trigger_name)

      if period
        name_format = self.name_format(period)

        partitions = dest_table.partitions
        if partitions.any?
          # Only rows inside the time window covered by existing partitions are copied.
          starting_time = partition_date(partitions.first, name_format)
          ending_time = advance_date(partition_date(partitions.last, name_format), period, 1)
        end
      end

      schema_table = period && declarative ? partitions.last : table
      # With declarative partitioning and no partitions yet, partitions.last is
      # nil; fail with a readable message instead of a NoMethodError below.
      abort "No partitions found: #{dest_table}" unless schema_table

      primary_key = schema_table.primary_key[0]
      abort "No primary key" unless primary_key

      max_source_id = nil
      begin
        max_source_id = source_table.max_id(primary_key)
      rescue PG::UndefinedFunction
        abort "Only numeric primary keys are supported"
      end

      max_dest_id =
        if options[:start]
          options[:start]
        elsif options[:swapped]
          dest_table.max_id(primary_key, where: options[:where], below: max_source_id)
        else
          dest_table.max_id(primary_key, where: options[:where])
        end

      if max_dest_id == 0 && !options[:swapped]
        # Nothing copied yet: start just below the first source id that qualifies.
        min_source_id = source_table.min_id(primary_key, field, cast, starting_time, options[:where])
        max_dest_id = min_source_id - 1 if min_source_id
      end

      starting_id = max_dest_id
      fields = source_table.columns.map { |c| quote_ident(c) }.join(", ")
      batch_size = options[:batch_size]
      # A zero batch size makes the batch count computation raise, and a
      # negative one would never advance towards max_source_id.
      abort "Batch size must be greater than 0" unless batch_size && batch_size > 0

      i = 1
      batch_count = ((max_source_id - starting_id) / batch_size.to_f).ceil

      if batch_count == 0
        log_sql "/* nothing to fill */"
      end

      while starting_id < max_source_id
        where = "#{quote_ident(primary_key)} > #{starting_id} AND #{quote_ident(primary_key)} <= #{starting_id + batch_size}"
        if starting_time
          where << " AND #{quote_ident(field)} >= #{sql_date(starting_time, cast)} AND #{quote_ident(field)} < #{sql_date(ending_time, cast)}"
        end
        if options[:where]
          where << " AND #{options[:where]}"
        end

        query = <<-SQL
/* #{i} of #{batch_count} */
INSERT INTO #{quote_table(dest_table)} (#{fields})
SELECT #{fields} FROM #{quote_table(source_table)}
WHERE #{where}
        SQL

        run_query(query)

        starting_id += batch_size
        i += 1

        # Sleep only when another batch will follow; this mirrors the loop
        # condition so no time is wasted after the final batch.
        if options[:sleep] && starting_id < max_source_id
          sleep(options[:sleep])
        end
      end
    end
  end
end
@@ -0,0 +1,97 @@
1
module PgSlice
  class CLI
    desc "prep TABLE [COLUMN] [PERIOD]", "Create an intermediate table for partitioning"
    option :partition, type: :boolean, default: true, desc: "Partition the table"
    option :trigger_based, type: :boolean, default: false, desc: "Use trigger-based partitioning"
    # Creates the intermediate table for TABLE and runs all statements through
    # run_queries.
    #
    # With --partition (the default) COLUMN and PERIOD are required and the
    # intermediate table is set up either declaratively (PARTITION BY RANGE)
    # or with a placeholder insert trigger, depending on --trigger-based and
    # the server version. With --no-partition the table is a plain copy.
    def prep(table, column=nil, period=nil)
      table = create_table(table)
      intermediate_table = table.intermediate_table
      trigger_name = table.trigger_name
      partitioned = options[:partition]

      if !partitioned
        abort "Usage: \"pgslice prep TABLE --no-partition\"" if column || period
        abort "Can't use --trigger-based and --no-partition" if options[:trigger_based]
      end

      assert_table(table)
      assert_no_table(intermediate_table)

      if partitioned
        abort "Usage: \"pgslice prep TABLE COLUMN PERIOD\"" unless column && period
        abort "Column not found: #{column}" unless table.columns.include?(column)
        abort "Invalid period: #{period}" unless SQL_FORMAT[period.to_sym]
      end

      statements = []

      # version summary
      # 1. trigger-based
      # 2. declarative, with indexes and foreign keys on child tables
      # 3. declarative, with indexes and foreign keys on parent table
      version =
        if options[:trigger_based] || server_version_num < 100000
          1
        elsif server_version_num < 110000
          2
        else
          3
        end

      declarative = version > 1

      if partitioned && declarative
        statements << <<-SQL
CREATE TABLE #{quote_table(intermediate_table)} (LIKE #{quote_table(table)} INCLUDING DEFAULTS INCLUDING CONSTRAINTS INCLUDING STORAGE INCLUDING COMMENTS) PARTITION BY RANGE (#{quote_ident(column)});
        SQL

        if version == 3
          statements.concat(table.index_defs.map { |index_def| make_index_def(index_def, intermediate_table) })
          statements.concat(table.foreign_keys.map { |fk_def| make_fk_def(fk_def, intermediate_table) })
        end

        # record the partitioning settings in the table comment
        cast = table.column_cast(column)
        statements << <<-SQL
COMMENT ON TABLE #{quote_table(intermediate_table)} is 'column:#{column},period:#{period},cast:#{cast},version:#{version}';
        SQL
      else
        statements << <<-SQL
CREATE TABLE #{quote_table(intermediate_table)} (LIKE #{quote_table(table)} INCLUDING ALL);
        SQL

        statements.concat(table.foreign_keys.map { |fk_def| make_fk_def(fk_def, intermediate_table) })

        # Reaching this branch with partitioning enabled means it is
        # trigger-based: install a placeholder trigger and record the
        # settings in the trigger comment.
        if partitioned
          statements << <<-SQL
CREATE FUNCTION #{quote_ident(trigger_name)}()
RETURNS trigger AS $$
BEGIN
RAISE EXCEPTION 'Create partitions first.';
END;
$$ LANGUAGE plpgsql;
          SQL

          statements << <<-SQL
CREATE TRIGGER #{quote_ident(trigger_name)}
BEFORE INSERT ON #{quote_table(intermediate_table)}
FOR EACH ROW EXECUTE PROCEDURE #{quote_ident(trigger_name)}();
          SQL

          cast = table.column_cast(column)
          statements << <<-SQL
COMMENT ON TRIGGER #{quote_ident(trigger_name)} ON #{quote_table(intermediate_table)} is 'column:#{column},period:#{period},cast:#{cast}';
          SQL
        end
      end

      run_queries(statements)
    end
  end
end
@@ -0,0 +1,28 @@
1
module PgSlice
  class CLI
    desc "swap TABLE", "Swap the intermediate table with the original table"
    option :lock_timeout, default: "5s", desc: "Lock timeout"
    # Renames TABLE to its retired name and the intermediate table to TABLE,
    # then re-points each sequence reported by TABLE at the new TABLE.
    #
    # Requires TABLE and its intermediate table to exist and the retired
    # table not to exist. On servers reporting version >= 90300 a
    # lock_timeout is set first.
    def swap(table)
      live = create_table(table)
      staging = live.intermediate_table
      retired = live.retired_table

      assert_table(live)
      assert_table(staging)
      assert_no_table(retired)

      statements = []
      statements << "ALTER TABLE #{quote_table(live)} RENAME TO #{quote_no_schema(retired)};"
      statements << "ALTER TABLE #{quote_table(staging)} RENAME TO #{quote_no_schema(live)};"

      ownership = live.sequences.map do |seq|
        "ALTER SEQUENCE #{quote_ident(seq["sequence_name"])} OWNED BY #{quote_table(live)}.#{quote_ident(seq["related_column"])};"
      end
      statements.concat(ownership)

      if server_version_num >= 90300
        statements.unshift("SET LOCAL lock_timeout = '#{options[:lock_timeout]}';")
      end

      run_queries(statements)
    end
  end
end
@@ -0,0 +1,18 @@
1
module PgSlice
  class CLI
    desc "unprep TABLE", "Undo prep"
    # Drops TABLE's intermediate table (CASCADE) and, if present, the trigger
    # function named after TABLE's trigger name.
    #
    # Aborts when the intermediate table does not exist.
    def unprep(table)
      base = create_table(table)
      staging = base.intermediate_table
      function_name = base.trigger_name

      assert_table(staging)

      drop_table = "DROP TABLE #{quote_table(staging)} CASCADE;"
      drop_function = "DROP FUNCTION IF EXISTS #{quote_ident(function_name)}();"

      run_queries([drop_table, drop_function])
    end
  end
end
@@ -0,0 +1,25 @@
1
module PgSlice
  class CLI
    desc "unswap TABLE", "Undo swap"
    # Renames TABLE back to its intermediate name and the retired table back
    # to TABLE, then re-points each sequence reported by TABLE at TABLE.
    #
    # Requires TABLE and its retired table to exist and the intermediate
    # table not to exist.
    def unswap(table)
      live = create_table(table)
      staging = live.intermediate_table
      retired = live.retired_table

      assert_table(live)
      assert_table(retired)
      assert_no_table(staging)

      statements = []
      statements << "ALTER TABLE #{quote_table(live)} RENAME TO #{quote_no_schema(staging)};"
      statements << "ALTER TABLE #{quote_table(retired)} RENAME TO #{quote_no_schema(live)};"

      ownership = live.sequences.map do |seq|
        "ALTER SEQUENCE #{quote_ident(seq["sequence_name"])} OWNED BY #{quote_table(live)}.#{quote_ident(seq["related_column"])};"
      end
      statements.concat(ownership)

      run_queries(statements)
    end
  end
end
@@ -0,0 +1,176 @@
1
module PgSlice
  # Output, database, date and quoting helpers shared by the CLI commands.
  #
  # The methods rely on names supplied by the including class: `options`,
  # `say` and `error`.
  module Helpers
    # Date pattern per supported partition period. Frozen so the shared
    # constant cannot be mutated by accident.
    SQL_FORMAT = {
      day: "YYYYMMDD",
      month: "YYYYMM",
      year: "YYYY"
    }.freeze

    protected

    # output

    # Passes +message+ to the includer's `error` method.
    def log(message = nil)
      error message
    end

    # Passes +message+ (SQL text, or nil for an empty line) to `say`.
    def log_sql(message = nil)
      say message
    end

    # Stops the command by raising Thor::Error with +message+.
    def abort(message)
      raise Thor::Error, message
    end

    # database connection

    # Returns the memoized PG connection, opening it on first use.
    #
    # The URL comes from --url or PGSLICE_URL. A "schema" query parameter is
    # removed from the URL and remembered in @schema (default "public").
    # Aborts when no URL is set, the URL is invalid, or the connection fails.
    def connection
      @connection ||= begin
        url = options[:url] || ENV["PGSLICE_URL"]
        abort "Set PGSLICE_URL or use the --url option" unless url

        uri = URI.parse(url)
        params = CGI.parse(uri.query.to_s)
        # remove schema
        @schema = Array(params.delete("schema") || "public")[0]
        uri.query = URI.encode_www_form(params)

        ENV["PGCONNECT_TIMEOUT"] ||= "1"
        PG::Connection.new(uri.to_s)
      end
    rescue PG::ConnectionBad => e
      abort e.message
    rescue URI::InvalidURIError
      abort "Invalid url"
    end

    # Returns the schema taken from the connection URL.
    def schema
      connection # ensure called first
      @schema
    end

    # Runs +query+ with +params+ and returns the result rows as an array.
    def execute(query, params = [])
      connection.exec_params(query, params).to_a
    end

    # Runs +queries+ inside a single transaction, logging BEGIN/COMMIT around them.
    def run_queries(queries)
      connection.transaction do
        execute("SET LOCAL client_min_messages TO warning") unless options[:dry_run]
        log_sql "BEGIN;"
        log_sql
        run_queries_without_transaction(queries)
        log_sql "COMMIT;"
      end
    end

    # Logs +query+ and, unless --dry-run is set, executes it; server errors
    # are turned into an abort carrying the error class and message.
    def run_query(query)
      log_sql query
      unless options[:dry_run]
        begin
          execute(query)
        rescue PG::ServerError => e
          abort("#{e.class.name}: #{e.message}")
        end
      end
      log_sql
    end

    # Runs each query in order via run_query, without opening a transaction.
    def run_queries_without_transaction(queries)
      queries.each do |query|
        run_query(query)
      end
    end

    # Returns the server's server_version_num setting as an Integer.
    def server_version_num
      execute("SHOW server_version_num")[0]["server_version_num"].to_i
    end

    # helpers

    # Formats +time+ as a quoted SQL literal: date-and-time in UTC notation
    # when +cast+ is "timestamptz", date-only otherwise. The "::cast" suffix
    # is appended unless +add_cast+ is false.
    def sql_date(time, cast, add_cast = true)
      if cast == "timestamptz"
        fmt = "%Y-%m-%d %H:%M:%S UTC"
      else
        fmt = "%Y-%m-%d"
      end
      str = "'#{time.strftime(fmt)}'"
      add_cast ? "#{str}::#{cast}" : str
    end

    # Returns the strftime pattern for +period+; anything other than day or
    # month falls back to the year pattern.
    def name_format(period)
      case period.to_sym
      when :day
        "%Y%m%d"
      when :month
        "%Y%m"
      else
        "%Y"
      end
    end

    # Parses the text after the last "_" of +partition+'s name using +name_format+.
    def partition_date(partition, name_format)
      DateTime.strptime(partition.name.split("_").last, name_format)
    end

    # Truncates +date+ to the first day of its +period+.
    def round_date(date, period)
      date = date.to_date
      case period.to_sym
      when :day
        date
      when :month
        Date.new(date.year, date.month)
      else
        Date.new(date.year)
      end
    end

    # Aborts unless +table+ exists.
    def assert_table(table)
      abort "Table not found: #{table}" unless table.exists?
    end

    # Aborts if +table+ already exists.
    def assert_no_table(table)
      abort "Table already exists: #{table}" if table.exists?
    end

    # Moves +date+ forward by +count+ periods (days, months, or years).
    def advance_date(date, period, count = 1)
      date = date.to_date
      case period.to_sym
      when :day
        date.next_day(count)
      when :month
        date.next_month(count)
      else
        date.next_year(count)
      end
    end

    # Quotes +value+ as an SQL identifier.
    def quote_ident(value)
      PG::Connection.quote_ident(value)
    end

    # Returns +table+'s own quoted form.
    def quote_table(table)
      table.quote_table
    end

    # Returns +table+'s quoted name without any schema prefix.
    def quote_no_schema(table)
      quote_ident(table.name)
    end

    # Builds a Table from +name+; "schema.name" sets the schema explicitly,
    # otherwise the schema from the connection URL is used.
    def create_table(name)
      if name.include?(".")
        schema, name = name.split(".", 2)
      else
        schema = self.schema
      end
      Table.new(schema, name)
    end

    # Rewrites an index definition so it targets +table+ and carries no
    # explicit index name, and terminates it with ";".
    def make_index_def(index_def, table)
      # Block form: the quoted table name is inserted literally, so any
      # backslashes in it are not treated as back-references.
      retargeted = index_def.sub(/ ON \S+ USING /) { " ON #{quote_table(table)} USING " }
      # Match exactly one identifier (quoted or bare) as the index name. A
      # greedy ".+" would run to the last " ON " in the definition and
      # destroy a predicate or expression that happens to contain " ON ".
      retargeted.sub(/ INDEX (?:"(?:[^"]|"")+"|\S+) ON /, " INDEX ON ") + ";"
    end

    # Wraps a foreign key definition in an ALTER TABLE ... ADD for +table+.
    def make_fk_def(fk_def, table)
      "ALTER TABLE #{quote_table(table)} ADD #{fk_def};"
    end
  end
end