pgslice 0.4.4 → 0.4.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,13 @@
1
module PgSlice
  class CLI
    # Runs "ANALYZE VERBOSE" for every partition of the parent table and then
    # for the parent table itself. The statements are run one by one, without
    # a wrapping transaction.
    #
    # The parent is TABLE itself when --swapped is given; otherwise it is
    # TABLE's intermediate table.
    desc "analyze TABLE", "Analyze tables"
    option :swapped, type: :boolean, default: false, desc: "Use swapped table"
    def analyze(table)
      table = create_table(table)

      parent_table =
        if options[:swapped]
          table
        else
          table.intermediate_table
        end

      # partitions first, parent last
      targets = parent_table.partitions + [parent_table]
      statements = targets.map do |target|
        "ANALYZE VERBOSE #{quote_table(target)};"
      end

      run_queries_without_transaction(statements)
    end
  end
end
@@ -0,0 +1,103 @@
1
module PgSlice
  class CLI
    # Copies rows from a source table into a destination table in batches,
    # issuing one "INSERT INTO ... SELECT ..." per primary-key range of
    # --batch-size ids.
    #
    # Source/destination default to:
    #   * TABLE -> TABLE's intermediate table (normal mode)
    #   * TABLE's retired table -> TABLE      (--swapped)
    # and can be overridden with --source-table / --dest-table.
    #
    # Aborts (via the CLI's `abort` helper) when the batch size is not
    # positive, when either table is missing, when no primary key is found,
    # or when the primary key is not numeric.
    desc "fill TABLE", "Fill the partitions in batches"
    option :batch_size, type: :numeric, default: 10000, desc: "Batch size"
    option :swapped, type: :boolean, default: false, desc: "Use swapped table"
    option :source_table, desc: "Source table"
    option :dest_table, desc: "Destination table"
    option :start, type: :numeric, desc: "Primary key to start"
    option :where, desc: "Conditions to filter"
    option :sleep, type: :numeric, desc: "Seconds to sleep between batches"
    def fill(table)
      # A zero batch size makes the batch count infinite and a negative one
      # makes the loop below run forever, so reject both up front.
      batch_size = options[:batch_size]
      abort "Batch size must be greater than 0" unless batch_size > 0

      table = create_table(table)
      source_table = create_table(options[:source_table]) if options[:source_table]
      dest_table = create_table(options[:dest_table]) if options[:dest_table]

      if options[:swapped]
        source_table ||= table.retired_table
        dest_table ||= table
      else
        source_table ||= table
        dest_table ||= table.intermediate_table
      end

      assert_table(source_table)
      assert_table(dest_table)

      period, field, cast, _, declarative, _ = dest_table.fetch_settings(table.trigger_name)

      # The time window stays nil unless the destination has partitions.
      starting_time = nil
      ending_time = nil
      partitions = []

      if period
        name_format = self.name_format(period)

        partitions = dest_table.partitions
        if partitions.any?
          starting_time = partition_date(partitions.first, name_format)
          ending_time = advance_date(partition_date(partitions.last, name_format), period, 1)
        end
      end

      # Declarative setups read the primary key from the newest partition.
      # When no partition exists yet, fall back to the original table instead
      # of calling #primary_key on nil.
      schema_table = (period && declarative && partitions.last) || table

      primary_key = schema_table.primary_key[0]
      abort "No primary key" unless primary_key

      max_source_id = nil
      begin
        max_source_id = source_table.max_id(primary_key)
      rescue PG::UndefinedFunction
        abort "Only numeric primary keys are supported"
      end

      max_dest_id =
        if options[:start]
          options[:start]
        elsif options[:swapped]
          dest_table.max_id(primary_key, where: options[:where], below: max_source_id)
        else
          dest_table.max_id(primary_key, where: options[:where])
        end

      # A max of 0 is treated as "destination is empty": start just below the
      # smallest source id that falls inside the partitioned time window.
      if max_dest_id == 0 && !options[:swapped]
        min_source_id = source_table.min_id(primary_key, field, cast, starting_time, options[:where])
        max_dest_id = min_source_id - 1 if min_source_id
      end

      starting_id = max_dest_id
      fields = source_table.columns.map { |c| quote_ident(c) }.join(", ")

      i = 1
      batch_count = ((max_source_id - starting_id) / batch_size.to_f).ceil

      # <= 0 also covers a starting id beyond the largest source id
      if batch_count <= 0
        log_sql "/* nothing to fill */"
      end

      while starting_id < max_source_id
        conditions = [
          "#{quote_ident(primary_key)} > #{starting_id}",
          "#{quote_ident(primary_key)} <= #{starting_id + batch_size}"
        ]
        if starting_time
          conditions << "#{quote_ident(field)} >= #{sql_date(starting_time, cast)}"
          conditions << "#{quote_ident(field)} < #{sql_date(ending_time, cast)}"
        end
        # --where is raw SQL supplied by the operator and is passed through as-is
        conditions << options[:where] if options[:where]
        where = conditions.join(" AND ")

        query = <<-SQL
/* #{i} of #{batch_count} */
INSERT INTO #{quote_table(dest_table)} (#{fields})
    SELECT #{fields} FROM #{quote_table(source_table)}
    WHERE #{where}
        SQL

        run_query(query)

        starting_id += batch_size
        i += 1

        # Only pause when another batch will actually run.
        if options[:sleep] && starting_id < max_source_id
          sleep(options[:sleep])
        end
      end
    end
  end
end
@@ -0,0 +1,97 @@
1
module PgSlice
  class CLI
    # Creates the intermediate table for TABLE inside a single transaction.
    #
    # With partitioning enabled (the default) COLUMN and PERIOD are required.
    # Depending on --trigger-based and the server version the intermediate
    # table is either declaratively partitioned by range on COLUMN or a plain
    # copy guarded by a placeholder insert trigger. The chosen column, period
    # and cast are stored in a comment on the table (declarative) or on the
    # trigger (trigger-based).
    #
    # With --no-partition the intermediate table is a plain copy of TABLE
    # (LIKE ... INCLUDING ALL) plus its foreign keys.
    desc "prep TABLE [COLUMN] [PERIOD]", "Create an intermediate table for partitioning"
    option :partition, type: :boolean, default: true, desc: "Partition the table"
    option :trigger_based, type: :boolean, default: false, desc: "Use trigger-based partitioning"
    def prep(table, column = nil, period = nil)
      table = create_table(table)
      intermediate_table = table.intermediate_table
      trigger_name = table.trigger_name

      if !options[:partition]
        abort "Usage: \"pgslice prep TABLE --no-partition\"" if column || period
        abort "Can't use --trigger-based and --no-partition" if options[:trigger_based]
      end

      assert_table(table)
      assert_no_table(intermediate_table)

      if options[:partition]
        abort "Usage: \"pgslice prep TABLE COLUMN PERIOD\"" unless column && period
        abort "Column not found: #{column}" if !table.columns.include?(column)
        abort "Invalid period: #{period}" if !SQL_FORMAT[period.to_sym]
      end

      # version summary
      # 1. trigger-based
      # 2. declarative, with indexes and foreign keys on child tables
      # 3. declarative, with indexes and foreign keys on parent table
      version =
        if options[:trigger_based] then 1
        elsif server_version_num < 100000 then 1
        elsif server_version_num < 110000 then 2
        else 3
        end

      declarative = version >= 2
      statements = []

      if options[:partition] && declarative
        statements << <<-SQL
CREATE TABLE #{quote_table(intermediate_table)} (LIKE #{quote_table(table)} INCLUDING DEFAULTS INCLUDING CONSTRAINTS INCLUDING STORAGE INCLUDING COMMENTS) PARTITION BY RANGE (#{quote_ident(column)});
        SQL

        if version == 3
          # indexes and foreign keys live on the parent table
          statements.concat(table.index_defs.map { |definition| make_index_def(definition, intermediate_table) })
          statements.concat(table.foreign_keys.map { |definition| make_fk_def(definition, intermediate_table) })
        end

        # record the partitioning settings in a table comment
        cast = table.column_cast(column)
        statements << <<-SQL
COMMENT ON TABLE #{quote_table(intermediate_table)} is 'column:#{column},period:#{period},cast:#{cast},version:#{version}';
        SQL
      else
        statements << <<-SQL
CREATE TABLE #{quote_table(intermediate_table)} (LIKE #{quote_table(table)} INCLUDING ALL);
        SQL

        statements.concat(table.foreign_keys.map { |definition| make_fk_def(definition, intermediate_table) })
      end

      if options[:partition] && version == 1
        # placeholder trigger: inserts fail until partitions are created
        statements << <<-SQL
CREATE FUNCTION #{quote_ident(trigger_name)}()
    RETURNS trigger AS $$
    BEGIN
        RAISE EXCEPTION 'Create partitions first.';
    END;
    $$ LANGUAGE plpgsql;
        SQL

        statements << <<-SQL
CREATE TRIGGER #{quote_ident(trigger_name)}
    BEFORE INSERT ON #{quote_table(intermediate_table)}
    FOR EACH ROW EXECUTE PROCEDURE #{quote_ident(trigger_name)}();
        SQL

        # record the partitioning settings in a trigger comment
        cast = table.column_cast(column)
        statements << <<-SQL
COMMENT ON TRIGGER #{quote_ident(trigger_name)} ON #{quote_table(intermediate_table)} is 'column:#{column},period:#{period},cast:#{cast}';
        SQL
      end

      run_queries(statements)
    end
  end
end
@@ -0,0 +1,28 @@
1
module PgSlice
  class CLI
    # Swaps TABLE with its intermediate table in one transaction: TABLE is
    # renamed to its retired name, the intermediate table takes TABLE's name,
    # and each sequence reported by TABLE is re-owned by the matching column
    # under TABLE's name.
    #
    # Requires TABLE and its intermediate table to exist and the retired
    # table not to exist. On servers reporting version >= 90300 the
    # transaction starts by setting lock_timeout to --lock-timeout.
    desc "swap TABLE", "Swap the intermediate table with the original table"
    option :lock_timeout, default: "5s", desc: "Lock timeout"
    def swap(table)
      table = create_table(table)
      intermediate_table = table.intermediate_table
      retired_table = table.retired_table

      assert_table(table)
      assert_table(intermediate_table)
      assert_no_table(retired_table)

      renames = [
        "ALTER TABLE #{quote_table(table)} RENAME TO #{quote_no_schema(retired_table)};",
        "ALTER TABLE #{quote_table(intermediate_table)} RENAME TO #{quote_no_schema(table)};"
      ]

      ownerships = table.sequences.map do |sequence|
        "ALTER SEQUENCE #{quote_ident(sequence["sequence_name"])} OWNED BY #{quote_table(table)}.#{quote_ident(sequence["related_column"])};"
      end

      queries = renames + ownerships

      # the timeout statement must come first so it covers the renames
      if server_version_num >= 90300
        queries = ["SET LOCAL lock_timeout = '#{options[:lock_timeout]}';"] + queries
      end

      run_queries(queries)
    end
  end
end
@@ -0,0 +1,18 @@
1
module PgSlice
  class CLI
    # Reverts `prep`: drops TABLE's intermediate table (CASCADE) and, if it
    # exists, the trigger function named after TABLE's trigger, both in one
    # transaction. Aborts when the intermediate table does not exist.
    desc "unprep TABLE", "Undo prep"
    def unprep(table)
      table = create_table(table)
      intermediate_table = table.intermediate_table
      trigger_name = table.trigger_name

      assert_table(intermediate_table)

      drop_table = "DROP TABLE #{quote_table(intermediate_table)} CASCADE;"
      drop_function = "DROP FUNCTION IF EXISTS #{quote_ident(trigger_name)}();"

      run_queries([drop_table, drop_function])
    end
  end
end
@@ -0,0 +1,25 @@
1
module PgSlice
  class CLI
    # Reverts `swap` in one transaction: TABLE is renamed back to its
    # intermediate name, the retired table takes TABLE's name, and each
    # sequence reported by TABLE is re-owned by the matching column under
    # TABLE's name.
    #
    # Requires TABLE and its retired table to exist and the intermediate
    # table not to exist.
    desc "unswap TABLE", "Undo swap"
    def unswap(table)
      table = create_table(table)
      intermediate_table = table.intermediate_table
      retired_table = table.retired_table

      assert_table(table)
      assert_table(retired_table)
      assert_no_table(intermediate_table)

      renames = [
        "ALTER TABLE #{quote_table(table)} RENAME TO #{quote_no_schema(intermediate_table)};",
        "ALTER TABLE #{quote_table(retired_table)} RENAME TO #{quote_no_schema(table)};"
      ]

      ownerships = table.sequences.map do |sequence|
        "ALTER SEQUENCE #{quote_ident(sequence["sequence_name"])} OWNED BY #{quote_table(table)}.#{quote_ident(sequence["related_column"])};"
      end

      run_queries(renames + ownerships)
    end
  end
end
@@ -0,0 +1,176 @@
1
module PgSlice
  # Output, database-connection, date and SQL-quoting helpers shared by the
  # CLI commands. The including class is expected to provide `options`,
  # `say` and `error`.
  module Helpers
    # Date format pattern per partition period; the keys are the supported
    # periods. Frozen so it cannot be mutated at runtime.
    SQL_FORMAT = {
      day: "YYYYMMDD",
      month: "YYYYMM",
      year: "YYYY"
    }.freeze

    protected

    # output

    # Writes a message through the including class's `error` method.
    def log(message = nil)
      error message
    end

    # Writes SQL (or a blank line when called without an argument) through
    # the including class's `say` method.
    def log_sql(message = nil)
      say message
    end

    # Stops the current command by raising Thor::Error with +message+.
    # Defined here, it takes precedence over Kernel#abort in including classes.
    def abort(message)
      raise Thor::Error, message
    end

    # database connection

    # Returns the memoized PG::Connection, creating it on first use.
    #
    # The URL comes from the --url option or the PGSLICE_URL environment
    # variable. A "schema" query parameter is removed from the URL and kept
    # in @schema (default "public"). PGCONNECT_TIMEOUT is set to "1" unless
    # it is already set. Aborts on a missing or invalid URL and on connection
    # failure.
    def connection
      @connection ||= begin
        url = options[:url] || ENV["PGSLICE_URL"]
        abort "Set PGSLICE_URL or use the --url option" unless url

        uri = URI.parse(url)
        params = CGI.parse(uri.query.to_s)
        # remove schema
        @schema = Array(params.delete("schema") || "public")[0]
        # nil (rather than "") keeps a dangling "?" off the rebuilt URL
        uri.query = params.empty? ? nil : URI.encode_www_form(params)

        ENV["PGCONNECT_TIMEOUT"] ||= "1"
        PG::Connection.new(uri.to_s)
      end
    rescue PG::ConnectionBad => e
      abort e.message
    rescue URI::InvalidURIError
      abort "Invalid url"
    end

    # Returns the schema taken from the connection URL ("public" by default).
    def schema
      connection # ensure called first
      @schema
    end

    # Runs +query+ with bind +params+ and returns the result rows as an array.
    def execute(query, params = [])
      connection.exec_params(query, params).to_a
    end

    # Runs +queries+ inside one transaction, logging BEGIN/COMMIT around them.
    def run_queries(queries)
      connection.transaction do
        execute("SET LOCAL client_min_messages TO warning") unless options[:dry_run]
        log_sql "BEGIN;"
        log_sql
        run_queries_without_transaction(queries)
        log_sql "COMMIT;"
      end
    end

    # Logs +query+ and, unless --dry-run is set, executes it. A
    # PG::ServerError is turned into an abort carrying the error class and
    # message.
    def run_query(query)
      log_sql query
      unless options[:dry_run]
        begin
          execute(query)
        rescue PG::ServerError => e
          abort("#{e.class.name}: #{e.message}")
        end
      end
      log_sql
    end

    # Runs each query in order via #run_query, without opening a transaction.
    def run_queries_without_transaction(queries)
      queries.each do |query|
        run_query(query)
      end
    end

    # Returns the server's numeric version (server_version_num) as an
    # Integer. Memoized so repeated checks do not re-query the server.
    def server_version_num
      @server_version_num ||= execute("SHOW server_version_num")[0]["server_version_num"].to_i
    end

    # helpers

    # Formats +time+ as a quoted SQL literal: date plus time with a UTC
    # suffix for a "timestamptz" cast, date only otherwise. Appends
    # "::<cast>" unless +add_cast+ is false.
    def sql_date(time, cast, add_cast = true)
      if cast == "timestamptz"
        fmt = "%Y-%m-%d %H:%M:%S UTC"
      else
        fmt = "%Y-%m-%d"
      end
      str = "'#{time.strftime(fmt)}'"
      add_cast ? "#{str}::#{cast}" : str
    end

    # Returns the strftime pattern for the date suffix of a partition name.
    # Any period other than day or month is treated as a year.
    def name_format(period)
      case period.to_sym
      when :day
        "%Y%m%d"
      when :month
        "%Y%m"
      else
        "%Y"
      end
    end

    # Parses the text after the last "_" in the partition's name using
    # +name_format+ and returns it as a DateTime.
    def partition_date(partition, name_format)
      DateTime.strptime(partition.name.split("_").last, name_format)
    end

    # Truncates +date+ to the start of its period (day, month, otherwise year).
    def round_date(date, period)
      date = date.to_date
      case period.to_sym
      when :day
        date
      when :month
        Date.new(date.year, date.month)
      else
        Date.new(date.year)
      end
    end

    # Aborts unless +table+ exists.
    def assert_table(table)
      abort "Table not found: #{table}" unless table.exists?
    end

    # Aborts if +table+ already exists.
    def assert_no_table(table)
      abort "Table already exists: #{table}" if table.exists?
    end

    # Moves +date+ forward by +count+ periods (day, month, otherwise year)
    # and returns a Date.
    def advance_date(date, period, count = 1)
      date = date.to_date
      case period.to_sym
      when :day
        date.next_day(count)
      when :month
        date.next_month(count)
      else
        date.next_year(count)
      end
    end

    # Quotes +value+ as an SQL identifier.
    def quote_ident(value)
      PG::Connection.quote_ident(value)
    end

    # Returns the table's own quoted name (delegates to table.quote_table).
    def quote_table(table)
      table.quote_table
    end

    # Returns the quoted table name without a schema prefix.
    def quote_no_schema(table)
      quote_ident(table.name)
    end

    # Builds a Table from "schema.name" or a bare "name". A bare name uses
    # the connection's schema; only the first "." splits schema from name.
    def create_table(name)
      if name.include?(".")
        schema, name = name.split(".", 2)
      else
        schema = self.schema
      end
      Table.new(schema, name)
    end

    # Rewrites an index definition to target +table+ and drops the explicit
    # index name (" INDEX <name> ON " becomes " INDEX ON ").
    def make_index_def(index_def, table)
      index_def.sub(/ ON \S+ USING /, " ON #{quote_table(table)} USING ").sub(/ INDEX .+ ON /, " INDEX ON ") + ";"
    end

    # Builds the ALTER TABLE statement that adds +fk_def+ to +table+.
    def make_fk_def(fk_def, table)
      "ALTER TABLE #{quote_table(table)} ADD #{fk_def};"
    end
  end
end