pgslice 0.5.0 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 38dff970617c4a96ee7b9234d345e841e0c496d02abbc1900a028a7f0cb46b87
4
- data.tar.gz: 24aa5b2b515991c0310fc3dc8cf54c6a7f322b4ee8743bc5e745ca85a1831cec
3
+ metadata.gz: 2f56f37e43ecc30fd145316bde2bd20478b360c077f928323fdee982379433b8
4
+ data.tar.gz: 594dc56e38e2d5902df435ddc514cdbb15004432f4b2b0d5d95ae37c7f8b00a5
5
5
  SHA512:
6
- metadata.gz: f63af4550ae70f3fe03e3e4104a43652529d0da576f909c23512ea4a3fa12e2b7f3dd9c526821c69f4b01ec685c5f72d9c8988e5ff4ae74f391e846540c068f4
7
- data.tar.gz: 405bb97384272d35e85fb0395e8ec367d1db62bdd7120cb0c9f03c9faa728c804256949fbc78eaf8c048b21aefe93106b3da60f8f9741e83dbb47aec23e64008
6
+ metadata.gz: 4cda3774db98a20ba3d13e1655baa7bcd3f294f810457d1c1a2a6ff7b3da1e861814d26638a95fcfa8f9fcad3280b0a6e419fead798cf83fb76e30108bd8b567
7
+ data.tar.gz: 62b2bc217e0fc0e822e4852159790b5d0e7d5e20a8d27216890362975de7100253183834c383b618c2d7ed1623891fbd41f76d44145e3274d6f7b0e1a1a7a225
data/CHANGELOG.md CHANGED
@@ -1,3 +1,10 @@
1
+ ## 0.6.0 (2023-04-22)
2
+
3
+ - Added support for generated columns
4
+ - Added compression and extended statistics to `prep`
5
+ - Dropped support for Ruby < 2.7
6
+ - Dropped support for Postgres < 11
7
+
1
8
  ## 0.5.0 (2023-01-29)
2
9
 
3
10
  - Dropped support for Ruby < 2.5
data/README.md CHANGED
@@ -32,9 +32,9 @@ This will give you the `pgslice` command. You can also install it with [Homebrew
32
32
  pgslice prep <table> <column> <period>
33
33
  ```
34
34
 
35
- Period can be `day`, `month`, or `year`.
35
+ The column should be a `timestamp`, `timestamptz`, or `date` column, and the period can be `day`, `month`, or `year`.
36
36
 
37
- This creates a partitioned table named `<table>_intermediate`.
37
+ This creates a partitioned table named `<table>_intermediate` using range partitioning.
38
38
 
39
39
  4. Add partitions to the intermediate table
40
40
 
@@ -50,7 +50,7 @@ This will give you the `pgslice` command. You can also install it with [Homebrew
50
50
  pgslice fill <table>
51
51
  ```
52
52
 
53
- Use the `--batch-size` and `--sleep` options to control the speed.
53
+ Use the `--batch-size` and `--sleep` options to control the speed (defaults to `10000` and `0` respectively).
54
54
 
55
55
  To sync data across different databases, check out [pgsync](https://github.com/ankane/pgsync).
56
56
 
@@ -108,17 +108,17 @@ pgslice add_partitions visits --intermediate --past 1 --future 1
108
108
  ```sql
109
109
  BEGIN;
110
110
 
111
- CREATE TABLE "public"."visits_202108" PARTITION OF "public"."visits_intermediate" FOR VALUES FROM ('2021-08-01') TO ('2021-09-01');
111
+ CREATE TABLE "public"."visits_202208" PARTITION OF "public"."visits_intermediate" FOR VALUES FROM ('2022-08-01') TO ('2022-09-01');
112
112
 
113
- ALTER TABLE "public"."visits_202108" ADD PRIMARY KEY ("id");
113
+ ALTER TABLE "public"."visits_202208" ADD PRIMARY KEY ("id");
114
114
 
115
- CREATE TABLE "public"."visits_202109" PARTITION OF "public"."visits_intermediate" FOR VALUES FROM ('2021-09-01') TO ('2021-10-01');
115
+ CREATE TABLE "public"."visits_202209" PARTITION OF "public"."visits_intermediate" FOR VALUES FROM ('2022-09-01') TO ('2022-10-01');
116
116
 
117
- ALTER TABLE "public"."visits_202109" ADD PRIMARY KEY ("id");
117
+ ALTER TABLE "public"."visits_202209" ADD PRIMARY KEY ("id");
118
118
 
119
- CREATE TABLE "public"."visits_202110" PARTITION OF "public"."visits_intermediate" FOR VALUES FROM ('2021-10-01') TO ('2021-11-01');
119
+ CREATE TABLE "public"."visits_202210" PARTITION OF "public"."visits_intermediate" FOR VALUES FROM ('2022-10-01') TO ('2022-11-01');
120
120
 
121
- ALTER TABLE "public"."visits_202110" ADD PRIMARY KEY ("id");
121
+ ALTER TABLE "public"."visits_202210" ADD PRIMARY KEY ("id");
122
122
 
123
123
  COMMIT;
124
124
  ```
@@ -131,17 +131,17 @@ pgslice fill visits
131
131
  /* 1 of 3 */
132
132
  INSERT INTO "public"."visits_intermediate" ("id", "user_id", "ip", "created_at")
133
133
  SELECT "id", "user_id", "ip", "created_at" FROM "public"."visits"
134
- WHERE "id" > 0 AND "id" <= 10000 AND "created_at" >= '2021-08-01'::date AND "created_at" < '2021-11-01'::date
134
+ WHERE "id" > 0 AND "id" <= 10000 AND "created_at" >= '2022-08-01'::date AND "created_at" < '2022-11-01'::date
135
135
 
136
136
  /* 2 of 3 */
137
137
  INSERT INTO "public"."visits_intermediate" ("id", "user_id", "ip", "created_at")
138
138
  SELECT "id", "user_id", "ip", "created_at" FROM "public"."visits"
139
- WHERE "id" > 10000 AND "id" <= 20000 AND "created_at" >= '2021-08-01'::date AND "created_at" < '2021-11-01'::date
139
+ WHERE "id" > 10000 AND "id" <= 20000 AND "created_at" >= '2022-08-01'::date AND "created_at" < '2022-11-01'::date
140
140
 
141
141
  /* 3 of 3 */
142
142
  INSERT INTO "public"."visits_intermediate" ("id", "user_id", "ip", "created_at")
143
143
  SELECT "id", "user_id", "ip", "created_at" FROM "public"."visits"
144
- WHERE "id" > 20000 AND "id" <= 30000 AND "created_at" >= '2021-08-01'::date AND "created_at" < '2021-11-01'::date
144
+ WHERE "id" > 20000 AND "id" <= 30000 AND "created_at" >= '2022-08-01'::date AND "created_at" < '2022-11-01'::date
145
145
  ```
146
146
 
147
147
  ```sh
@@ -149,11 +149,11 @@ pgslice analyze visits
149
149
  ```
150
150
 
151
151
  ```sql
152
- ANALYZE VERBOSE "public"."visits_202108";
152
+ ANALYZE VERBOSE "public"."visits_202208";
153
153
 
154
- ANALYZE VERBOSE "public"."visits_202109";
154
+ ANALYZE VERBOSE "public"."visits_202209";
155
155
 
156
- ANALYZE VERBOSE "public"."visits_202110";
156
+ ANALYZE VERBOSE "public"."visits_202210";
157
157
 
158
158
  ANALYZE VERBOSE "public"."visits_intermediate";
159
159
  ```
@@ -217,25 +217,20 @@ WHERE
217
217
  Back up and drop older partitions each day, month, or year.
218
218
 
219
219
  ```sh
220
- pg_dump -c -Fc -t <table>_202109 $PGSLICE_URL > <table>_202109.dump
221
- psql -c "DROP TABLE <table>_202109" $PGSLICE_URL
220
+ pg_dump -c -Fc -t <table>_202209 $PGSLICE_URL > <table>_202209.dump
221
+ psql -c "DROP TABLE <table>_202209" $PGSLICE_URL
222
222
  ```
223
223
 
224
224
  If you use [Amazon S3](https://aws.amazon.com/s3/) for backups, [s3cmd](https://github.com/s3tools/s3cmd) is a nice tool.
225
225
 
226
226
  ```sh
227
- s3cmd put <table>_202109.dump s3://<s3-bucket>/<table>_202109.dump
227
+ s3cmd put <table>_202209.dump s3://<s3-bucket>/<table>_202209.dump
228
228
  ```
229
229
 
230
230
  ## Schema Updates
231
231
 
232
232
  Once a table is partitioned, make schema updates on the master table only (not partitions). This includes adding, removing, and modifying columns, as well as adding and removing indexes and foreign keys.
233
233
 
234
- A few exceptions are:
235
-
236
- - For Postgres 10, make index and foreign key updates on partitions only
237
- - For Postgres < 10, make index and foreign key updates on the master table and all partitions
238
-
239
234
  ## Additional Commands
240
235
 
241
236
  To undo prep (which will delete partitions), use:
@@ -272,27 +267,20 @@ SELECT * FROM
272
267
  WHERE
273
268
  user_id = 123 AND
274
269
  -- for performance
275
- created_at >= '2021-09-01' AND created_at < '2021-09-02'
270
+ created_at >= '2022-09-01' AND created_at < '2022-09-02'
276
271
  ```
277
272
 
278
- For this to be effective, ensure `constraint_exclusion` is set to `partition` (default value) or `on`.
273
+ For this to be effective, ensure `constraint_exclusion` is set to `partition` (the default value) or `on`.
279
274
 
280
275
  ```sql
281
276
  SHOW constraint_exclusion;
282
277
  ```
283
278
 
284
- ### Writes
285
-
286
- Before Postgres 10, if you use `INSERT` statements with a `RETURNING` clause (as frameworks like Rails do), you’ll no longer receive the id of the newly inserted record(s) back. If you need this, you can either:
287
-
288
- 1. Insert directly into the partition
289
- 2. Get value before the insert with `SELECT nextval('sequence_name')` (for multiple rows, append `FROM generate_series(1, n)`)
290
-
291
279
  ## Frameworks
292
280
 
293
281
  ### Rails
294
282
 
295
- For Postgres 10+, specify the primary key for partitioned models to ensure it’s returned.
283
+ Specify the primary key for partitioned models to ensure it’s returned.
296
284
 
297
285
  ```ruby
298
286
  class Visit < ApplicationRecord
@@ -300,16 +288,6 @@ class Visit < ApplicationRecord
300
288
  end
301
289
  ```
302
290
 
303
- Before Postgres 10, preload the value.
304
-
305
- ```ruby
306
- class Visit < ApplicationRecord
307
- before_create do
308
- self.id ||= self.class.connection.select_all("SELECT nextval('#{self.class.sequence_name}')").first["nextval"]
309
- end
310
- end
311
- ```
312
-
313
291
  ### Other Frameworks
314
292
 
315
293
  Please submit a PR if additional configuration is needed.
@@ -328,10 +306,6 @@ pgslice swap <table>
328
306
 
329
307
  Triggers aren’t copied from the original table. You can set up triggers on the intermediate table if needed. Note that Postgres doesn’t support `BEFORE / FOR EACH ROW` triggers on partitioned tables.
330
308
 
331
- ## Declarative Partitioning
332
-
333
- Postgres 10 introduces [declarative partitioning](https://www.postgresql.org/docs/10/static/ddl-partitioning.html#ddl-partitioning-declarative). A major benefit is `INSERT` statements with a `RETURNING` clause work as expected. If you prefer to use trigger-based partitioning instead (not recommended), pass the `--trigger-based` option to the `prep` command.
334
-
335
309
  ## Data Protection
336
310
 
337
311
  Always make sure your [connection is secure](https://ankane.org/postgres-sslmode-explained) when connecting to a database over a network you don’t fully trust. Your best option is to connect over SSH or a VPN. Another option is to use `sslmode=verify-full`. If you don’t do this, your database credentials can be compromised.
@@ -96,6 +96,9 @@ CREATE TABLE #{quote_table(partition)}
96
96
  partitions.each do |partition|
97
97
  day = partition_date(partition, name_format)
98
98
 
99
+ # note: does not support generated columns
100
+ # could support by listing columns
101
+ # but this would cause issues with schema changes
99
102
  sql = "(NEW.#{quote_ident(field)} >= #{sql_date(day, cast)} AND NEW.#{quote_ident(field)} < #{sql_date(advance_date(day, period, 1), cast)}) THEN
100
103
  INSERT INTO #{quote_table(partition)} VALUES (NEW.*);"
101
104
 
@@ -3,6 +3,7 @@ module PgSlice
3
3
  desc "prep TABLE [COLUMN] [PERIOD]", "Create an intermediate table for partitioning"
4
4
  option :partition, type: :boolean, default: true, desc: "Partition the table"
5
5
  option :trigger_based, type: :boolean, default: false, desc: "Use trigger-based partitioning"
6
+ option :test_version, type: :numeric, hide: true
6
7
  def prep(table, column=nil, period=nil)
7
8
  table = create_table(table)
8
9
  intermediate_table = table.intermediate_table
@@ -24,23 +25,23 @@ module PgSlice
24
25
  queries = []
25
26
 
26
27
  # version summary
27
- # 1. trigger-based
28
- # 2. declarative, with indexes and foreign keys on child tables
29
- # 3. declarative, with indexes and foreign keys on parent table
30
- version =
31
- if options[:trigger_based] || server_version_num < 100000
32
- 1
33
- elsif server_version_num < 110000
34
- 2
35
- else
36
- 3
37
- end
28
+ # 1. trigger-based (pg9)
29
+ # 2. declarative, with indexes and foreign keys on child tables (pg10)
30
+ # 3. declarative, with indexes and foreign keys on parent table (pg11+)
31
+ version = options[:test_version] || (options[:trigger_based] ? 1 : 3)
38
32
 
39
33
  declarative = version > 1
40
34
 
41
35
  if declarative && options[:partition]
36
+ including = ["DEFAULTS", "CONSTRAINTS", "STORAGE", "COMMENTS", "STATISTICS"]
37
+ if server_version_num >= 120000
38
+ including << "GENERATED"
39
+ end
40
+ if server_version_num >= 140000
41
+ including << "COMPRESSION"
42
+ end
42
43
  queries << <<-SQL
43
- CREATE TABLE #{quote_table(intermediate_table)} (LIKE #{quote_table(table)} INCLUDING DEFAULTS INCLUDING CONSTRAINTS INCLUDING STORAGE INCLUDING COMMENTS) PARTITION BY RANGE (#{quote_ident(column)});
44
+ CREATE TABLE #{quote_table(intermediate_table)} (LIKE #{quote_table(table)} #{including.map { |v| "INCLUDING #{v}" }.join(" ")}) PARTITION BY RANGE (#{quote_ident(column)});
44
45
  SQL
45
46
 
46
47
  if version == 3
@@ -20,7 +20,7 @@ module PgSlice
20
20
  queries << "ALTER SEQUENCE #{quote_ident(sequence["sequence_schema"])}.#{quote_ident(sequence["sequence_name"])} OWNED BY #{quote_table(table)}.#{quote_ident(sequence["related_column"])};"
21
21
  end
22
22
 
23
- queries.unshift("SET LOCAL lock_timeout = '#{options[:lock_timeout]}';") if server_version_num >= 90300
23
+ queries.unshift("SET LOCAL lock_timeout = #{escape_literal(options[:lock_timeout])};")
24
24
 
25
25
  run_queries(queries)
26
26
  end
@@ -35,11 +35,15 @@ module PgSlice
35
35
  @schema = Array(params.delete("schema") || "public")[0]
36
36
  uri.query = params.any? ? URI.encode_www_form(params) : nil
37
37
 
38
- ENV["PGCONNECT_TIMEOUT"] ||= "1"
38
+ ENV["PGCONNECT_TIMEOUT"] ||= "3"
39
39
  conn = PG::Connection.new(uri.to_s)
40
40
  conn.set_notice_processor do |message|
41
41
  say message
42
42
  end
43
+ @server_version_num = conn.exec("SHOW server_version_num")[0]["server_version_num"].to_i
44
+ if @server_version_num < 110000
45
+ abort "This version of pgslice requires Postgres 11+"
46
+ end
43
47
  conn
44
48
  end
45
49
  rescue PG::ConnectionBad => e
@@ -86,7 +90,8 @@ module PgSlice
86
90
  end
87
91
 
88
92
  def server_version_num
89
- execute("SHOW server_version_num")[0]["server_version_num"].to_i
93
+ connection # ensure called first
94
+ @server_version_num
90
95
  end
91
96
 
92
97
  # helpers
@@ -97,7 +102,7 @@ module PgSlice
97
102
  else
98
103
  fmt = "%Y-%m-%d"
99
104
  end
100
- str = "'#{time.strftime(fmt)}'"
105
+ str = escape_literal(time.strftime(fmt))
101
106
  add_cast ? "#{str}::#{cast}" : str
102
107
  end
103
108
 
@@ -152,6 +157,10 @@ module PgSlice
152
157
  PG::Connection.quote_ident(value)
153
158
  end
154
159
 
160
+ def escape_literal(value)
161
+ connection.escape_literal(value)
162
+ end
163
+
155
164
  def quote_table(table)
156
165
  table.quote_table
157
166
  end
@@ -176,5 +185,12 @@ module PgSlice
176
185
  def make_fk_def(fk_def, table)
177
186
  "ALTER TABLE #{quote_table(table)} ADD #{fk_def};"
178
187
  end
188
+
189
+ def make_stat_def(stat_def, table)
190
+ m = /ON (.+) FROM/.match(stat_def)
191
+ # errors on duplicate names, but should be rare
192
+ stat_name = "#{table}_#{m[1].split(", ").map { |v| v.gsub(/\W/i, "") }.join("_")}_stat"
193
+ stat_def.sub(/ FROM \S+/, " FROM #{quote_table(table)}").sub(/ STATISTICS .+ ON /, " STATISTICS #{quote_ident(stat_name)} ON ") + ";"
194
+ end
179
195
  end
180
196
  end
data/lib/pgslice/table.rb CHANGED
@@ -16,7 +16,7 @@ module PgSlice
16
16
  end
17
17
 
18
18
  def columns
19
- execute("SELECT column_name FROM information_schema.columns WHERE table_schema = $1 AND table_name = $2", [schema, name]).map{ |r| r["column_name"] }
19
+ execute("SELECT column_name FROM information_schema.columns WHERE table_schema = $1 AND table_name = $2 AND is_generated = 'NEVER'", [schema, name]).map{ |r| r["column_name"] }
20
20
  end
21
21
 
22
22
  # http://www.dbforums.com/showthread.php?1667561-How-to-list-sequences-and-the-columns-by-SQL
@@ -178,12 +178,16 @@ module PgSlice
178
178
  PgSlice::CLI.instance.send(:execute, *args)
179
179
  end
180
180
 
181
+ def escape_literal(value)
182
+ PgSlice::CLI.instance.send(:escape_literal, value)
183
+ end
184
+
181
185
  def quote_ident(value)
182
186
  PG::Connection.quote_ident(value)
183
187
  end
184
188
 
185
189
  def regclass
186
- "'#{quote_table}'::regclass"
190
+ "#{escape_literal(quote_table)}::regclass"
187
191
  end
188
192
 
189
193
  def sql_date(time, cast, add_cast = true)
@@ -192,7 +196,7 @@ module PgSlice
192
196
  else
193
197
  fmt = "%Y-%m-%d"
194
198
  end
195
- str = "'#{time.strftime(fmt)}'"
199
+ str = escape_literal(time.strftime(fmt))
196
200
  add_cast ? "#{str}::#{cast}" : str
197
201
  end
198
202
  end
@@ -1,3 +1,3 @@
1
1
  module PgSlice
2
- VERSION = "0.5.0"
2
+ VERSION = "0.6.0"
3
3
  end
metadata CHANGED
@@ -1,43 +1,43 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pgslice
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-01-30 00:00:00.000000000 Z
11
+ date: 2023-04-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: thor
14
+ name: pg
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: '0'
19
+ version: '1'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: '0'
26
+ version: '1'
27
27
  - !ruby/object:Gem::Dependency
28
- name: pg
28
+ name: thor
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - ">="
32
32
  - !ruby/object:Gem::Version
33
- version: 0.18.2
33
+ version: '0'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - ">="
39
39
  - !ruby/object:Gem::Version
40
- version: 0.18.2
40
+ version: '0'
41
41
  description:
42
42
  email: andrew@ankane.org
43
43
  executables:
@@ -73,14 +73,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
73
73
  requirements:
74
74
  - - ">="
75
75
  - !ruby/object:Gem::Version
76
- version: '2.5'
76
+ version: '2.7'
77
77
  required_rubygems_version: !ruby/object:Gem::Requirement
78
78
  requirements:
79
79
  - - ">="
80
80
  - !ruby/object:Gem::Version
81
81
  version: '0'
82
82
  requirements: []
83
- rubygems_version: 3.4.1
83
+ rubygems_version: 3.4.10
84
84
  signing_key:
85
85
  specification_version: 4
86
86
  summary: Postgres partitioning as easy as pie