pg_online_schema_change 0.3.0 → 0.6.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: b06d39169d55c016d78969968c71cac7f0c49a3c716d0bc8c5004905ef820fb0
- data.tar.gz: 78fcc0a928a5efa645ec201cfca093f4ef078b8d683f0776a7f9ad91bb11ef99
+ metadata.gz: 3bc78403dc036598371393d5be0914bd7150660eda9798fb827af2892d058396
+ data.tar.gz: 6dbfb6e41267accf02e0da8ba88d7ea5039aa368ddc497e40d81f8952bed811d
  SHA512:
- metadata.gz: 179fcaf09bcc36b69186ba48494835841a88c095e010756f0e6f322a146279d0790403f2c195f23f483e89a1cb74c10b50a3ceee26ad6d311e3d9977af1b8c07
- data.tar.gz: 157255f9d901ec2cb980d84bd664cc0eaae32854bdd79ebd262b2112eef965f8dc2d123277a35446fefd6ecdac4a4ba4c94769589461053241d1eedbe40efa52
+ metadata.gz: '0683dd33e681162a1b16b471fbefc52021b66ccbb3235994fd0654c6d97052a5482e22e29c1280cbc332eb8510b1b4a571075b4e36f9a2565c359ad086100d05'
+ data.tar.gz: 51e1fcd9d8d8d4f2a3f5760dee1e51fbde5a45f3bf7dd9c2d048390856b3b71bfb23333b053e0a0e43c026e90a9f704efbc903760cd37e95eb5ac5ea24b66512
data/.rubocop.yml CHANGED
@@ -1,3 +1,5 @@
+ inherit_from: .rubocop_todo.yml
+
  require:
  - rubocop-rspec
  - rubocop-packaging
@@ -14,71 +16,70 @@ AllCops:
  - "vendor/**/*"

  Layout/HashAlignment:
- EnforcedColonStyle:
- - table
- - key
- EnforcedHashRocketStyle:
- - table
- - key
+ EnforcedColonStyle: key
+ EnforcedHashRocketStyle: key

  Layout/SpaceAroundEqualsInParameterDefault:
- EnforcedStyle: no_space
+ EnforcedStyle: space

  Metrics/AbcSize:
- Max: 20
+ Enabled: true
+ Max: 40
  Exclude:
- - "test/**/*"
+ - "spec/**/*"

  Metrics/BlockLength:
+ Max: 100
  Exclude:
  - "*.gemspec"
  - "Rakefile"
+ - "spec/**/*"

  Metrics/ClassLength:
  Exclude:
  - "test/**/*"

  Metrics/MethodLength:
- Max: 18
+ Max: 30
  Exclude:
  - "test/**/*"

  Metrics/ParameterLists:
- Max: 6
+ Max: 5

  Naming/MemoizedInstanceVariableName:
- Enabled: false
+ Enabled: true

  Naming/VariableNumber:
- Enabled: false
-
- Rake/Desc:
- Enabled: false
+ Enabled: true

  Style/BarePercentLiterals:
  EnforcedStyle: percent_q

  Style/ClassAndModuleChildren:
- Enabled: false
+ Enabled: true

  Style/Documentation:
  Enabled: false

  Style/DoubleNegation:
- Enabled: false
+ Enabled: true

  Style/EmptyMethod:
- Enabled: false
+ Enabled: true

  Style/FrozenStringLiteralComment:
- Enabled: false
+ Enabled: true

  Style/NumericPredicate:
- Enabled: false
+ Enabled: true

  Style/StringLiterals:
  EnforcedStyle: double_quotes

+ Style/StringLiteralsInInterpolation:
+ EnforcedStyle: double_quotes
+
  Style/TrivialAccessors:
  AllowPredicates: true

@@ -91,9 +92,41 @@ Style/TrailingCommaInArrayLiteral:
  Style/TrailingCommaInHashLiteral:
  EnforcedStyleForMultiline: comma

- Style/SpaceAroundEqualsInParameterDefault:
- EnforcedStyle: space
+ Layout/MultilineArrayBraceLayout:
+ Enabled: true
+ EnforcedStyle: symmetrical

- Style/MultilineHashBraceLayout:
+ Layout/MultilineHashBraceLayout:
  Enabled: true
  EnforcedStyle: symmetrical
+
+ Layout/MultilineAssignmentLayout:
+ Enabled: true
+ EnforcedStyle: same_line
+
+ Layout/FirstArrayElementIndentation:
+ Enabled: true
+ EnforcedStyle: consistent
+
+ Layout/FirstHashElementIndentation:
+ Enabled: true
+ EnforcedStyle: consistent
+
+ Layout/MultilineHashKeyLineBreaks:
+ Enabled: true
+
+ Layout/LineLength:
+ Enabled: true
+ Max: 250
+
+ Style/FormatStringToken:
+ Enabled: true
+ EnforcedStyle: template
+
+ RSpec/MessageSpies:
+ Enabled: true
+ EnforcedStyle: receive
+
+ RSpec/FilePath:
+ Enabled: true
+ SpecSuffixOnly: true
data/.rubocop_todo.yml ADDED
@@ -0,0 +1,44 @@
+ # This configuration was generated by
+ # `rubocop --auto-gen-config`
+ # on 2022-02-21 22:46:44 UTC using RuboCop version 1.23.0.
+ # The point is for the user to remove these configuration records
+ # one by one as the offenses are removed from the code base.
+ # Note that changes in the inspected code, or installation of new
+ # versions of RuboCop, may require this file to be generated again.
+
+ # Offense count: 2
+ # Configuration parameters: CountComments, CountAsOne.
+ Metrics/ClassLength:
+ Max: 233
+
+ # Offense count: 2
+ # Configuration parameters: IgnoredMethods.
+ Metrics/CyclomaticComplexity:
+ Max: 15
+
+ # Offense count: 2
+ # Configuration parameters: IgnoredMethods.
+ Metrics/PerceivedComplexity:
+ Max: 13
+
+ # Offense count: 1
+ Packaging/GemspecGit:
+ Exclude:
+ - 'pg_online_schema_change.gemspec'
+
+ # Offense count: 62
+ # Configuration parameters: CountAsOne.
+ RSpec/ExampleLength:
+ Max: 55
+
+ # Offense count: 38
+ RSpec/MultipleExpectations:
+ Max: 14
+
+ # Offense count: 6
+ # Configuration parameters: AllowedMethods.
+ # AllowedMethods: respond_to_missing?
+ Style/OptionalBooleanParameter:
+ Exclude:
+ - 'lib/pg_online_schema_change/query.rb'
+ - 'lib/pg_online_schema_change/replay.rb'
data/CHANGELOG.md CHANGED
@@ -1,3 +1,22 @@
+ ## [0.5.0] - 2022-02-26
+ * Share some preliminary load test figures in https://github.com/shayonj/pg-osc/pull/54
+ * Reuse existing transaction open for reading table columns in https://github.com/shayonj/pg-osc/pull/53
+ * Start to deprecate --password with PGPASSWORD in https://github.com/shayonj/pg-osc/pull/56
+ * Introduce configurable PULL_BATCH_COUNT and DELTA_COUNT in https://github.com/shayonj/pg-osc/pull/57
+
+ ## [0.4.0] - 2022-02-22
+ * Lint sourcecode, setup Rubocop proper and Lint in CI by @shayonj in https://github.com/shayonj/pg-osc/pull/46
+ * Uniquely identify operation_type column by @shayonj in https://github.com/shayonj/pg-osc/pull/50
+ * Introduce primary key on audit table for ordered reads by @shayonj in https://github.com/shayonj/pg-osc/pull/49
+   - This addresses an edge case with replay.
+ * Uniquely identify trigger_time column by @shayonj in https://github.com/shayonj/pg-osc/pull/51
+ * Abstract assertions into a helper function by @shayonj in https://github.com/shayonj/pg-osc/pull/52
+
+ ## [0.3.0] - 2022-02-21
+
+ - Explicitly call dependencies and bump dependencies by @shayonj https://github.com/shayonj/pg-osc/pull/44
+ - Introduce Dockerfile and release process https://github.com/shayonj/pg-osc/pull/45
+
  ## [0.2.0] - 2022-02-17

  - Use ISOLATION LEVEL SERIALIZABLE ([#42](https://github.com/shayonj/pg-osc/pull/42)) (props to @jfrost)
data/CODE_OF_CONDUCT.md CHANGED
@@ -39,7 +39,7 @@ This Code of Conduct applies within all community spaces, and also applies when

  ## Enforcement

- Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at shayon@loom.com. All complaints will be reviewed and investigated promptly and fairly.
+ Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at shayonj@gmail.com. All complaints will be reviewed and investigated promptly and fairly.

  All community leaders are obligated to respect the privacy and security of the reporter of any incident.

data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
  PATH
  remote: .
  specs:
- pg_online_schema_change (0.2.0)
+ pg_online_schema_change (0.5.0)
  ougai (~> 2.0.0)
  pg (~> 1.3.2)
  pg_query (~> 2.1.3)
@@ -14,7 +14,6 @@ GEM
  coderay (1.1.3)
  diff-lcs (1.5.0)
  google-protobuf (3.19.4)
- google-protobuf (3.19.4-x86_64-linux)
  method_source (1.0.0)
  oj (3.13.11)
  ougai (2.0.0)
data/README.md CHANGED
@@ -16,8 +16,8 @@ pg-online-schema-change (`pg-osc`) is a tool for making schema changes (any `ALT
  - [Installation](#installation)
  - [Requirements](#requirements)
  - [Usage](#usage)
- - [How does it work](#how-does-it-work)
  - [Prominent features](#prominent-features)
+ - [Load test](#load-test)
  - [Examples](#examples)
    * [Renaming a column](#renaming-a-column)
    * [Multiple ALTER statements](#multiple-alter-statements)
@@ -25,6 +25,7 @@ pg-online-schema-change (`pg-osc`) is a tool for making schema changes (any `ALT
    * [Backfill data](#backfill-data)
    * [Running using Docker](#running-using-docker)
  - [Caveats](#caveats)
+ - [How does it work](#how-does-it-work)
  - [Development](#development)
  - [Releasing](#releasing)
  - [Contributing](#contributing)
@@ -75,13 +76,17 @@ Options:
  -u, --username=USERNAME # Username for the Database
  -p, --port=N # Port for the Database
  # Default: 5432
- -w, --password=PASSWORD # Password for the Database
+ -w, --password=PASSWORD # DEPRECATED: Password for the Database. Please pass PGPASSWORD environment variable instead.
  -v, [--verbose], [--no-verbose] # Emit logs in debug mode
  -f, [--drop], [--no-drop] # Drop the original table in the end after the swap
  -k, [--kill-backends], [--no-kill-backends] # Kill other competing queries/backends when trying to acquire lock for the shadow table creation and swap. It will wait for --wait-time-for-lock duration before killing backends and try upto 3 times.
  -w, [--wait-time-for-lock=N] # Time to wait before killing backends to acquire lock and/or retrying upto 3 times. It will kill backends if --kill-backends is true, otherwise try upto 3 times and exit if it cannot acquire a lock.
  # Default: 10
- -c, [--copy-statement=COPY_STATEMENT] # Takes a .sql file location where you can provide a custom query to be played (ex: backfills) when pg-osc copies data from the primary to the shadow table. More examples in README.
+ -c, [--copy-statement=COPY_STATEMENT] # Takes a .sql file location where you can provide a custom query to be played (ex: backfills) when pgosc copies data from the primary to the shadow table. More examples in README.
+ -b, [--pull-batch-count=N] # Number of rows to be replayed on each iteration after copy. This can be tuned for faster catch up and swap. Best used with delta-count.
+ # Default: 1000
+ -e, [--delta-count=N] # Indicates how many rows should be remaining before a swap should be performed. This can be tuned for faster catch up and swap, especially on highly volume tables. Best used with pull-batch-count.
+ # Default: 20
  ```

  ```
@@ -90,57 +95,39 @@ Usage:

  print the version
  ```
- ## How does it work
-
- - **Primary table**: A table against which a potential schema change is to be run
- - **Shadow table**: A copy of an existing primary table
- - **Audit table**: A table to store any updates/inserts/delete on a primary table
-
- ![how-it-works](diagrams/how-it-works.png)
-
-
- 1. Create an audit table to record changes made to the parent table.
- 2. Acquire a brief `ACCESS EXCLUSIVE` lock to add a trigger on the parent table (for inserts, updates, deletes) to the audit table.
- 3. Create a new shadow table and run ALTER/migration on the shadow table.
- 4. Copy all rows from the old table.
- 5. Build indexes on the new table.
- 6. Replay all changes accumulated in the audit table against the shadow table.
-    - Delete rows in the audit table as they are replayed.
- 7. Once the delta (remaining rows) is ~20 rows, acquire an `ACCESS EXCLUSIVE` lock against the parent table within a transaction and:
-    - swap table names (shadow table <> parent table).
-    - update references in other tables (FKs) by dropping and re-creating the FKs with a `NOT VALID`.
- 8. Runs `ANALYZE` on the new table.
- 9. Validates all FKs that were added with `NOT VALID`.
- 10. Drop parent (now old) table (OPTIONAL).
-
  ## Prominent features
  - `pg-osc` supports when a column is being added, dropped or renamed with no data loss.
  - `pg-osc` acquires minimal locks throughout the process (read more below on the caveats).
  - Copies over indexes and Foreign keys.
  - Optionally drop or retain old tables in the end.
+ - Tune how slow or fast replays should be from the audit/log table ([Replaying larger workloads](#replaying-larger-workloads)).
  - Backfill old/new columns as data is copied from primary table to shadow table, and then perform the swap. [Example](#backfill-data)
  - **TBD**: Ability to reverse the change with no data loss. [tracking issue](https://github.com/shayonj/pg-osc/issues/14)

+ ## Load test
+
+ [More about the preliminary load test figures here](docs/load-test.md)
+
  ## Examples

  ### Renaming a column
  ```
+ export PGPASSWORD=""
  pg-online-schema-change perform \
  --alter-statement 'ALTER TABLE books RENAME COLUMN email TO new_email' \
  --dbname "postgres" \
  --host "localhost" \
  --username "jamesbond" \
- --password "" \
  ```

  ### Multiple ALTER statements
  ```
+ export PGPASSWORD=""
  pg-online-schema-change perform \
  --alter-statement 'ALTER TABLE books ADD COLUMN "purchased" BOOLEAN DEFAULT FALSE; ALTER TABLE books RENAME COLUMN email TO new_email;' \
  --dbname "postgres" \
  --host "localhost" \
  --username "jamesbond" \
- --password "" \
  --drop
  ```

@@ -148,13 +135,30 @@ pg-online-schema-change perform \
  If the operation is being performed on a busy table, you can use `pg-osc`'s `kill-backend` functionality to kill other backends that may be competing with the `pg-osc` operation to acquire a lock for a brief while. The `ACCESS EXCLUSIVE` lock acquired by `pg-osc` is only held for a brief while and released after. You can tune how long `pg-osc` should wait before killing other backends (or if at all `pg-osc` should kill backends in the first place).

  ```
+ export PGPASSWORD=""
  pg-online-schema-change perform \
  --alter-statement 'ALTER TABLE books ADD COLUMN "purchased" BOOLEAN DEFAULT FALSE;' \
  --dbname "postgres" \
  --host "localhost" \
  --username "jamesbond" \
- --password "" \
- --wait-time-for-lock=5 \
+ --wait-time-for-lock 5 \
+ --kill-backends \
+ --drop
+ ```
+
+ ### Replaying larger workloads
+ If you have a table with a high write volume, the default replay iteration may not suffice. That is, you may see that `pg-osc` is replaying 1000 rows (`pull-batch-count`) in one go from the audit table. `pg-osc` also waits until the remaining row count (`delta-count`) in the audit table is 20 before making the swap. You can tune these values higher for a faster catch up and swap on such workloads.
+
+ ```
+ export PGPASSWORD=""
+ pg-online-schema-change perform \
+ --alter-statement 'ALTER TABLE books ADD COLUMN "purchased" BOOLEAN DEFAULT FALSE;' \
+ --dbname "postgres" \
+ --host "localhost" \
+ --username "jamesbond" \
+ --pull-batch-count 2000 \
+ --delta-count 500 \
+ --wait-time-for-lock 5 \
  --kill-backends \
  --drop
  ```
@@ -183,7 +187,6 @@ pg-online-schema-change perform \
  --dbname "postgres" \
  --host "localhost" \
  --username "jamesbond" \
- --password "" \
  --copy-statement "/src/query.sql" \
  --drop
  ```
@@ -197,7 +200,6 @@ docker run --network host -it --rm shayonj/pg-osc:latest \
  --dbname "postgres" \
  --host "localhost" \
  --username "jamesbond" \
- --password "" \
  --drop
  ```
  ## Caveats
@@ -215,6 +217,29 @@ docker run --network host -it --rm shayonj/pg-osc:latest \
  - Can be fixed in future releases. Feel free to open a feature req.
  - Foreign keys are dropped & re-added to referencing tables with a `NOT VALID`. A follow on `VALIDATE CONSTRAINT` is run.
  - Ensures that integrity is maintained and re-introducing FKs doesn't acquire additional locks, hence the `NOT VALID`.
+ ## How does it work
+
+ - **Primary table**: A table against which a potential schema change is to be run
+ - **Shadow table**: A copy of an existing primary table
+ - **Audit table**: A table to store any updates/inserts/delete on a primary table
+
+ ![how-it-works](docs/how-it-works.png)
+
+
+ 1. Create an audit table to record changes made to the parent table.
+ 2. Acquire a brief `ACCESS EXCLUSIVE` lock to add a trigger on the parent table (for inserts, updates, deletes) to the audit table.
+ 3. Create a new shadow table and run ALTER/migration on the shadow table.
+ 4. Copy all rows from the old table.
+ 5. Build indexes on the new table.
+ 6. Replay all changes accumulated in the audit table against the shadow table.
+    - Delete rows in the audit table as they are replayed.
+ 7. Once the delta (remaining rows) is ~20 rows, acquire an `ACCESS EXCLUSIVE` lock against the parent table within a transaction and:
+    - swap table names (shadow table <> parent table).
+    - update references in other tables (FKs) by dropping and re-creating the FKs with a `NOT VALID`.
+ 8. Runs `ANALYZE` on the new table.
+ 9. Validates all FKs that were added with `NOT VALID`.
+ 10. Drop parent (now old) table (OPTIONAL).
+
  ## Development

  - Install ruby 3.0
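The "How does it work" steps added to the README above boil down to an audit-table-plus-trigger pattern. Below is a minimal hand-written sketch of steps 1–2; the `books` table, function and trigger names are illustrative only, and pg-osc itself generates randomized identifiers plus a serial primary key on the audit table (see the orchestrate diff further down).

```ruby
# Hand-written sketch of the audit-table/trigger pattern (steps 1-2 above).
# Names are illustrative; pg-osc generates its own randomized identifiers.
require "pg"

conn = PG.connect(dbname: "postgres")

conn.exec(<<~SQL)
  -- audit table mirrors the parent table plus bookkeeping columns
  CREATE TABLE books_audit (operation_type text, trigger_time timestamp, LIKE books);

  -- trigger function records every write made to the parent table
  CREATE OR REPLACE FUNCTION books_audit_fn() RETURNS trigger AS $$
  BEGIN
    IF (TG_OP = 'DELETE') THEN
      INSERT INTO books_audit SELECT TG_OP, clock_timestamp(), OLD.*;
    ELSE
      INSERT INTO books_audit SELECT TG_OP, clock_timestamp(), NEW.*;
    END IF;
    RETURN NEW;
  END;
  $$ LANGUAGE plpgsql;

  CREATE TRIGGER books_audit_trigger
    AFTER INSERT OR UPDATE OR DELETE ON books
    FOR EACH ROW EXECUTE PROCEDURE books_audit_fn();
SQL
```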
File without changes
File without changes
Binary file
data/docs/load-test.md ADDED
@@ -0,0 +1,138 @@
+ # Preliminary Load Test
+
+ ## pg-osc: No downtime schema changes with 7K+ writes/s & 12k+ reads/s
+
+ This is a very basic load test performed with `pgbench` against a single-instance PostgreSQL DB running on DigitalOcean with the following configuration:
+
+ - **128GB RAM**
+ - **32vCPU**
+ - **695GB Disk**
+ - Transaction-based connection pool with **500 pool limit**
+
+ Total time taken to run schema change: **<3mins**
+
+ ## Simulating load with pgbench
+
+ **Initialize**
+ ```
+ pgbench -p $PORT --initialize -s 20 -F 20 --foreign-keys --host $HOST -U $USERNAME -d $DB
+ ```
+
+ This creates a bunch of pgbench tables. The table being used with `pg-osc` is `pgbench_accounts`, which has FKs, is also referenced by other tables with FKs, and contains 2M rows.
+
+ **Begin**
+ ```
+ pgbench -p $PORT -j 72 -c 288 -T 500 -r --host $DB_HOST -U $USERNAME -d $DB
+ ```
+
+ ## Running pg-osc
+
+ Simple `ALTER` statement for experimentation purposes.
+
+ ```sql
+ ALTER TABLE pgbench_accounts ADD COLUMN "purchased" BOOLEAN DEFAULT FALSE;
+ ```
+
+ **Execution**
+
+ ```bash
+ bundle exec bin/pg-online-schema-change perform \
+ -a 'ALTER TABLE pgbench_accounts ADD COLUMN "purchased" BOOLEAN DEFAULT FALSE;' \
+ -d "pool" \
+ -p 25061 \
+ -h "..." \
+ -u "..." \
+ --pull-batch-count 2000 \
+ --delta-count 200
+ ```
+
+ ## Outcome
+
+ **pgbench results**
+
+ ```
+ number of transactions actually processed: 1060382
+ latency average = 144.874 ms
+ tps = 1767.057392 (including connections establishing)
+ tps = 1777.971823 (excluding connections establishing)
+ statement latencies in milliseconds:
+ 0.479 \set aid random(1, 100000 * :scale)
+ 0.409 \set bid random(1, 1 * :scale)
+ 0.247 \set tid random(1, 10 * :scale)
+ 0.208 \set delta random(-5000, 5000)
+ 3.136 BEGIN;
+ 4.243 UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid;
+ 4.488 SELECT abalance FROM pgbench_accounts WHERE aid = :aid;
+ 71.017 UPDATE pgbench_tellers SET tbalance = tbalance + :delta WHERE tid = :tid;
+ 46.689 UPDATE pgbench_branches SET bbalance = bbalance + :delta WHERE bid = :bid;
+ 4.035 INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, CURRENT_TIMESTAMP);
+ 4.166 END;
+ ```
+
+ **Metrics**
+ ![load-test](load-test-1.png)
+
+ **New table structure**
+
+ Added `purchased` column.
+
+ ```
+ defaultdb=> \d+ pgbench_accounts;
+ Table "public.pgbench_accounts"
+ Column | Type | Collation | Nullable | Default | Storage | Stats target | Description
+ -----------+---------------+-----------+----------+---------+----------+--------------+-------------
+ aid | integer | | not null | | plain | |
+ bid | integer | | | | plain | |
+ abalance | integer | | | | plain | |
+ filler | character(84) | | | | extended | |
+ purchased | boolean | | | false | plain | |
+ Indexes:
+ "pgosc_st_pgbench_accounts_815029_pkey" PRIMARY KEY, btree (aid)
+ Foreign-key constraints:
+ "pgbench_accounts_bid_fkey" FOREIGN KEY (bid) REFERENCES pgbench_branches(bid)
+ Referenced by:
+ TABLE "pgbench_history" CONSTRAINT "pgbench_history_aid_fkey" FOREIGN KEY (aid) REFERENCES pgbench_accounts(aid)
+ Options: autovacuum_enabled=false, fillfactor=20
+ ```
+
+ **Logs**
+
+ <details>
+ <summary>Logs from pg-osc</summary>
+
+ ```json
+ {"name":"pg-online-schema-change","hostname":"MacBook-Pro.local","pid":13263,"level":30,"time":"2022-02-25T17:22:01.147-05:00","v":0,"msg":"Setting up audit table","audit_table":"pgosc_at_pgbench_accounts_714a8b","version":"0.4.0"}
+ {"name":"pg-online-schema-change","hostname":"MacBook-Pro.local","pid":13263,"level":30,"time":"2022-02-25T17:22:01.660-05:00","v":0,"msg":"Setting up triggers","version":"0.4.0"}
+ NOTICE: trigger "primary_to_audit_table_trigger" for relation "pgbench_accounts" does not exist, skipping
+ {"name":"pg-online-schema-change","hostname":"MacBook-Pro.local","pid":13263,"level":30,"time":"2022-02-25T17:22:01.814-05:00","v":0,"msg":"Setting up shadow table","shadow_table":"pgosc_st_pgbench_accounts_714a8b","version":"0.4.0"}
+ {"name":"pg-online-schema-change","hostname":"MacBook-Pro.local","pid":13263,"level":30,"time":"2022-02-25T17:22:02.169-05:00","v":0,"msg":"Running alter statement on shadow table","shadow_table":"pgosc_st_pgbench_accounts_714a8b","parent_table":"pgbench_accounts","version":"0.4.0"}
+ {"name":"pg-online-schema-change","hostname":"MacBook-Pro.local","pid":13263,"level":30,"time":"2022-02-25T17:22:02.204-05:00","v":0,"msg":"Clearing contents of audit table before copy..","shadow_table":"pgosc_st_pgbench_accounts_714a8b","parent_table":"pgbench_accounts","version":"0.4.0"}
+ {"name":"pg-online-schema-change","hostname":"MacBook-Pro.local","pid":13263,"level":30,"time":"2022-02-25T17:22:02.240-05:00","v":0,"msg":"Copying contents..","shadow_table":"pgosc_st_pgbench_accounts_714a8b","parent_table":"pgbench_accounts","version":"0.4.0"}
+ {"name":"pg-online-schema-change","hostname":"MacBook-Pro.local","pid":13263,"level":30,"time":"2022-02-25T17:22:20.481-05:00","v":0,"msg":"Performing ANALYZE!","version":"0.4.0"}
+ INFO: analyzing "public.pgbench_accounts"
+ INFO: "pgbench_accounts": scanned 30000 of 166667 pages, containing 360000 live rows and 200 dead rows; 30000 rows in sample, 2000004 estimated total rows
+ {"name":"pg-online-schema-change","hostname":"MacBook-Pro.local","pid":13263,"level":30,"time":"2022-02-25T17:22:21.078-05:00","v":0,"msg":"Replaying rows, count: 2000","version":"0.4.0"}
+ {"name":"pg-online-schema-change","hostname":"MacBook-Pro.local","pid":13263,"level":30,"time":"2022-02-25T17:22:21.580-05:00","v":0,"msg":"Replaying rows, count: 2000","version":"0.4.0"}
+ {"name":"pg-online-schema-change","hostname":"MacBook-Pro.local","pid":13263,"level":30,"time":"2022-02-25T17:22:22.022-05:00","v":0,"msg":"Replaying rows, count: 2000","version":"0.4.0"}
+ {"name":"pg-online-schema-change","hostname":"MacBook-Pro.local","pid":13263,"level":30,"time":"2022-02-25T17:22:22.490-05:00","v":0,"msg":"Replaying rows, count: 2000","version":"0.4.0"}
+ {"name":"pg-online-schema-change","hostname":"MacBook-Pro.local","pid":13263,"level":30,"time":"2022-02-25T17:22:22.866-05:00","v":0,"msg":"Replaying rows, count: 661","version":"0.4.0"}
+ {"name":"pg-online-schema-change","hostname":"MacBook-Pro.local","pid":13263,"level":30,"time":"2022-02-25T17:22:23.212-05:00","v":0,"msg":"Replaying rows, count: 533","version":"0.4.0"}
+ {"name":"pg-online-schema-change","hostname":"MacBook-Pro.local","pid":13263,"level":30,"time":"2022-02-25T17:22:23.512-05:00","v":0,"msg":"Replaying rows, count: 468","version":"0.4.0"}
+ {"name":"pg-online-schema-change","hostname":"MacBook-Pro.local","pid":13263,"level":30,"time":"2022-02-25T17:22:23.809-05:00","v":0,"msg":"Remaining rows below delta count, proceeding towards swap","version":"0.4.0"}
+ {"name":"pg-online-schema-change","hostname":"MacBook-Pro.local","pid":13263,"level":30,"time":"2022-02-25T17:22:23.809-05:00","v":0,"msg":"Performing swap!","version":"0.4.0"}
+ {"name":"pg-online-schema-change","hostname":"MacBook-Pro.local","pid":13263,"level":30,"time":"2022-02-25T17:22:24.259-05:00","v":0,"msg":"Replaying rows, count: 449","version":"0.4.0"}
+ NOTICE: trigger "primary_to_audit_table_trigger" for relation "pgbench_accounts" does not exist, skipping
+ {"name":"pg-online-schema-change","hostname":"MacBook-Pro.local","pid":13263,"level":30,"time":"2022-02-25T17:22:24.650-05:00","v":0,"msg":"Performing ANALYZE!","version":"0.4.0"}
+ INFO: analyzing "public.pgbench_accounts"
+ INFO: "pgbench_accounts": scanned 30000 of 32935 pages, containing 1821834 live rows and 6056 dead rows; 30000 rows in sample, 2000070 estimated total rows
+ {"name":"pg-online-schema-change","hostname":"MacBook-Pro.local","pid":13263,"level":30,"time":"2022-02-25T17:22:24.941-05:00","v":0,"msg":"Validating constraints!","version":"0.4.0"}
+ NOTICE: table "pgosc_st_pgbench_accounts_714a8b" does not exist, skipping
+ {"name":"pg-online-schema-change","hostname":"MacBook-Pro.local","pid":13263,"level":30,"time":"2022-02-25T17:22:26.159-05:00","v":0,"msg":"All tasks successfully completed","version":"0.4.0"}
+ ```
+
+ </details>
+
+
+ ## Conclusion
+
+ By tweaking `--pull-batch-count` to `2000` (replay 2k rows at once) and `--delta-count` to `200` (time to swap when remaining rows is <200), `pg-osc` was able to perform the schema change with no impact and in very little time. Depending on the database size and the load on the table, you can tune these further to achieve the desired impact. At some point this will plateau - I can imagine the replay factor not working quite as well for, say, 100k commits/s workloads. So, YMMV.
data/lib/pg_online_schema_change/cli.rb CHANGED
@@ -1,8 +1,12 @@
+ # frozen_string_literal: true
+
  require "thor"

  module PgOnlineSchemaChange
+ PULL_BATCH_COUNT = 1000
+ DELTA_COUNT = 20
  class CLI < Thor
- desc "perform", "Perform the set of operations to safely apply the schema change with minimal locks"
+ desc "perform", "Safely apply schema changes with minimal locks"
  method_option :alter_statement, aliases: "-a", type: :string, required: true,
  desc: "The ALTER statement to perform the schema change"
  method_option :schema, aliases: "-s", type: :string, required: true, default: "public",
@@ -11,7 +15,7 @@ module PgOnlineSchemaChange
  method_option :host, aliases: "-h", type: :string, required: true, desc: "Server host where the Database is located"
  method_option :username, aliases: "-u", type: :string, required: true, desc: "Username for the Database"
  method_option :port, aliases: "-p", type: :numeric, required: true, default: 5432, desc: "Port for the Database"
- method_option :password, aliases: "-w", type: :string, required: true, desc: "Password for the Database"
+ method_option :password, aliases: "-w", type: :string, required: true, desc: "DEPRECATED: Password for the Database. Please pass PGPASSWORD environment variable instead."
  method_option :verbose, aliases: "-v", type: :boolean, default: false, desc: "Emit logs in debug mode"
  method_option :drop, aliases: "-f", type: :boolean, default: false,
  desc: "Drop the original table in the end after the swap"
@@ -21,11 +25,19 @@ module PgOnlineSchemaChange
  desc: "Time to wait before killing backends to acquire lock and/or retrying upto 3 times. It will kill backends if --kill-backends is true, otherwise try upto 3 times and exit if it cannot acquire a lock."
  method_option :copy_statement, aliases: "-c", type: :string, required: false, default: "",
  desc: "Takes a .sql file location where you can provide a custom query to be played (ex: backfills) when pgosc copies data from the primary to the shadow table. More examples in README."
+ method_option :pull_batch_count, aliases: "-b", type: :numeric, required: false, default: PULL_BATCH_COUNT,
+ desc: "Number of rows to be replayed on each iteration after copy. This can be tuned for faster catch up and swap. Best used with delta-count."
+ method_option :delta_count, aliases: "-e", type: :numeric, required: false, default: DELTA_COUNT,
+ desc: "Indicates how many rows should be remaining before a swap should be performed. This can be tuned for faster catch up and swap, especially on highly volume tables. Best used with pull-batch-count."

  def perform
  client_options = Struct.new(*options.keys.map(&:to_sym)).new(*options.values)
+ PgOnlineSchemaChange.logger(verbose: client_options.verbose)
+
+ PgOnlineSchemaChange.logger.warn("DEPRECATED: -w is deprecated. Please pass PGPASSWORD environment variable instead.") if client_options.password
+
+ client_options.password = ENV["PGPASSWORD"] || client_options.password

- PgOnlineSchemaChange.logger = client_options.verbose
  PgOnlineSchemaChange::Orchestrate.run!(client_options)
  end

data/lib/pg_online_schema_change/client.rb CHANGED
@@ -1,9 +1,11 @@
+ # frozen_string_literal: true
+
  require "pg"

  module PgOnlineSchemaChange
  class Client
  attr_accessor :alter_statement, :schema, :dbname, :host, :username, :port, :password, :connection, :table, :drop,
- :kill_backends, :wait_time_for_lock, :copy_statement
+ :kill_backends, :wait_time_for_lock, :copy_statement, :pull_batch_count, :delta_count

  def initialize(options)
  @alter_statement = options.alter_statement
@@ -16,7 +18,11 @@ module PgOnlineSchemaChange
  @drop = options.drop
  @kill_backends = options.kill_backends
  @wait_time_for_lock = options.wait_time_for_lock
+ @pull_batch_count = options.pull_batch_count
+ @delta_count = options.delta_count
+
  handle_copy_statement(options.copy_statement)
+ handle_validations

  @connection = PG.connect(
  dbname: @dbname,
@@ -26,17 +32,19 @@ module PgOnlineSchemaChange
  port: @port,
  )

- raise Error, "Not a valid ALTER statement: #{@alter_statement}" unless Query.alter_statement?(@alter_statement)
-
- unless Query.same_table?(@alter_statement)
- raise Error "All statements should belong to the same table: #{@alter_statement}"
- end
-
  @table = Query.table(@alter_statement)

  PgOnlineSchemaChange.logger.debug("Connection established")
  end

+ def handle_validations
+ raise Error, "Not a valid ALTER statement: #{@alter_statement}" unless Query.alter_statement?(@alter_statement)
+
+ return if Query.same_table?(@alter_statement)
+
+ raise Error "All statements should belong to the same table: #{@alter_statement}"
+ end
+
  def handle_copy_statement(statement)
  return if statement.nil? || statement == ""

data/lib/pg_online_schema_change/functions.rb CHANGED
@@ -1,4 +1,6 @@
- FUNC_FIX_SERIAL_SEQUENCE = <<~SQL.freeze
+ # frozen_string_literal: true
+
+ FUNC_FIX_SERIAL_SEQUENCE = <<~SQL
  CREATE OR REPLACE FUNCTION fix_serial_sequence(_table regclass, _newtable text)
  RETURNS void AS
  $func$
@@ -35,7 +37,7 @@ FUNC_FIX_SERIAL_SEQUENCE = <<~SQL.freeze
  $func$ LANGUAGE plpgsql VOLATILE;
  SQL

- FUNC_CREATE_TABLE_ALL = <<~SQL.freeze
+ FUNC_CREATE_TABLE_ALL = <<~SQL
  CREATE OR REPLACE FUNCTION create_table_all(source_table text, newsource_table text)
  RETURNS void language plpgsql
  as $$
data/lib/pg_online_schema_change/helper.rb CHANGED
@@ -1,3 +1,5 @@
+ # frozen_string_literal: true
+
  module PgOnlineSchemaChange
  module Helper
  def primary_key
@@ -15,7 +17,14 @@ module PgOnlineSchemaChange
  result = Store.send(:get, method)
  return result if result

- raise ArgumentError, "Method `#{method}` doesn't exist."
+ super
+ end
+
+ def respond_to_missing?(method_name, *args)
+ result = Store.send(:get, method)
+ return true if result
+
+ super
  end
  end
  end
data/lib/pg_online_schema_change/orchestrate.rb CHANGED
@@ -1,8 +1,10 @@
+ # frozen_string_literal: true
+
  require "securerandom"

  module PgOnlineSchemaChange
  class Orchestrate
- SWAP_STATEMENT_TIMEOUT = "5s".freeze
+ SWAP_STATEMENT_TIMEOUT = "5s"

  extend Helper

@@ -21,12 +23,22 @@ module PgOnlineSchemaChange
  Query.run(client.connection, FUNC_FIX_SERIAL_SEQUENCE)
  Query.run(client.connection, FUNC_CREATE_TABLE_ALL)

+ setup_store
+ end
+
+ def setup_store
  # Set this early on to ensure their creation and cleanup (unexpected)
  # happens at all times. IOW, the calls from Store.get always return
  # the same value.
  Store.set(:old_primary_table, "pgosc_op_table_#{client.table}")
- Store.set(:audit_table, "pgosc_at_#{client.table}_#{random_string}")
- Store.set(:shadow_table, "pgosc_st_#{client.table}_#{random_string}")
+ Store.set(:audit_table, "pgosc_at_#{client.table}_#{pgosc_identifier}")
+ Store.set(:operation_type_column, "operation_type_#{pgosc_identifier}")
+ Store.set(:trigger_time_column, "trigger_time_#{pgosc_identifier}")
+ Store.set(:audit_table_pk, "at_#{pgosc_identifier}_id")
+ Store.set(:audit_table_pk_sequence, "#{audit_table}_#{audit_table_pk}_seq")
+ Store.set(:shadow_table, "pgosc_st_#{client.table}_#{pgosc_identifier}")
+
+ Store.set(:foreign_key_statements, Query.get_foreign_keys_to_refresh(client, client.table))
  end

  def run!(options)
@@ -70,7 +82,7 @@ module PgOnlineSchemaChange
  reader = setup_signals!
  signal = reader.gets.chomp

- while !reader.closed? && IO.select([reader])
+ while !reader.closed? && IO.select([reader]) # rubocop:disable Lint/UnreachableLoop
  logger.info "Signal #{signal} received, cleaning up"

  client.connection.cancel
@@ -85,7 +97,7 @@ module PgOnlineSchemaChange
  logger.info("Setting up audit table", { audit_table: audit_table })

  sql = <<~SQL
- CREATE TABLE #{audit_table} (operation_type text, trigger_time timestamp, LIKE #{client.table});
+ CREATE TABLE #{audit_table} (#{audit_table_pk} SERIAL PRIMARY KEY, #{operation_type_column} text, #{trigger_time_column} timestamp, LIKE #{client.table});
  SQL

  Query.run(client.connection, sql)
@@ -109,13 +121,13 @@ module PgOnlineSchemaChange
  $$
  BEGIN
  IF ( TG_OP = 'INSERT') THEN
- INSERT INTO \"#{audit_table}\" select 'INSERT', now(), NEW.* ;
+ INSERT INTO \"#{audit_table}\" select nextval(\'#{audit_table_pk_sequence}\'), 'INSERT', clock_timestamp(), NEW.* ;
  RETURN NEW;
  ELSIF ( TG_OP = 'UPDATE') THEN
- INSERT INTO \"#{audit_table}\" select 'UPDATE', now(), NEW.* ;
+ INSERT INTO \"#{audit_table}\" select nextval(\'#{audit_table_pk_sequence}\'), 'UPDATE', clock_timestamp(), NEW.* ;
  RETURN NEW;
  ELSIF ( TG_OP = 'DELETE') THEN
- INSERT INTO \"#{audit_table}\" select 'DELETE', now(), OLD.* ;
+ INSERT INTO \"#{audit_table}\" select nextval(\'#{audit_table_pk_sequence}\'), 'DELETE', clock_timestamp(), OLD.* ;
  RETURN NEW;
  END IF;
  END;
@@ -153,7 +165,7 @@ module PgOnlineSchemaChange
  # re-uses transaction with serializable
  # Disabling vacuum to avoid any issues during the process
  result = Query.storage_parameters_for(client, client.table, true) || ""
- primary_table_storage_parameters = Store.set(:primary_table_storage_parameters, result)
+ Store.set(:primary_table_storage_parameters, result)

  logger.debug("Disabling vacuum on shadow and audit table",
  { shadow_table: shadow_table, audit_table: audit_table })
@@ -185,8 +197,7 @@ module PgOnlineSchemaChange
  # Begin the process to copy data into copy table
  # depending on the size of the table, this can be a time
  # taking operation.
- logger.info("Clearing contents of audit table before copy..",
- { shadow_table: shadow_table, parent_table: client.table })
+ logger.info("Clearing contents of audit table before copy..", { shadow_table: shadow_table, parent_table: client.table })
  Query.run(client.connection, "DELETE FROM #{audit_table}", true)

  logger.info("Copying contents..", { shadow_table: shadow_table, parent_table: client.table })
@@ -195,7 +206,7 @@ module PgOnlineSchemaChange
  return Query.run(client.connection, query, true)
  end

- sql = Query.copy_data_statement(client, shadow_table)
+ sql = Query.copy_data_statement(client, shadow_table, true)
  Query.run(client.connection, sql, true)
  ensure
  Query.run(client.connection, "COMMIT;") # commit the serializable transaction
@@ -212,7 +223,6 @@ module PgOnlineSchemaChange
  def swap!
  logger.info("Performing swap!")

- foreign_key_statements = Query.get_foreign_keys_to_refresh(client, client.table)
  storage_params_reset = primary_table_storage_parameters.empty? ? "" : "ALTER TABLE #{client.table} SET (#{primary_table_storage_parameters});"

  # From here on, all statements are carried out in a single
@@ -235,7 +245,7 @@ module PgOnlineSchemaChange
  DROP TRIGGER IF EXISTS primary_to_audit_table_trigger ON #{client.table};
  SQL

- Query.run(client.connection, sql)
+ Query.run(client.connection, sql, opened)
  ensure
  Query.run(client.connection, "COMMIT;")
  Query.run(client.connection, "SET statement_timeout = 0;")
@@ -261,6 +271,7 @@ module PgOnlineSchemaChange
  shadow_table_drop = shadow_table ? "DROP TABLE IF EXISTS #{shadow_table}" : ""

  sql = <<~SQL
+ DROP TRIGGER IF EXISTS primary_to_audit_table_trigger ON #{client.table};
  #{audit_table_drop};
  #{shadow_table_drop};
  #{primary_drop}
@@ -272,8 +283,10 @@ module PgOnlineSchemaChange
  Query.run(client.connection, sql)
  end

- private def random_string
- @random_string ||= SecureRandom.hex(3)
+ private
+
+ def pgosc_identifier
+ @pgosc_identifier ||= SecureRandom.hex(3)
  end
  end
  end
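For reference, the new `setup_store` ties every generated object to one random identifier. A rough illustration of the naming scheme below assumes a table called `books`; in the gem itself these values are kept in `Store` and read back through `Helper`.

```ruby
# Illustration of the identifier scheme introduced in Orchestrate#setup_store.
require "securerandom"

pgosc_identifier = SecureRandom.hex(3)                         # e.g. "714a8b"
audit_table             = "pgosc_at_books_#{pgosc_identifier}"
shadow_table            = "pgosc_st_books_#{pgosc_identifier}"
audit_table_pk          = "at_#{pgosc_identifier}_id"
audit_table_pk_sequence = "#{audit_table}_#{audit_table_pk}_seq"
operation_type_column   = "operation_type_#{pgosc_identifier}"
trigger_time_column     = "trigger_time_#{pgosc_identifier}"
```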
data/lib/pg_online_schema_change/query.rb CHANGED
@@ -1,3 +1,5 @@
+ # frozen_string_literal: true
+
  require "pg_query"
  require "pg"

@@ -5,7 +7,7 @@ module PgOnlineSchemaChange
  class Query
  extend Helper

- INDEX_SUFFIX = "_pgosc".freeze
+ INDEX_SUFFIX = "_pgosc"
  DROPPED_COLUMN_TYPE = :AT_DropColumn
  RENAMED_COLUMN_TYPE = :AT_RenameColumn
  LOCK_ATTEMPT = 4
@@ -15,28 +17,28 @@ module PgOnlineSchemaChange
  PgQuery.parse(query).tree.stmts.all? do |statement|
  statement.stmt.alter_table_stmt.instance_of?(PgQuery::AlterTableStmt) || statement.stmt.rename_stmt.instance_of?(PgQuery::RenameStmt)
  end
- rescue PgQuery::ParseError => e
+ rescue PgQuery::ParseError
  false
  end

  def same_table?(query)
- tables = PgQuery.parse(query).tree.stmts.map do |statement|
+ tables = PgQuery.parse(query).tree.stmts.filter_map do |statement|
  if statement.stmt.alter_table_stmt.instance_of?(PgQuery::AlterTableStmt)
  statement.stmt.alter_table_stmt.relation.relname
  elsif statement.stmt.rename_stmt.instance_of?(PgQuery::RenameStmt)
  statement.stmt.rename_stmt.relation.relname
  end
- end.compact
+ end

  tables.uniq.count == 1
- rescue PgQuery::ParseError => e
+ rescue PgQuery::ParseError
  false
  end

  def table(query)
- from_rename_statement = PgQuery.parse(query).tree.stmts.map do |statement|
+ from_rename_statement = PgQuery.parse(query).tree.stmts.filter_map do |statement|
  statement.stmt.rename_stmt&.relation&.relname
- end.compact[0]
+ end[0]
  PgQuery.parse(query).tables[0] || from_rename_statement
  end

@@ -48,7 +50,7 @@ module PgOnlineSchemaChange
  connection.async_exec("BEGIN;")

  result = connection.async_exec(query, &block)
- rescue Exception
+ rescue Exception # rubocop:disable Lint/RescueException
  connection.cancel if connection.transaction_status != PG::PQTRANS_IDLE
  connection.block
  logger.info("Exception raised, rolling back query", { rollback: true, query: query })
@@ -144,11 +146,11 @@ module PgOnlineSchemaChange
  end

  references.map do |row|
- if row["definition"].end_with?("NOT VALID")
- add_statement = "ALTER TABLE #{row["table_on"]} ADD CONSTRAINT #{row["constraint_name"]} #{row["definition"]};"
- else
- add_statement = "ALTER TABLE #{row["table_on"]} ADD CONSTRAINT #{row["constraint_name"]} #{row["definition"]} NOT VALID;"
- end
+ add_statement = if row["definition"].end_with?("NOT VALID")
+ "ALTER TABLE #{row["table_on"]} ADD CONSTRAINT #{row["constraint_name"]} #{row["definition"]};"
+ else
+ "ALTER TABLE #{row["table_on"]} ADD CONSTRAINT #{row["constraint_name"]} #{row["definition"]} NOT VALID;"
+ end

  drop_statement = "ALTER TABLE #{row["table_on"]} DROP CONSTRAINT #{row["constraint_name"]};"

@@ -291,7 +293,7 @@ module PgOnlineSchemaChange
  client.connection.quote_ident(select_column)
  end

- sql = <<~SQL
+ <<~SQL
  INSERT INTO #{shadow_table}(#{insert_into_columns.join(", ")})
  SELECT #{select_columns.join(", ")}
  FROM ONLY #{client.table}
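As a quick illustration of the `same_table?` guard rewritten above with `filter_map` (assuming the gem and its `pg_query` dependency are loaded; the statements are made-up examples):

```ruby
PgOnlineSchemaChange::Query.same_table?(
  "ALTER TABLE books ADD COLUMN purchased boolean; ALTER TABLE books RENAME COLUMN email TO new_email;"
)
# => true, both statements target "books"

PgOnlineSchemaChange::Query.same_table?(
  "ALTER TABLE books ADD COLUMN purchased boolean; ALTER TABLE sellers ADD COLUMN active boolean;"
)
# => false, the statements target two different tables
```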
data/lib/pg_online_schema_change/replay.rb CHANGED
@@ -1,12 +1,12 @@
+ # frozen_string_literal: true
+
+ # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
+
  module PgOnlineSchemaChange
  class Replay
  extend Helper

  class << self
- PULL_BATCH_COUNT = 1000
- DELTA_COUNT = 20
- RESERVED_COLUMNS = %w[operation_type trigger_time].freeze
-
  # This, picks PULL_BATCH_COUNT rows by primary key from audit_table,
  # replays it on the shadow_table. Once the batch is done,
  # it them deletes those PULL_BATCH_COUNT rows from audit_table. Then, pull another batch,
@@ -17,7 +17,7 @@ module PgOnlineSchemaChange
  loop do
  rows = rows_to_play

- raise CountBelowDelta if rows.count <= DELTA_COUNT
+ raise CountBelowDelta if rows.count <= client.delta_count

  play!(rows)
  end
@@ -25,7 +25,7 @@ module PgOnlineSchemaChange

  def rows_to_play(reuse_trasaction = false)
  select_query = <<~SQL
- SELECT * FROM #{audit_table} ORDER BY #{primary_key} LIMIT #{PULL_BATCH_COUNT};
+ SELECT * FROM #{audit_table} ORDER BY #{audit_table_pk} LIMIT #{client.pull_batch_count};
  SQL

  rows = []
@@ -34,6 +34,10 @@ module PgOnlineSchemaChange
  rows
  end

+ def reserved_columns
+ @reserved_columns ||= [trigger_time_column, operation_type_column, audit_table_pk]
+ end
+
  def play!(rows, reuse_trasaction = false)
  logger.info("Replaying rows, count: #{rows.size}")

@@ -44,7 +48,7 @@ module PgOnlineSchemaChange

  # Remove audit table cols, since we will be
  # re-mapping them for inserts and updates
- RESERVED_COLUMNS.each do |col|
+ reserved_columns.each do |col|
  new_row.delete(col)
  end

@@ -73,7 +77,7 @@ module PgOnlineSchemaChange
  client.connection.escape_string(value)
  end

- case row["operation_type"]
+ case row[operation_type_column]
  when "INSERT"
  values = new_row.map { |_, val| "'#{val}'" }.join(",")

@@ -83,7 +87,7 @@ module PgOnlineSchemaChange
  SQL
  to_be_replayed << sql

- to_be_deleted_rows << "'#{row[primary_key]}'"
+ to_be_deleted_rows << "'#{row[audit_table_pk]}'"
  when "UPDATE"
  set_values = new_row.map do |column, value|
  "#{column} = '#{value}'"
@@ -96,27 +100,29 @@ module PgOnlineSchemaChange
  SQL
  to_be_replayed << sql

- to_be_deleted_rows << "'#{row[primary_key]}'"
+ to_be_deleted_rows << "'#{row[audit_table_pk]}'"
  when "DELETE"
  sql = <<~SQL
  DELETE FROM #{shadow_table} WHERE #{primary_key}=\'#{row[primary_key]}\';
  SQL
  to_be_replayed << sql

- to_be_deleted_rows << "'#{row[primary_key]}'"
+ to_be_deleted_rows << "'#{row[audit_table_pk]}'"
  end
  end

  Query.run(client.connection, to_be_replayed.join, reuse_trasaction)

  # Delete items from the audit now that are replayed
- if to_be_deleted_rows.count >= 1
- delete_query = <<~SQL
- DELETE FROM #{audit_table} WHERE #{primary_key} IN (#{to_be_deleted_rows.join(",")})
- SQL
- Query.run(client.connection, delete_query, reuse_trasaction)
- end
+ return unless to_be_deleted_rows.count >= 1
+
+ delete_query = <<~SQL
+ DELETE FROM #{audit_table} WHERE #{audit_table_pk} IN (#{to_be_deleted_rows.join(",")})
+ SQL
+ Query.run(client.connection, delete_query, reuse_trasaction)
  end
  end
  end
  end
+
+ # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
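Condensed, the replay loop after this change behaves roughly like the sketch below. `connection`, `audit_table`, `audit_table_pk`, `pull_batch_count`, `delta_count` and `replay_batch!` are stand-ins for what `Client`/`Replay` actually provide, not real API.

```ruby
loop do
  # Pull the next batch, ordered by the audit table's own primary key
  rows = connection.exec(
    "SELECT * FROM #{audit_table} ORDER BY #{audit_table_pk} LIMIT #{pull_batch_count}"
  ).to_a

  break if rows.count <= delta_count # delta is small enough: proceed to the swap

  replay_batch!(rows) # apply the INSERT/UPDATE/DELETE statements on the shadow table

  # Delete only the rows that were just replayed, again keyed by the audit PK
  ids = rows.map { |row| "'#{row[audit_table_pk]}'" }.join(",")
  connection.exec("DELETE FROM #{audit_table} WHERE #{audit_table_pk} IN (#{ids})")
end
```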
data/lib/pg_online_schema_change/store.rb CHANGED
@@ -1,17 +1,21 @@
+ # frozen_string_literal: true
+
  require "pg_query"
  require "pg"

  module PgOnlineSchemaChange
  class Store
  class << self
- @@object = {}
+ @object = {}

  def get(key)
- @@object[key.to_s] || @@object[key.to_sym]
+ @object ||= {}
+ @object[key.to_s] || @object[key.to_sym]
  end

  def set(key, value)
- @@object[key.to_sym] = value
+ @object ||= {}
+ @object[key.to_sym] = value
  end
  end
  end
data/lib/pg_online_schema_change/version.rb CHANGED
@@ -1,5 +1,5 @@
  # frozen_string_literal: true

  module PgOnlineSchemaChange
- VERSION = "0.3.0"
+ VERSION = "0.6.0"
  end
data/lib/pg_online_schema_change.rb CHANGED
@@ -6,28 +6,24 @@ require "ougai"
  require "pg_online_schema_change/version"
  require "pg_online_schema_change/helper"
  require "pg_online_schema_change/functions"
- require "pg_online_schema_change/cli"
  require "pg_online_schema_change/client"
  require "pg_online_schema_change/query"
  require "pg_online_schema_change/store"
  require "pg_online_schema_change/replay"
  require "pg_online_schema_change/orchestrate"
+ require "pg_online_schema_change/cli"

  module PgOnlineSchemaChange
  class Error < StandardError; end
  class CountBelowDelta < StandardError; end
  class AccessExclusiveLockNotAcquired < StandardError; end

- def self.logger=(verbose)
- @@logger ||= begin
- logger = Ougai::Logger.new($stdout)
- logger.level = verbose ? Ougai::Logger::TRACE : Ougai::Logger::INFO
- logger.with_fields = { version: PgOnlineSchemaChange::VERSION }
- logger
- end
- end
-
- def self.logger
- @@logger
+ def self.logger(verbose: false)
+ @logger ||= begin
+ logger = Ougai::Logger.new($stdout)
+ logger.level = verbose ? Ougai::Logger::TRACE : Ougai::Logger::INFO
+ logger.with_fields = { version: PgOnlineSchemaChange::VERSION }
+ logger
+ end
  end
  end
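The logger is now a memoized reader that accepts the verbosity on first use, replacing the old `logger=` writer; roughly:

```ruby
PgOnlineSchemaChange.logger(verbose: true)         # first call picks the log level
PgOnlineSchemaChange.logger.info("replaying rows") # later calls reuse the same instance
```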
data/scripts/release.sh CHANGED
@@ -11,8 +11,11 @@ gem build pg_online_schema_change.gemspec
  echo "=== Pushing gem ===="
  gem push pg_online_schema_change-$VERSION.gem

+ echo "=== Sleeping for 5s ===="
+ sleep 5
+
  echo "=== Building Image ===="
- docker build . --build-arg VERSION=$VERSION -t pg-osc
+ docker build . --build-arg VERSION=$VERSION -t shayonj/pg-osc:$VERSION

  echo "=== Tagging Image ===="
  docker image tag shayonj/pg-osc:$VERSION shayonj/pg-osc:latest
@@ -20,3 +23,6 @@ docker image tag shayonj/pg-osc:$VERSION shayonj/pg-osc:latest
  echo "=== Pushing Image ===="
  docker push shayonj/pg-osc:$VERSION
  docker push shayonj/pg-osc:latest
+
+ echo "=== Cleaning up ===="
+ rm pg_online_schema_change-$VERSION.gem
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: pg_online_schema_change
  version: !ruby/object:Gem::Version
- version: 0.3.0
+ version: 0.6.0
  platform: ruby
  authors:
  - Shayon Mukherjee
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2022-02-21 00:00:00.000000000 Z
+ date: 2022-02-27 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: ougai
@@ -191,6 +191,7 @@ extra_rdoc_files: []
  files:
  - ".rspec"
  - ".rubocop.yml"
+ - ".rubocop_todo.yml"
  - ".ruby-version"
  - CHANGELOG.md
  - CODE_OF_CONDUCT.md
@@ -203,9 +204,11 @@ files:
  - bin/console
  - bin/pg-online-schema-change
  - bin/setup
- - diagrams/how-it-works.excalidraw
- - diagrams/how-it-works.png
  - docker-compose.yml
+ - docs/how-it-works.excalidraw
+ - docs/how-it-works.png
+ - docs/load-test-1.png
+ - docs/load-test.md
  - lib/pg_online_schema_change.rb
  - lib/pg_online_schema_change/cli.rb
  - lib/pg_online_schema_change/client.rb