pg_online_schema_change 0.3.0 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +57 -24
- data/.rubocop_todo.yml +44 -0
- data/CHANGELOG.md +19 -0
- data/CODE_OF_CONDUCT.md +1 -1
- data/Gemfile.lock +1 -2
- data/README.md +57 -32
- data/{diagrams → docs}/how-it-works.excalidraw +0 -0
- data/{diagrams → docs}/how-it-works.png +0 -0
- data/docs/load-test-1.png +0 -0
- data/docs/load-test.md +138 -0
- data/lib/pg_online_schema_change/cli.rb +15 -3
- data/lib/pg_online_schema_change/client.rb +15 -7
- data/lib/pg_online_schema_change/functions.rb +4 -2
- data/lib/pg_online_schema_change/helper.rb +10 -1
- data/lib/pg_online_schema_change/orchestrate.rb +29 -16
- data/lib/pg_online_schema_change/query.rb +16 -14
- data/lib/pg_online_schema_change/replay.rb +23 -17
- data/lib/pg_online_schema_change/store.rb +7 -3
- data/lib/pg_online_schema_change/version.rb +1 -1
- data/lib/pg_online_schema_change.rb +8 -12
- data/scripts/release.sh +7 -1
- metadata +7 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3bc78403dc036598371393d5be0914bd7150660eda9798fb827af2892d058396
|
4
|
+
data.tar.gz: 6dbfb6e41267accf02e0da8ba88d7ea5039aa368ddc497e40d81f8952bed811d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '0683dd33e681162a1b16b471fbefc52021b66ccbb3235994fd0654c6d97052a5482e22e29c1280cbc332eb8510b1b4a571075b4e36f9a2565c359ad086100d05'
|
7
|
+
data.tar.gz: 51e1fcd9d8d8d4f2a3f5760dee1e51fbde5a45f3bf7dd9c2d048390856b3b71bfb23333b053e0a0e43c026e90a9f704efbc903760cd37e95eb5ac5ea24b66512
|
data/.rubocop.yml
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
inherit_from: .rubocop_todo.yml
|
2
|
+
|
1
3
|
require:
|
2
4
|
- rubocop-rspec
|
3
5
|
- rubocop-packaging
|
@@ -14,71 +16,70 @@ AllCops:
|
|
14
16
|
- "vendor/**/*"
|
15
17
|
|
16
18
|
Layout/HashAlignment:
|
17
|
-
EnforcedColonStyle:
|
18
|
-
|
19
|
-
- key
|
20
|
-
EnforcedHashRocketStyle:
|
21
|
-
- table
|
22
|
-
- key
|
19
|
+
EnforcedColonStyle: key
|
20
|
+
EnforcedHashRocketStyle: key
|
23
21
|
|
24
22
|
Layout/SpaceAroundEqualsInParameterDefault:
|
25
|
-
EnforcedStyle:
|
23
|
+
EnforcedStyle: space
|
26
24
|
|
27
25
|
Metrics/AbcSize:
|
28
|
-
|
26
|
+
Enabled: true
|
27
|
+
Max: 40
|
29
28
|
Exclude:
|
30
|
-
- "
|
29
|
+
- "spec/**/*"
|
31
30
|
|
32
31
|
Metrics/BlockLength:
|
32
|
+
Max: 100
|
33
33
|
Exclude:
|
34
34
|
- "*.gemspec"
|
35
35
|
- "Rakefile"
|
36
|
+
- "spec/**/*"
|
36
37
|
|
37
38
|
Metrics/ClassLength:
|
38
39
|
Exclude:
|
39
40
|
- "test/**/*"
|
40
41
|
|
41
42
|
Metrics/MethodLength:
|
42
|
-
Max:
|
43
|
+
Max: 30
|
43
44
|
Exclude:
|
44
45
|
- "test/**/*"
|
45
46
|
|
46
47
|
Metrics/ParameterLists:
|
47
|
-
Max:
|
48
|
+
Max: 5
|
48
49
|
|
49
50
|
Naming/MemoizedInstanceVariableName:
|
50
|
-
Enabled:
|
51
|
+
Enabled: true
|
51
52
|
|
52
53
|
Naming/VariableNumber:
|
53
|
-
Enabled:
|
54
|
-
|
55
|
-
Rake/Desc:
|
56
|
-
Enabled: false
|
54
|
+
Enabled: true
|
57
55
|
|
58
56
|
Style/BarePercentLiterals:
|
59
57
|
EnforcedStyle: percent_q
|
60
58
|
|
61
59
|
Style/ClassAndModuleChildren:
|
62
|
-
Enabled:
|
60
|
+
Enabled: true
|
63
61
|
|
64
62
|
Style/Documentation:
|
65
63
|
Enabled: false
|
66
64
|
|
67
65
|
Style/DoubleNegation:
|
68
|
-
Enabled:
|
66
|
+
Enabled: true
|
69
67
|
|
70
68
|
Style/EmptyMethod:
|
71
|
-
Enabled:
|
69
|
+
Enabled: true
|
72
70
|
|
73
71
|
Style/FrozenStringLiteralComment:
|
74
|
-
Enabled:
|
72
|
+
Enabled: true
|
75
73
|
|
76
74
|
Style/NumericPredicate:
|
77
|
-
Enabled:
|
75
|
+
Enabled: true
|
78
76
|
|
79
77
|
Style/StringLiterals:
|
80
78
|
EnforcedStyle: double_quotes
|
81
79
|
|
80
|
+
Style/StringLiteralsInInterpolation:
|
81
|
+
EnforcedStyle: double_quotes
|
82
|
+
|
82
83
|
Style/TrivialAccessors:
|
83
84
|
AllowPredicates: true
|
84
85
|
|
@@ -91,9 +92,41 @@ Style/TrailingCommaInArrayLiteral:
|
|
91
92
|
Style/TrailingCommaInHashLiteral:
|
92
93
|
EnforcedStyleForMultiline: comma
|
93
94
|
|
94
|
-
|
95
|
-
|
95
|
+
Layout/MultilineArrayBraceLayout:
|
96
|
+
Enabled: true
|
97
|
+
EnforcedStyle: symmetrical
|
96
98
|
|
97
|
-
|
99
|
+
Layout/MultilineHashBraceLayout:
|
98
100
|
Enabled: true
|
99
101
|
EnforcedStyle: symmetrical
|
102
|
+
|
103
|
+
Layout/MultilineAssignmentLayout:
|
104
|
+
Enabled: true
|
105
|
+
EnforcedStyle: same_line
|
106
|
+
|
107
|
+
Layout/FirstArrayElementIndentation:
|
108
|
+
Enabled: true
|
109
|
+
EnforcedStyle: consistent
|
110
|
+
|
111
|
+
Layout/FirstHashElementIndentation:
|
112
|
+
Enabled: true
|
113
|
+
EnforcedStyle: consistent
|
114
|
+
|
115
|
+
Layout/MultilineHashKeyLineBreaks:
|
116
|
+
Enabled: true
|
117
|
+
|
118
|
+
Layout/LineLength:
|
119
|
+
Enabled: true
|
120
|
+
Max: 250
|
121
|
+
|
122
|
+
Style/FormatStringToken:
|
123
|
+
Enabled: true
|
124
|
+
EnforcedStyle: template
|
125
|
+
|
126
|
+
RSpec/MessageSpies:
|
127
|
+
Enabled: true
|
128
|
+
EnforcedStyle: receive
|
129
|
+
|
130
|
+
RSpec/FilePath:
|
131
|
+
Enabled: true
|
132
|
+
SpecSuffixOnly: true
|
data/.rubocop_todo.yml
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
# This configuration was generated by
|
2
|
+
# `rubocop --auto-gen-config`
|
3
|
+
# on 2022-02-21 22:46:44 UTC using RuboCop version 1.23.0.
|
4
|
+
# The point is for the user to remove these configuration records
|
5
|
+
# one by one as the offenses are removed from the code base.
|
6
|
+
# Note that changes in the inspected code, or installation of new
|
7
|
+
# versions of RuboCop, may require this file to be generated again.
|
8
|
+
|
9
|
+
# Offense count: 2
|
10
|
+
# Configuration parameters: CountComments, CountAsOne.
|
11
|
+
Metrics/ClassLength:
|
12
|
+
Max: 233
|
13
|
+
|
14
|
+
# Offense count: 2
|
15
|
+
# Configuration parameters: IgnoredMethods.
|
16
|
+
Metrics/CyclomaticComplexity:
|
17
|
+
Max: 15
|
18
|
+
|
19
|
+
# Offense count: 2
|
20
|
+
# Configuration parameters: IgnoredMethods.
|
21
|
+
Metrics/PerceivedComplexity:
|
22
|
+
Max: 13
|
23
|
+
|
24
|
+
# Offense count: 1
|
25
|
+
Packaging/GemspecGit:
|
26
|
+
Exclude:
|
27
|
+
- 'pg_online_schema_change.gemspec'
|
28
|
+
|
29
|
+
# Offense count: 62
|
30
|
+
# Configuration parameters: CountAsOne.
|
31
|
+
RSpec/ExampleLength:
|
32
|
+
Max: 55
|
33
|
+
|
34
|
+
# Offense count: 38
|
35
|
+
RSpec/MultipleExpectations:
|
36
|
+
Max: 14
|
37
|
+
|
38
|
+
# Offense count: 6
|
39
|
+
# Configuration parameters: AllowedMethods.
|
40
|
+
# AllowedMethods: respond_to_missing?
|
41
|
+
Style/OptionalBooleanParameter:
|
42
|
+
Exclude:
|
43
|
+
- 'lib/pg_online_schema_change/query.rb'
|
44
|
+
- 'lib/pg_online_schema_change/replay.rb'
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,22 @@
|
|
1
|
+
## [0.5.0] - 2022-02-26
|
2
|
+
* Share some preliminary load test figures in https://github.com/shayonj/pg-osc/pull/54
|
3
|
+
* Reuse existing transaction open for reading table columns in https://github.com/shayonj/pg-osc/pull/53
|
4
|
+
* Start to deprecate --password with PGPASSWORD in https://github.com/shayonj/pg-osc/pull/56
|
5
|
+
* Introduce configurable PULL_BATCH_COUNT and DELTA_COUNT in https://github.com/shayonj/pg-osc/pull/57
|
6
|
+
|
7
|
+
## [0.4.0] - 2022-02-22
|
8
|
+
* Lint sourcecode, setup Rubocop proper and Lint in CI by @shayonj in https://github.com/shayonj/pg-osc/pull/46
|
9
|
+
* Uniquely identify operation_type column by @shayonj in https://github.com/shayonj/pg-osc/pull/50
|
10
|
+
* Introduce primary key on audit table for ordered reads by @shayonj in https://github.com/shayonj/pg-osc/pull/49
|
11
|
+
- This addresses an edge case with replay.
|
12
|
+
* Uniquely identify trigger_time column by @shayonj in https://github.com/shayonj/pg-osc/pull/51
|
13
|
+
* Abstract assertions into a helper function by @shayonj in https://github.com/shayonj/pg-osc/pull/52
|
14
|
+
|
15
|
+
## [0.3.0] - 2022-02-21
|
16
|
+
|
17
|
+
- Explicitly call dependencies and bump dependencies by @shayonj https://github.com/shayonj/pg-osc/pull/44
|
18
|
+
- Introduce Dockerfile and release process https://github.com/shayonj/pg-osc/pull/45
|
19
|
+
|
1
20
|
## [0.2.0] - 2022-02-17
|
2
21
|
|
3
22
|
- Use ISOLATION LEVEL SERIALIZABLE ([#42](https://github.com/shayonj/pg-osc/pull/42)) (props to @jfrost)
|
data/CODE_OF_CONDUCT.md
CHANGED
@@ -39,7 +39,7 @@ This Code of Conduct applies within all community spaces, and also applies when
|
|
39
39
|
|
40
40
|
## Enforcement
|
41
41
|
|
42
|
-
Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at
|
42
|
+
Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at shayonj@gmail.com. All complaints will be reviewed and investigated promptly and fairly.
|
43
43
|
|
44
44
|
All community leaders are obligated to respect the privacy and security of the reporter of any incident.
|
45
45
|
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
pg_online_schema_change (0.
|
4
|
+
pg_online_schema_change (0.5.0)
|
5
5
|
ougai (~> 2.0.0)
|
6
6
|
pg (~> 1.3.2)
|
7
7
|
pg_query (~> 2.1.3)
|
@@ -14,7 +14,6 @@ GEM
|
|
14
14
|
coderay (1.1.3)
|
15
15
|
diff-lcs (1.5.0)
|
16
16
|
google-protobuf (3.19.4)
|
17
|
-
google-protobuf (3.19.4-x86_64-linux)
|
18
17
|
method_source (1.0.0)
|
19
18
|
oj (3.13.11)
|
20
19
|
ougai (2.0.0)
|
data/README.md
CHANGED
@@ -16,8 +16,8 @@ pg-online-schema-change (`pg-osc`) is a tool for making schema changes (any `ALT
|
|
16
16
|
- [Installation](#installation)
|
17
17
|
- [Requirements](#requirements)
|
18
18
|
- [Usage](#usage)
|
19
|
-
- [How does it work](#how-does-it-work)
|
20
19
|
- [Prominent features](#prominent-features)
|
20
|
+
- [Load test](#load-test)
|
21
21
|
- [Examples](#examples)
|
22
22
|
* [Renaming a column](#renaming-a-column)
|
23
23
|
* [Multiple ALTER statements](#multiple-alter-statements)
|
@@ -25,6 +25,7 @@ pg-online-schema-change (`pg-osc`) is a tool for making schema changes (any `ALT
|
|
25
25
|
* [Backfill data](#backfill-data)
|
26
26
|
* [Running using Docker](#running-using-docker)
|
27
27
|
- [Caveats](#caveats)
|
28
|
+
- [How does it work](#how-does-it-work)
|
28
29
|
- [Development](#development)
|
29
30
|
- [Releasing](#releasing)
|
30
31
|
- [Contributing](#contributing)
|
@@ -75,13 +76,17 @@ Options:
|
|
75
76
|
-u, --username=USERNAME # Username for the Database
|
76
77
|
-p, --port=N # Port for the Database
|
77
78
|
# Default: 5432
|
78
|
-
-w, --password=PASSWORD # Password for the Database
|
79
|
+
-w, --password=PASSWORD # DEPRECATED: Password for the Database. Please pass PGPASSWORD environment variable instead.
|
79
80
|
-v, [--verbose], [--no-verbose] # Emit logs in debug mode
|
80
81
|
-f, [--drop], [--no-drop] # Drop the original table in the end after the swap
|
81
82
|
-k, [--kill-backends], [--no-kill-backends] # Kill other competing queries/backends when trying to acquire lock for the shadow table creation and swap. It will wait for --wait-time-for-lock duration before killing backends and try upto 3 times.
|
82
83
|
-w, [--wait-time-for-lock=N] # Time to wait before killing backends to acquire lock and/or retrying upto 3 times. It will kill backends if --kill-backends is true, otherwise try upto 3 times and exit if it cannot acquire a lock.
|
83
84
|
# Default: 10
|
84
|
-
-c, [--copy-statement=COPY_STATEMENT] # Takes a .sql file location where you can provide a custom query to be played (ex: backfills) when
|
85
|
+
-c, [--copy-statement=COPY_STATEMENT] # Takes a .sql file location where you can provide a custom query to be played (ex: backfills) when pgosc copies data from the primary to the shadow table. More examples in README.
|
86
|
+
-b, [--pull-batch-count=N] # Number of rows to be replayed on each iteration after copy. This can be tuned for faster catch up and swap. Best used with delta-count.
|
87
|
+
# Default: 1000
|
88
|
+
-e, [--delta-count=N] # Indicates how many rows should be remaining before a swap should be performed. This can be tuned for faster catch up and swap, especially on highly volume tables. Best used with pull-batch-count.
|
89
|
+
# Default: 20
|
85
90
|
```
|
86
91
|
|
87
92
|
```
|
@@ -90,57 +95,39 @@ Usage:
|
|
90
95
|
|
91
96
|
print the version
|
92
97
|
```
|
93
|
-
## How does it work
|
94
|
-
|
95
|
-
- **Primary table**: A table against which a potential schema change is to be run
|
96
|
-
- **Shadow table**: A copy of an existing primary table
|
97
|
-
- **Audit table**: A table to store any updates/inserts/delete on a primary table
|
98
|
-
|
99
|
-
![how-it-works](diagrams/how-it-works.png)
|
100
|
-
|
101
|
-
|
102
|
-
1. Create an audit table to record changes made to the parent table.
|
103
|
-
2. Acquire a brief `ACCESS EXCLUSIVE` lock to add a trigger on the parent table (for inserts, updates, deletes) to the audit table.
|
104
|
-
3. Create a new shadow table and run ALTER/migration on the shadow table.
|
105
|
-
4. Copy all rows from the old table.
|
106
|
-
5. Build indexes on the new table.
|
107
|
-
6. Replay all changes accumulated in the audit table against the shadow table.
|
108
|
-
- Delete rows in the audit table as they are replayed.
|
109
|
-
7. Once the delta (remaining rows) is ~20 rows, acquire an `ACCESS EXCLUSIVE` lock against the parent table within a transaction and:
|
110
|
-
- swap table names (shadow table <> parent table).
|
111
|
-
- update references in other tables (FKs) by dropping and re-creating the FKs with a `NOT VALID`.
|
112
|
-
8. Runs `ANALYZE` on the new table.
|
113
|
-
9. Validates all FKs that were added with `NOT VALID`.
|
114
|
-
10. Drop parent (now old) table (OPTIONAL).
|
115
|
-
|
116
98
|
## Prominent features
|
117
99
|
- `pg-osc` supports when a column is being added, dropped or renamed with no data loss.
|
118
100
|
- `pg-osc` acquires minimal locks throughout the process (read more below on the caveats).
|
119
101
|
- Copies over indexes and Foreign keys.
|
120
102
|
- Optionally drop or retain old tables in the end.
|
103
|
+
- Tune how slow or fast replays from the audit/log table should be ([Replaying larger workloads](#replaying-larger-workloads)).
|
121
104
|
- Backfill old/new columns as data is copied from primary table to shadow table, and then perform the swap. [Example](#backfill-data)
|
122
105
|
- **TBD**: Ability to reverse the change with no data loss. [tracking issue](https://github.com/shayonj/pg-osc/issues/14)
|
123
106
|
|
107
|
+
## Load test
|
108
|
+
|
109
|
+
[More about the preliminary load test figures here](docs/load-test.md)
|
110
|
+
|
124
111
|
## Examples
|
125
112
|
|
126
113
|
### Renaming a column
|
127
114
|
```
|
115
|
+
export PGPASSWORD=""
|
128
116
|
pg-online-schema-change perform \
|
129
117
|
--alter-statement 'ALTER TABLE books RENAME COLUMN email TO new_email' \
|
130
118
|
--dbname "postgres" \
|
131
119
|
--host "localhost" \
|
132
120
|
--username "jamesbond" \
|
133
|
-
--password "" \
|
134
121
|
```
|
135
122
|
|
136
123
|
### Multiple ALTER statements
|
137
124
|
```
|
125
|
+
export PGPASSWORD=""
|
138
126
|
pg-online-schema-change perform \
|
139
127
|
--alter-statement 'ALTER TABLE books ADD COLUMN "purchased" BOOLEAN DEFAULT FALSE; ALTER TABLE books RENAME COLUMN email TO new_email;' \
|
140
128
|
--dbname "postgres" \
|
141
129
|
--host "localhost" \
|
142
130
|
--username "jamesbond" \
|
143
|
-
--password "" \
|
144
131
|
--drop
|
145
132
|
```
|
146
133
|
|
@@ -148,13 +135,30 @@ pg-online-schema-change perform \
|
|
148
135
|
If the operation is being performed on a busy table, you can use `pg-osc`'s `kill-backend` functionality to kill other backends that may be competing with the `pg-osc` operation to acquire a lock for a brief while. The `ACCESS EXCLUSIVE` lock acquired by `pg-osc` is only held for a brief while and released after. You can tune how long `pg-osc` should wait before killing other backends (or if at all `pg-osc` should kill backends in the first place).
|
149
136
|
|
150
137
|
```
|
138
|
+
export PGPASSWORD=""
|
151
139
|
pg-online-schema-change perform \
|
152
140
|
--alter-statement 'ALTER TABLE books ADD COLUMN "purchased" BOOLEAN DEFAULT FALSE;' \
|
153
141
|
--dbname "postgres" \
|
154
142
|
--host "localhost" \
|
155
143
|
--username "jamesbond" \
|
156
|
-
--
|
157
|
-
--
|
144
|
+
--wait-time-for-lock 5 \
|
145
|
+
--kill-backends \
|
146
|
+
--drop
|
147
|
+
```
|
148
|
+
|
149
|
+
### Replaying larger workloads
|
150
|
+
If you have a table with high write volume, the default replay iteration may not suffice. That is - you may see that `pg-osc` is replaying 1000 rows (`pull-batch-count`) in one go from the audit table. `pg-osc` also waits until the remaining row count (`delta-count`) in the audit table is 20 before making the swap. You can tune these values to be higher for faster catch up on these kinds of workloads.
|
151
|
+
|
152
|
+
```
|
153
|
+
export PGPASSWORD=""
|
154
|
+
pg-online-schema-change perform \
|
155
|
+
--alter-statement 'ALTER TABLE books ADD COLUMN "purchased" BOOLEAN DEFAULT FALSE;' \
|
156
|
+
--dbname "postgres" \
|
157
|
+
--host "localhost" \
|
158
|
+
--username "jamesbond" \
|
159
|
+
--pull-batch-count 2000 \
|
160
|
+
--delta-count 500 \
|
161
|
+
--wait-time-for-lock 5 \
|
158
162
|
--kill-backends \
|
159
163
|
--drop
|
160
164
|
```
|
@@ -183,7 +187,6 @@ pg-online-schema-change perform \
|
|
183
187
|
--dbname "postgres" \
|
184
188
|
--host "localhost" \
|
185
189
|
--username "jamesbond" \
|
186
|
-
--password "" \
|
187
190
|
--copy-statement "/src/query.sql" \
|
188
191
|
--drop
|
189
192
|
```
|
@@ -197,7 +200,6 @@ docker run --network host -it --rm shayonj/pg-osc:latest \
|
|
197
200
|
--dbname "postgres" \
|
198
201
|
--host "localhost" \
|
199
202
|
--username "jamesbond" \
|
200
|
-
--password "" \
|
201
203
|
--drop
|
202
204
|
```
|
203
205
|
## Caveats
|
@@ -215,6 +217,29 @@ docker run --network host -it --rm shayonj/pg-osc:latest \
|
|
215
217
|
- Can be fixed in future releases. Feel free to open a feature req.
|
216
218
|
- Foreign keys are dropped & re-added to referencing tables with a `NOT VALID`. A follow on `VALIDATE CONSTRAINT` is run.
|
217
219
|
- Ensures that integrity is maintained and re-introducing FKs doesn't acquire additional locks, hence the `NOT VALID`.
|
220
|
+
## How does it work
|
221
|
+
|
222
|
+
- **Primary table**: A table against which a potential schema change is to be run
|
223
|
+
- **Shadow table**: A copy of an existing primary table
|
224
|
+
- **Audit table**: A table to store any updates/inserts/delete on a primary table
|
225
|
+
|
226
|
+
![how-it-works](docs/how-it-works.png)
|
227
|
+
|
228
|
+
|
229
|
+
1. Create an audit table to record changes made to the parent table.
|
230
|
+
2. Acquire a brief `ACCESS EXCLUSIVE` lock to add a trigger on the parent table (for inserts, updates, deletes) to the audit table.
|
231
|
+
3. Create a new shadow table and run ALTER/migration on the shadow table.
|
232
|
+
4. Copy all rows from the old table.
|
233
|
+
5. Build indexes on the new table.
|
234
|
+
6. Replay all changes accumulated in the audit table against the shadow table.
|
235
|
+
- Delete rows in the audit table as they are replayed.
|
236
|
+
7. Once the delta (remaining rows) is ~20 rows, acquire an `ACCESS EXCLUSIVE` lock against the parent table within a transaction and:
|
237
|
+
- swap table names (shadow table <> parent table).
|
238
|
+
- update references in other tables (FKs) by dropping and re-creating the FKs with a `NOT VALID`.
|
239
|
+
8. Runs `ANALYZE` on the new table.
|
240
|
+
9. Validates all FKs that were added with `NOT VALID`.
|
241
|
+
10. Drop parent (now old) table (OPTIONAL).
|
242
|
+
|
218
243
|
## Development
|
219
244
|
|
220
245
|
- Install ruby 3.0
|
File without changes
|
File without changes
|
Binary file
|
data/docs/load-test.md
ADDED
@@ -0,0 +1,138 @@
|
|
1
|
+
# Preliminary Load Test
|
2
|
+
|
3
|
+
## pg-osc: No downtime schema changes with 7K+ writes/s & 12k+ reads/s
|
4
|
+
|
5
|
+
This is a very basic load test performed with `pgbench` against a single instance PostgreSQL DB running on DigitalOcean with the following configuration:
|
6
|
+
|
7
|
+
- **128GB RAM**
|
8
|
+
- **32vCPU**
|
9
|
+
- **695GB Disk**
|
10
|
+
- Transaction based connection pool with **500 pool limit**
|
11
|
+
|
12
|
+
Total time taken to run schema change: **<3mins**
|
13
|
+
|
14
|
+
## Simulating load with pgbench
|
15
|
+
|
16
|
+
**Initialize**
|
17
|
+
```
|
18
|
+
pgbench -p $PORT --initialize -s 20 -F 20 --foreign-keys --host $HOST -U $USERNAME -d $DB
|
19
|
+
```
|
20
|
+
|
21
|
+
This creates a bunch of pgbench tables. The table being used with `pg-osc` is `pgbench_accounts`, which contains 2M rows, has FKs, and is also referenced by other tables with FKs.
|
22
|
+
|
23
|
+
**Begin**
|
24
|
+
```
|
25
|
+
pgbench -p $PORT -j 72 -c 288 -T 500 -r --host $DB_HOST -U $USERNAME -d $DB
|
26
|
+
```
|
27
|
+
|
28
|
+
## Running pg-osc
|
29
|
+
|
30
|
+
Simple `ALTER` statement for experimentation purposes.
|
31
|
+
|
32
|
+
```sql
|
33
|
+
ALTER TABLE pgbench_accounts ADD COLUMN "purchased" BOOLEAN DEFAULT FALSE;
|
34
|
+
```
|
35
|
+
|
36
|
+
**Execution**
|
37
|
+
|
38
|
+
```bash
|
39
|
+
bundle exec bin/pg-online-schema-change perform \
|
40
|
+
-a 'ALTER TABLE pgbench_accounts ADD COLUMN "purchased" BOOLEAN DEFAULT FALSE;' \
|
41
|
+
-d "pool" \
|
42
|
+
-p 25061 \
|
43
|
+
-h "..." \
|
44
|
+
-u "..." \
|
45
|
+
--pull-batch-count 2000 \
|
46
|
+
--delta-count 200
|
47
|
+
```
|
48
|
+
|
49
|
+
## Outcome
|
50
|
+
|
51
|
+
**pgbench results**
|
52
|
+
|
53
|
+
```
|
54
|
+
number of transactions actually processed: 1060382
|
55
|
+
latency average = 144.874 ms
|
56
|
+
tps = 1767.057392 (including connections establishing)
|
57
|
+
tps = 1777.971823 (excluding connections establishing)
|
58
|
+
statement latencies in milliseconds:
|
59
|
+
0.479 \set aid random(1, 100000 * :scale)
|
60
|
+
0.409 \set bid random(1, 1 * :scale)
|
61
|
+
0.247 \set tid random(1, 10 * :scale)
|
62
|
+
0.208 \set delta random(-5000, 5000)
|
63
|
+
3.136 BEGIN;
|
64
|
+
4.243 UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid;
|
65
|
+
4.488 SELECT abalance FROM pgbench_accounts WHERE aid = :aid;
|
66
|
+
71.017 UPDATE pgbench_tellers SET tbalance = tbalance + :delta WHERE tid = :tid;
|
67
|
+
46.689 UPDATE pgbench_branches SET bbalance = bbalance + :delta WHERE bid = :bid;
|
68
|
+
4.035 INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, CURRENT_TIMESTAMP);
|
69
|
+
4.166 END;
|
70
|
+
```
|
71
|
+
|
72
|
+
**Metrics**
|
73
|
+
![load-test](load-test-1.png)
|
74
|
+
|
75
|
+
**New table structure**
|
76
|
+
|
77
|
+
Added `purchased` column.
|
78
|
+
|
79
|
+
```
|
80
|
+
defaultdb=> \d+ pgbench_accounts;
|
81
|
+
Table "public.pgbench_accounts"
|
82
|
+
Column | Type | Collation | Nullable | Default | Storage | Stats target | Description
|
83
|
+
-----------+---------------+-----------+----------+---------+----------+--------------+-------------
|
84
|
+
aid | integer | | not null | | plain | |
|
85
|
+
bid | integer | | | | plain | |
|
86
|
+
abalance | integer | | | | plain | |
|
87
|
+
filler | character(84) | | | | extended | |
|
88
|
+
purchased | boolean | | | false | plain | |
|
89
|
+
Indexes:
|
90
|
+
"pgosc_st_pgbench_accounts_815029_pkey" PRIMARY KEY, btree (aid)
|
91
|
+
Foreign-key constraints:
|
92
|
+
"pgbench_accounts_bid_fkey" FOREIGN KEY (bid) REFERENCES pgbench_branches(bid)
|
93
|
+
Referenced by:
|
94
|
+
TABLE "pgbench_history" CONSTRAINT "pgbench_history_aid_fkey" FOREIGN KEY (aid) REFERENCES pgbench_accounts(aid)
|
95
|
+
Options: autovacuum_enabled=false, fillfactor=20
|
96
|
+
```
|
97
|
+
|
98
|
+
**Logs**
|
99
|
+
|
100
|
+
<details>
|
101
|
+
<summary>Logs from pg-osc</summary>
|
102
|
+
|
103
|
+
```json
|
104
|
+
{"name":"pg-online-schema-change","hostname":"MacBook-Pro.local","pid":13263,"level":30,"time":"2022-02-25T17:22:01.147-05:00","v":0,"msg":"Setting up audit table","audit_table":"pgosc_at_pgbench_accounts_714a8b","version":"0.4.0"}
|
105
|
+
{"name":"pg-online-schema-change","hostname":"MacBook-Pro.local","pid":13263,"level":30,"time":"2022-02-25T17:22:01.660-05:00","v":0,"msg":"Setting up triggers","version":"0.4.0"}
|
106
|
+
NOTICE: trigger "primary_to_audit_table_trigger" for relation "pgbench_accounts" does not exist, skipping
|
107
|
+
{"name":"pg-online-schema-change","hostname":"MacBook-Pro.local","pid":13263,"level":30,"time":"2022-02-25T17:22:01.814-05:00","v":0,"msg":"Setting up shadow table","shadow_table":"pgosc_st_pgbench_accounts_714a8b","version":"0.4.0"}
|
108
|
+
{"name":"pg-online-schema-change","hostname":"MacBook-Pro.local","pid":13263,"level":30,"time":"2022-02-25T17:22:02.169-05:00","v":0,"msg":"Running alter statement on shadow table","shadow_table":"pgosc_st_pgbench_accounts_714a8b","parent_table":"pgbench_accounts","version":"0.4.0"}
|
109
|
+
{"name":"pg-online-schema-change","hostname":"MacBook-Pro.local","pid":13263,"level":30,"time":"2022-02-25T17:22:02.204-05:00","v":0,"msg":"Clearing contents of audit table before copy..","shadow_table":"pgosc_st_pgbench_accounts_714a8b","parent_table":"pgbench_accounts","version":"0.4.0"}
|
110
|
+
{"name":"pg-online-schema-change","hostname":"MacBook-Pro.local","pid":13263,"level":30,"time":"2022-02-25T17:22:02.240-05:00","v":0,"msg":"Copying contents..","shadow_table":"pgosc_st_pgbench_accounts_714a8b","parent_table":"pgbench_accounts","version":"0.4.0"}
|
111
|
+
{"name":"pg-online-schema-change","hostname":"MacBook-Pro.local","pid":13263,"level":30,"time":"2022-02-25T17:22:20.481-05:00","v":0,"msg":"Performing ANALYZE!","version":"0.4.0"}
|
112
|
+
INFO: analyzing "public.pgbench_accounts"
|
113
|
+
INFO: "pgbench_accounts": scanned 30000 of 166667 pages, containing 360000 live rows and 200 dead rows; 30000 rows in sample, 2000004 estimated total rows
|
114
|
+
{"name":"pg-online-schema-change","hostname":"MacBook-Pro.local","pid":13263,"level":30,"time":"2022-02-25T17:22:21.078-05:00","v":0,"msg":"Replaying rows, count: 2000","version":"0.4.0"}
|
115
|
+
{"name":"pg-online-schema-change","hostname":"MacBook-Pro.local","pid":13263,"level":30,"time":"2022-02-25T17:22:21.580-05:00","v":0,"msg":"Replaying rows, count: 2000","version":"0.4.0"}
|
116
|
+
{"name":"pg-online-schema-change","hostname":"MacBook-Pro.local","pid":13263,"level":30,"time":"2022-02-25T17:22:22.022-05:00","v":0,"msg":"Replaying rows, count: 2000","version":"0.4.0"}
|
117
|
+
{"name":"pg-online-schema-change","hostname":"MacBook-Pro.local","pid":13263,"level":30,"time":"2022-02-25T17:22:22.490-05:00","v":0,"msg":"Replaying rows, count: 2000","version":"0.4.0"}
|
118
|
+
{"name":"pg-online-schema-change","hostname":"MacBook-Pro.local","pid":13263,"level":30,"time":"2022-02-25T17:22:22.866-05:00","v":0,"msg":"Replaying rows, count: 661","version":"0.4.0"}
|
119
|
+
{"name":"pg-online-schema-change","hostname":"MacBook-Pro.local","pid":13263,"level":30,"time":"2022-02-25T17:22:23.212-05:00","v":0,"msg":"Replaying rows, count: 533","version":"0.4.0"}
|
120
|
+
{"name":"pg-online-schema-change","hostname":"MacBook-Pro.local","pid":13263,"level":30,"time":"2022-02-25T17:22:23.512-05:00","v":0,"msg":"Replaying rows, count: 468","version":"0.4.0"}
|
121
|
+
{"name":"pg-online-schema-change","hostname":"MacBook-Pro.local","pid":13263,"level":30,"time":"2022-02-25T17:22:23.809-05:00","v":0,"msg":"Remaining rows below delta count, proceeding towards swap","version":"0.4.0"}
|
122
|
+
{"name":"pg-online-schema-change","hostname":"MacBook-Pro.local","pid":13263,"level":30,"time":"2022-02-25T17:22:23.809-05:00","v":0,"msg":"Performing swap!","version":"0.4.0"}
|
123
|
+
{"name":"pg-online-schema-change","hostname":"MacBook-Pro.local","pid":13263,"level":30,"time":"2022-02-25T17:22:24.259-05:00","v":0,"msg":"Replaying rows, count: 449","version":"0.4.0"}
|
124
|
+
NOTICE: trigger "primary_to_audit_table_trigger" for relation "pgbench_accounts" does not exist, skipping
|
125
|
+
{"name":"pg-online-schema-change","hostname":"MacBook-Pro.local","pid":13263,"level":30,"time":"2022-02-25T17:22:24.650-05:00","v":0,"msg":"Performing ANALYZE!","version":"0.4.0"}
|
126
|
+
INFO: analyzing "public.pgbench_accounts"
|
127
|
+
INFO: "pgbench_accounts": scanned 30000 of 32935 pages, containing 1821834 live rows and 6056 dead rows; 30000 rows in sample, 2000070 estimated total rows
|
128
|
+
{"name":"pg-online-schema-change","hostname":"MacBook-Pro.local","pid":13263,"level":30,"time":"2022-02-25T17:22:24.941-05:00","v":0,"msg":"Validating constraints!","version":"0.4.0"}
|
129
|
+
NOTICE: table "pgosc_st_pgbench_accounts_714a8b" does not exist, skipping
|
130
|
+
{"name":"pg-online-schema-change","hostname":"MacBook-Pro.local","pid":13263,"level":30,"time":"2022-02-25T17:22:26.159-05:00","v":0,"msg":"All tasks successfully completed","version":"0.4.0"}
|
131
|
+
```
|
132
|
+
|
133
|
+
</details>
|
134
|
+
|
135
|
+
|
136
|
+
## Conclusion
|
137
|
+
|
138
|
+
By tweaking `--pull-batch-count` to `2000` (replay 2k rows at once) and `--delta-count` to `200` (time to swap when fewer than 200 rows remain), `pg-osc` was able to perform the schema change with no impact and in very little time. Depending on the database size and load on the table, you can further tune them to achieve desired impact. At some point this is going to plateau - I can imagine the replay factor not working quite well for say 100k commits/s workloads. So, YMMV.
|
@@ -1,8 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "thor"
|
2
4
|
|
3
5
|
module PgOnlineSchemaChange
|
6
|
+
PULL_BATCH_COUNT = 1000
|
7
|
+
DELTA_COUNT = 20
|
4
8
|
class CLI < Thor
|
5
|
-
desc "perform", "
|
9
|
+
desc "perform", "Safely apply schema changes with minimal locks"
|
6
10
|
method_option :alter_statement, aliases: "-a", type: :string, required: true,
|
7
11
|
desc: "The ALTER statement to perform the schema change"
|
8
12
|
method_option :schema, aliases: "-s", type: :string, required: true, default: "public",
|
@@ -11,7 +15,7 @@ module PgOnlineSchemaChange
|
|
11
15
|
method_option :host, aliases: "-h", type: :string, required: true, desc: "Server host where the Database is located"
|
12
16
|
method_option :username, aliases: "-u", type: :string, required: true, desc: "Username for the Database"
|
13
17
|
method_option :port, aliases: "-p", type: :numeric, required: true, default: 5432, desc: "Port for the Database"
|
14
|
-
method_option :password, aliases: "-w", type: :string, required: true, desc: "Password for the Database"
|
18
|
+
method_option :password, aliases: "-w", type: :string, required: true, desc: "DEPRECATED: Password for the Database. Please pass PGPASSWORD environment variable instead."
|
15
19
|
method_option :verbose, aliases: "-v", type: :boolean, default: false, desc: "Emit logs in debug mode"
|
16
20
|
method_option :drop, aliases: "-f", type: :boolean, default: false,
|
17
21
|
desc: "Drop the original table in the end after the swap"
|
@@ -21,11 +25,19 @@ module PgOnlineSchemaChange
|
|
21
25
|
desc: "Time to wait before killing backends to acquire lock and/or retrying upto 3 times. It will kill backends if --kill-backends is true, otherwise try upto 3 times and exit if it cannot acquire a lock."
|
22
26
|
method_option :copy_statement, aliases: "-c", type: :string, required: false, default: "",
|
23
27
|
desc: "Takes a .sql file location where you can provide a custom query to be played (ex: backfills) when pgosc copies data from the primary to the shadow table. More examples in README."
|
28
|
+
method_option :pull_batch_count, aliases: "-b", type: :numeric, required: false, default: PULL_BATCH_COUNT,
|
29
|
+
desc: "Number of rows to be replayed on each iteration after copy. This can be tuned for faster catch up and swap. Best used with delta-count."
|
30
|
+
method_option :delta_count, aliases: "-e", type: :numeric, required: false, default: DELTA_COUNT,
|
31
|
+
desc: "Indicates how many rows should be remaining before a swap should be performed. This can be tuned for faster catch up and swap, especially on highly volume tables. Best used with pull-batch-count."
|
24
32
|
|
25
33
|
def perform
|
26
34
|
client_options = Struct.new(*options.keys.map(&:to_sym)).new(*options.values)
|
35
|
+
PgOnlineSchemaChange.logger(verbose: client_options.verbose)
|
36
|
+
|
37
|
+
PgOnlineSchemaChange.logger.warn("DEPRECATED: -w is deprecated. Please pass PGPASSWORD environment variable instead.") if client_options.password
|
38
|
+
|
39
|
+
client_options.password = ENV["PGPASSWORD"] || client_options.password
|
27
40
|
|
28
|
-
PgOnlineSchemaChange.logger = client_options.verbose
|
29
41
|
PgOnlineSchemaChange::Orchestrate.run!(client_options)
|
30
42
|
end
|
31
43
|
|
@@ -1,9 +1,11 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "pg"
|
2
4
|
|
3
5
|
module PgOnlineSchemaChange
|
4
6
|
class Client
|
5
7
|
attr_accessor :alter_statement, :schema, :dbname, :host, :username, :port, :password, :connection, :table, :drop,
|
6
|
-
:kill_backends, :wait_time_for_lock, :copy_statement
|
8
|
+
:kill_backends, :wait_time_for_lock, :copy_statement, :pull_batch_count, :delta_count
|
7
9
|
|
8
10
|
def initialize(options)
|
9
11
|
@alter_statement = options.alter_statement
|
@@ -16,7 +18,11 @@ module PgOnlineSchemaChange
|
|
16
18
|
@drop = options.drop
|
17
19
|
@kill_backends = options.kill_backends
|
18
20
|
@wait_time_for_lock = options.wait_time_for_lock
|
21
|
+
@pull_batch_count = options.pull_batch_count
|
22
|
+
@delta_count = options.delta_count
|
23
|
+
|
19
24
|
handle_copy_statement(options.copy_statement)
|
25
|
+
handle_validations
|
20
26
|
|
21
27
|
@connection = PG.connect(
|
22
28
|
dbname: @dbname,
|
@@ -26,17 +32,19 @@ module PgOnlineSchemaChange
|
|
26
32
|
port: @port,
|
27
33
|
)
|
28
34
|
|
29
|
-
raise Error, "Not a valid ALTER statement: #{@alter_statement}" unless Query.alter_statement?(@alter_statement)
|
30
|
-
|
31
|
-
unless Query.same_table?(@alter_statement)
|
32
|
-
raise Error "All statements should belong to the same table: #{@alter_statement}"
|
33
|
-
end
|
34
|
-
|
35
35
|
@table = Query.table(@alter_statement)
|
36
36
|
|
37
37
|
PgOnlineSchemaChange.logger.debug("Connection established")
|
38
38
|
end
|
39
39
|
|
40
|
+
def handle_validations
|
41
|
+
raise Error, "Not a valid ALTER statement: #{@alter_statement}" unless Query.alter_statement?(@alter_statement)
|
42
|
+
|
43
|
+
return if Query.same_table?(@alter_statement)
|
44
|
+
|
45
|
+
raise Error "All statements should belong to the same table: #{@alter_statement}"
|
46
|
+
end
|
47
|
+
|
40
48
|
def handle_copy_statement(statement)
|
41
49
|
return if statement.nil? || statement == ""
|
42
50
|
|
@@ -1,4 +1,6 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
FUNC_FIX_SERIAL_SEQUENCE = <<~SQL
|
2
4
|
CREATE OR REPLACE FUNCTION fix_serial_sequence(_table regclass, _newtable text)
|
3
5
|
RETURNS void AS
|
4
6
|
$func$
|
@@ -35,7 +37,7 @@ FUNC_FIX_SERIAL_SEQUENCE = <<~SQL.freeze
|
|
35
37
|
$func$ LANGUAGE plpgsql VOLATILE;
|
36
38
|
SQL
|
37
39
|
|
38
|
-
FUNC_CREATE_TABLE_ALL = <<~SQL
|
40
|
+
FUNC_CREATE_TABLE_ALL = <<~SQL
|
39
41
|
CREATE OR REPLACE FUNCTION create_table_all(source_table text, newsource_table text)
|
40
42
|
RETURNS void language plpgsql
|
41
43
|
as $$
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module PgOnlineSchemaChange
|
2
4
|
module Helper
|
3
5
|
def primary_key
|
@@ -15,7 +17,14 @@ module PgOnlineSchemaChange
|
|
15
17
|
result = Store.send(:get, method)
|
16
18
|
return result if result
|
17
19
|
|
18
|
-
|
20
|
+
super
|
21
|
+
end
|
22
|
+
|
23
|
+
def respond_to_missing?(method_name, *args)
|
24
|
+
result = Store.send(:get, method)
|
25
|
+
return true if result
|
26
|
+
|
27
|
+
super
|
19
28
|
end
|
20
29
|
end
|
21
30
|
end
|
@@ -1,8 +1,10 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "securerandom"
|
2
4
|
|
3
5
|
module PgOnlineSchemaChange
|
4
6
|
class Orchestrate
|
5
|
-
SWAP_STATEMENT_TIMEOUT = "5s"
|
7
|
+
SWAP_STATEMENT_TIMEOUT = "5s"
|
6
8
|
|
7
9
|
extend Helper
|
8
10
|
|
@@ -21,12 +23,22 @@ module PgOnlineSchemaChange
|
|
21
23
|
Query.run(client.connection, FUNC_FIX_SERIAL_SEQUENCE)
|
22
24
|
Query.run(client.connection, FUNC_CREATE_TABLE_ALL)
|
23
25
|
|
26
|
+
setup_store
|
27
|
+
end
|
28
|
+
|
29
|
+
def setup_store
|
24
30
|
# Set this early on to ensure their creation and cleanup (unexpected)
|
25
31
|
# happens at all times. IOW, the calls from Store.get always return
|
26
32
|
# the same value.
|
27
33
|
Store.set(:old_primary_table, "pgosc_op_table_#{client.table}")
|
28
|
-
Store.set(:audit_table, "pgosc_at_#{client.table}_#{
|
29
|
-
Store.set(:
|
34
|
+
Store.set(:audit_table, "pgosc_at_#{client.table}_#{pgosc_identifier}")
|
35
|
+
Store.set(:operation_type_column, "operation_type_#{pgosc_identifier}")
|
36
|
+
Store.set(:trigger_time_column, "trigger_time_#{pgosc_identifier}")
|
37
|
+
Store.set(:audit_table_pk, "at_#{pgosc_identifier}_id")
|
38
|
+
Store.set(:audit_table_pk_sequence, "#{audit_table}_#{audit_table_pk}_seq")
|
39
|
+
Store.set(:shadow_table, "pgosc_st_#{client.table}_#{pgosc_identifier}")
|
40
|
+
|
41
|
+
Store.set(:foreign_key_statements, Query.get_foreign_keys_to_refresh(client, client.table))
|
30
42
|
end
|
31
43
|
|
32
44
|
def run!(options)
|
@@ -70,7 +82,7 @@ module PgOnlineSchemaChange
|
|
70
82
|
reader = setup_signals!
|
71
83
|
signal = reader.gets.chomp
|
72
84
|
|
73
|
-
while !reader.closed? && IO.select([reader])
|
85
|
+
while !reader.closed? && IO.select([reader]) # rubocop:disable Lint/UnreachableLoop
|
74
86
|
logger.info "Signal #{signal} received, cleaning up"
|
75
87
|
|
76
88
|
client.connection.cancel
|
@@ -85,7 +97,7 @@ module PgOnlineSchemaChange
|
|
85
97
|
logger.info("Setting up audit table", { audit_table: audit_table })
|
86
98
|
|
87
99
|
sql = <<~SQL
|
88
|
-
CREATE TABLE #{audit_table} (
|
100
|
+
CREATE TABLE #{audit_table} (#{audit_table_pk} SERIAL PRIMARY KEY, #{operation_type_column} text, #{trigger_time_column} timestamp, LIKE #{client.table});
|
89
101
|
SQL
|
90
102
|
|
91
103
|
Query.run(client.connection, sql)
|
@@ -109,13 +121,13 @@ module PgOnlineSchemaChange
|
|
109
121
|
$$
|
110
122
|
BEGIN
|
111
123
|
IF ( TG_OP = 'INSERT') THEN
|
112
|
-
INSERT INTO \"#{audit_table}\" select 'INSERT',
|
124
|
+
INSERT INTO \"#{audit_table}\" select nextval(\'#{audit_table_pk_sequence}\'), 'INSERT', clock_timestamp(), NEW.* ;
|
113
125
|
RETURN NEW;
|
114
126
|
ELSIF ( TG_OP = 'UPDATE') THEN
|
115
|
-
INSERT INTO \"#{audit_table}\" select 'UPDATE',
|
127
|
+
INSERT INTO \"#{audit_table}\" select nextval(\'#{audit_table_pk_sequence}\'), 'UPDATE', clock_timestamp(), NEW.* ;
|
116
128
|
RETURN NEW;
|
117
129
|
ELSIF ( TG_OP = 'DELETE') THEN
|
118
|
-
INSERT INTO \"#{audit_table}\" select 'DELETE',
|
130
|
+
INSERT INTO \"#{audit_table}\" select nextval(\'#{audit_table_pk_sequence}\'), 'DELETE', clock_timestamp(), OLD.* ;
|
119
131
|
RETURN NEW;
|
120
132
|
END IF;
|
121
133
|
END;
|
@@ -153,7 +165,7 @@ module PgOnlineSchemaChange
|
|
153
165
|
# re-uses transaction with serializable
|
154
166
|
# Disabling vacuum to avoid any issues during the process
|
155
167
|
result = Query.storage_parameters_for(client, client.table, true) || ""
|
156
|
-
|
168
|
+
Store.set(:primary_table_storage_parameters, result)
|
157
169
|
|
158
170
|
logger.debug("Disabling vacuum on shadow and audit table",
|
159
171
|
{ shadow_table: shadow_table, audit_table: audit_table })
|
@@ -185,8 +197,7 @@ module PgOnlineSchemaChange
|
|
185
197
|
# Begin the process to copy data into copy table
|
186
198
|
# depending on the size of the table, this can be a time
|
187
199
|
# taking operation.
|
188
|
-
logger.info("Clearing contents of audit table before copy..",
|
189
|
-
{ shadow_table: shadow_table, parent_table: client.table })
|
200
|
+
logger.info("Clearing contents of audit table before copy..", { shadow_table: shadow_table, parent_table: client.table })
|
190
201
|
Query.run(client.connection, "DELETE FROM #{audit_table}", true)
|
191
202
|
|
192
203
|
logger.info("Copying contents..", { shadow_table: shadow_table, parent_table: client.table })
|
@@ -195,7 +206,7 @@ module PgOnlineSchemaChange
|
|
195
206
|
return Query.run(client.connection, query, true)
|
196
207
|
end
|
197
208
|
|
198
|
-
sql = Query.copy_data_statement(client, shadow_table)
|
209
|
+
sql = Query.copy_data_statement(client, shadow_table, true)
|
199
210
|
Query.run(client.connection, sql, true)
|
200
211
|
ensure
|
201
212
|
Query.run(client.connection, "COMMIT;") # commit the serializable transaction
|
@@ -212,7 +223,6 @@ module PgOnlineSchemaChange
|
|
212
223
|
def swap!
|
213
224
|
logger.info("Performing swap!")
|
214
225
|
|
215
|
-
foreign_key_statements = Query.get_foreign_keys_to_refresh(client, client.table)
|
216
226
|
storage_params_reset = primary_table_storage_parameters.empty? ? "" : "ALTER TABLE #{client.table} SET (#{primary_table_storage_parameters});"
|
217
227
|
|
218
228
|
# From here on, all statements are carried out in a single
|
@@ -235,7 +245,7 @@ module PgOnlineSchemaChange
|
|
235
245
|
DROP TRIGGER IF EXISTS primary_to_audit_table_trigger ON #{client.table};
|
236
246
|
SQL
|
237
247
|
|
238
|
-
Query.run(client.connection, sql)
|
248
|
+
Query.run(client.connection, sql, opened)
|
239
249
|
ensure
|
240
250
|
Query.run(client.connection, "COMMIT;")
|
241
251
|
Query.run(client.connection, "SET statement_timeout = 0;")
|
@@ -261,6 +271,7 @@ module PgOnlineSchemaChange
|
|
261
271
|
shadow_table_drop = shadow_table ? "DROP TABLE IF EXISTS #{shadow_table}" : ""
|
262
272
|
|
263
273
|
sql = <<~SQL
|
274
|
+
DROP TRIGGER IF EXISTS primary_to_audit_table_trigger ON #{client.table};
|
264
275
|
#{audit_table_drop};
|
265
276
|
#{shadow_table_drop};
|
266
277
|
#{primary_drop}
|
@@ -272,8 +283,10 @@ module PgOnlineSchemaChange
|
|
272
283
|
Query.run(client.connection, sql)
|
273
284
|
end
|
274
285
|
|
275
|
-
private
|
276
|
-
|
286
|
+
private
|
287
|
+
|
288
|
+
def pgosc_identifier
|
289
|
+
@pgosc_identifier ||= SecureRandom.hex(3)
|
277
290
|
end
|
278
291
|
end
|
279
292
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "pg_query"
|
2
4
|
require "pg"
|
3
5
|
|
@@ -5,7 +7,7 @@ module PgOnlineSchemaChange
|
|
5
7
|
class Query
|
6
8
|
extend Helper
|
7
9
|
|
8
|
-
INDEX_SUFFIX = "_pgosc"
|
10
|
+
INDEX_SUFFIX = "_pgosc"
|
9
11
|
DROPPED_COLUMN_TYPE = :AT_DropColumn
|
10
12
|
RENAMED_COLUMN_TYPE = :AT_RenameColumn
|
11
13
|
LOCK_ATTEMPT = 4
|
@@ -15,28 +17,28 @@ module PgOnlineSchemaChange
|
|
15
17
|
PgQuery.parse(query).tree.stmts.all? do |statement|
|
16
18
|
statement.stmt.alter_table_stmt.instance_of?(PgQuery::AlterTableStmt) || statement.stmt.rename_stmt.instance_of?(PgQuery::RenameStmt)
|
17
19
|
end
|
18
|
-
rescue PgQuery::ParseError
|
20
|
+
rescue PgQuery::ParseError
|
19
21
|
false
|
20
22
|
end
|
21
23
|
|
22
24
|
def same_table?(query)
|
23
|
-
tables = PgQuery.parse(query).tree.stmts.
|
25
|
+
tables = PgQuery.parse(query).tree.stmts.filter_map do |statement|
|
24
26
|
if statement.stmt.alter_table_stmt.instance_of?(PgQuery::AlterTableStmt)
|
25
27
|
statement.stmt.alter_table_stmt.relation.relname
|
26
28
|
elsif statement.stmt.rename_stmt.instance_of?(PgQuery::RenameStmt)
|
27
29
|
statement.stmt.rename_stmt.relation.relname
|
28
30
|
end
|
29
|
-
end
|
31
|
+
end
|
30
32
|
|
31
33
|
tables.uniq.count == 1
|
32
|
-
rescue PgQuery::ParseError
|
34
|
+
rescue PgQuery::ParseError
|
33
35
|
false
|
34
36
|
end
|
35
37
|
|
36
38
|
def table(query)
|
37
|
-
from_rename_statement = PgQuery.parse(query).tree.stmts.
|
39
|
+
from_rename_statement = PgQuery.parse(query).tree.stmts.filter_map do |statement|
|
38
40
|
statement.stmt.rename_stmt&.relation&.relname
|
39
|
-
end
|
41
|
+
end[0]
|
40
42
|
PgQuery.parse(query).tables[0] || from_rename_statement
|
41
43
|
end
|
42
44
|
|
@@ -48,7 +50,7 @@ module PgOnlineSchemaChange
|
|
48
50
|
connection.async_exec("BEGIN;")
|
49
51
|
|
50
52
|
result = connection.async_exec(query, &block)
|
51
|
-
rescue Exception
|
53
|
+
rescue Exception # rubocop:disable Lint/RescueException
|
52
54
|
connection.cancel if connection.transaction_status != PG::PQTRANS_IDLE
|
53
55
|
connection.block
|
54
56
|
logger.info("Exception raised, rolling back query", { rollback: true, query: query })
|
@@ -144,11 +146,11 @@ module PgOnlineSchemaChange
|
|
144
146
|
end
|
145
147
|
|
146
148
|
references.map do |row|
|
147
|
-
if row["definition"].end_with?("NOT VALID")
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
149
|
+
add_statement = if row["definition"].end_with?("NOT VALID")
|
150
|
+
"ALTER TABLE #{row["table_on"]} ADD CONSTRAINT #{row["constraint_name"]} #{row["definition"]};"
|
151
|
+
else
|
152
|
+
"ALTER TABLE #{row["table_on"]} ADD CONSTRAINT #{row["constraint_name"]} #{row["definition"]} NOT VALID;"
|
153
|
+
end
|
152
154
|
|
153
155
|
drop_statement = "ALTER TABLE #{row["table_on"]} DROP CONSTRAINT #{row["constraint_name"]};"
|
154
156
|
|
@@ -291,7 +293,7 @@ module PgOnlineSchemaChange
|
|
291
293
|
client.connection.quote_ident(select_column)
|
292
294
|
end
|
293
295
|
|
294
|
-
|
296
|
+
<<~SQL
|
295
297
|
INSERT INTO #{shadow_table}(#{insert_into_columns.join(", ")})
|
296
298
|
SELECT #{select_columns.join(", ")}
|
297
299
|
FROM ONLY #{client.table}
|
@@ -1,12 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
4
|
+
|
1
5
|
module PgOnlineSchemaChange
|
2
6
|
class Replay
|
3
7
|
extend Helper
|
4
8
|
|
5
9
|
class << self
|
6
|
-
PULL_BATCH_COUNT = 1000
|
7
|
-
DELTA_COUNT = 20
|
8
|
-
RESERVED_COLUMNS = %w[operation_type trigger_time].freeze
|
9
|
-
|
10
10
|
# This, picks PULL_BATCH_COUNT rows by primary key from audit_table,
|
11
11
|
# replays it on the shadow_table. Once the batch is done,
|
12
12
|
# it them deletes those PULL_BATCH_COUNT rows from audit_table. Then, pull another batch,
|
@@ -17,7 +17,7 @@ module PgOnlineSchemaChange
|
|
17
17
|
loop do
|
18
18
|
rows = rows_to_play
|
19
19
|
|
20
|
-
raise CountBelowDelta if rows.count <=
|
20
|
+
raise CountBelowDelta if rows.count <= client.delta_count
|
21
21
|
|
22
22
|
play!(rows)
|
23
23
|
end
|
@@ -25,7 +25,7 @@ module PgOnlineSchemaChange
|
|
25
25
|
|
26
26
|
def rows_to_play(reuse_trasaction = false)
|
27
27
|
select_query = <<~SQL
|
28
|
-
SELECT * FROM #{audit_table} ORDER BY #{
|
28
|
+
SELECT * FROM #{audit_table} ORDER BY #{audit_table_pk} LIMIT #{client.pull_batch_count};
|
29
29
|
SQL
|
30
30
|
|
31
31
|
rows = []
|
@@ -34,6 +34,10 @@ module PgOnlineSchemaChange
|
|
34
34
|
rows
|
35
35
|
end
|
36
36
|
|
37
|
+
def reserved_columns
|
38
|
+
@reserved_columns ||= [trigger_time_column, operation_type_column, audit_table_pk]
|
39
|
+
end
|
40
|
+
|
37
41
|
def play!(rows, reuse_trasaction = false)
|
38
42
|
logger.info("Replaying rows, count: #{rows.size}")
|
39
43
|
|
@@ -44,7 +48,7 @@ module PgOnlineSchemaChange
|
|
44
48
|
|
45
49
|
# Remove audit table cols, since we will be
|
46
50
|
# re-mapping them for inserts and updates
|
47
|
-
|
51
|
+
reserved_columns.each do |col|
|
48
52
|
new_row.delete(col)
|
49
53
|
end
|
50
54
|
|
@@ -73,7 +77,7 @@ module PgOnlineSchemaChange
|
|
73
77
|
client.connection.escape_string(value)
|
74
78
|
end
|
75
79
|
|
76
|
-
case row[
|
80
|
+
case row[operation_type_column]
|
77
81
|
when "INSERT"
|
78
82
|
values = new_row.map { |_, val| "'#{val}'" }.join(",")
|
79
83
|
|
@@ -83,7 +87,7 @@ module PgOnlineSchemaChange
|
|
83
87
|
SQL
|
84
88
|
to_be_replayed << sql
|
85
89
|
|
86
|
-
to_be_deleted_rows << "'#{row[
|
90
|
+
to_be_deleted_rows << "'#{row[audit_table_pk]}'"
|
87
91
|
when "UPDATE"
|
88
92
|
set_values = new_row.map do |column, value|
|
89
93
|
"#{column} = '#{value}'"
|
@@ -96,27 +100,29 @@ module PgOnlineSchemaChange
|
|
96
100
|
SQL
|
97
101
|
to_be_replayed << sql
|
98
102
|
|
99
|
-
to_be_deleted_rows << "'#{row[
|
103
|
+
to_be_deleted_rows << "'#{row[audit_table_pk]}'"
|
100
104
|
when "DELETE"
|
101
105
|
sql = <<~SQL
|
102
106
|
DELETE FROM #{shadow_table} WHERE #{primary_key}=\'#{row[primary_key]}\';
|
103
107
|
SQL
|
104
108
|
to_be_replayed << sql
|
105
109
|
|
106
|
-
to_be_deleted_rows << "'#{row[
|
110
|
+
to_be_deleted_rows << "'#{row[audit_table_pk]}'"
|
107
111
|
end
|
108
112
|
end
|
109
113
|
|
110
114
|
Query.run(client.connection, to_be_replayed.join, reuse_trasaction)
|
111
115
|
|
112
116
|
# Delete items from the audit now that are replayed
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
117
|
+
return unless to_be_deleted_rows.count >= 1
|
118
|
+
|
119
|
+
delete_query = <<~SQL
|
120
|
+
DELETE FROM #{audit_table} WHERE #{audit_table_pk} IN (#{to_be_deleted_rows.join(",")})
|
121
|
+
SQL
|
122
|
+
Query.run(client.connection, delete_query, reuse_trasaction)
|
119
123
|
end
|
120
124
|
end
|
121
125
|
end
|
122
126
|
end
|
127
|
+
|
128
|
+
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
@@ -1,17 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "pg_query"
|
2
4
|
require "pg"
|
3
5
|
|
4
6
|
module PgOnlineSchemaChange
|
5
7
|
class Store
|
6
8
|
class << self
|
7
|
-
|
9
|
+
@object = {}
|
8
10
|
|
9
11
|
def get(key)
|
10
|
-
|
12
|
+
@object ||= {}
|
13
|
+
@object[key.to_s] || @object[key.to_sym]
|
11
14
|
end
|
12
15
|
|
13
16
|
def set(key, value)
|
14
|
-
|
17
|
+
@object ||= {}
|
18
|
+
@object[key.to_sym] = value
|
15
19
|
end
|
16
20
|
end
|
17
21
|
end
|
@@ -6,28 +6,24 @@ require "ougai"
|
|
6
6
|
require "pg_online_schema_change/version"
|
7
7
|
require "pg_online_schema_change/helper"
|
8
8
|
require "pg_online_schema_change/functions"
|
9
|
-
require "pg_online_schema_change/cli"
|
10
9
|
require "pg_online_schema_change/client"
|
11
10
|
require "pg_online_schema_change/query"
|
12
11
|
require "pg_online_schema_change/store"
|
13
12
|
require "pg_online_schema_change/replay"
|
14
13
|
require "pg_online_schema_change/orchestrate"
|
14
|
+
require "pg_online_schema_change/cli"
|
15
15
|
|
16
16
|
module PgOnlineSchemaChange
|
17
17
|
class Error < StandardError; end
|
18
18
|
class CountBelowDelta < StandardError; end
|
19
19
|
class AccessExclusiveLockNotAcquired < StandardError; end
|
20
20
|
|
21
|
-
def self.logger
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
end
|
29
|
-
|
30
|
-
def self.logger
|
31
|
-
@@logger
|
21
|
+
def self.logger(verbose: false)
|
22
|
+
@logger ||= begin
|
23
|
+
logger = Ougai::Logger.new($stdout)
|
24
|
+
logger.level = verbose ? Ougai::Logger::TRACE : Ougai::Logger::INFO
|
25
|
+
logger.with_fields = { version: PgOnlineSchemaChange::VERSION }
|
26
|
+
logger
|
27
|
+
end
|
32
28
|
end
|
33
29
|
end
|
data/scripts/release.sh
CHANGED
@@ -11,8 +11,11 @@ gem build pg_online_schema_change.gemspec
|
|
11
11
|
echo "=== Pushing gem ===="
|
12
12
|
gem push pg_online_schema_change-$VERSION.gem
|
13
13
|
|
14
|
+
echo "=== Sleeping for 5s ===="
|
15
|
+
sleep 5
|
16
|
+
|
14
17
|
echo "=== Building Image ===="
|
15
|
-
docker build . --build-arg VERSION=$VERSION -t pg-osc
|
18
|
+
docker build . --build-arg VERSION=$VERSION -t shayonj/pg-osc:$VERSION
|
16
19
|
|
17
20
|
echo "=== Tagging Image ===="
|
18
21
|
docker image tag shayonj/pg-osc:$VERSION shayonj/pg-osc:latest
|
@@ -20,3 +23,6 @@ docker image tag shayonj/pg-osc:$VERSION shayonj/pg-osc:latest
|
|
20
23
|
echo "=== Pushing Image ===="
|
21
24
|
docker push shayonj/pg-osc:$VERSION
|
22
25
|
docker push shayonj/pg-osc:latest
|
26
|
+
|
27
|
+
echo "=== Cleaning up ===="
|
28
|
+
rm pg_online_schema_change-$VERSION.gem
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pg_online_schema_change
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shayon Mukherjee
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-02-
|
11
|
+
date: 2022-02-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ougai
|
@@ -191,6 +191,7 @@ extra_rdoc_files: []
|
|
191
191
|
files:
|
192
192
|
- ".rspec"
|
193
193
|
- ".rubocop.yml"
|
194
|
+
- ".rubocop_todo.yml"
|
194
195
|
- ".ruby-version"
|
195
196
|
- CHANGELOG.md
|
196
197
|
- CODE_OF_CONDUCT.md
|
@@ -203,9 +204,11 @@ files:
|
|
203
204
|
- bin/console
|
204
205
|
- bin/pg-online-schema-change
|
205
206
|
- bin/setup
|
206
|
-
- diagrams/how-it-works.excalidraw
|
207
|
-
- diagrams/how-it-works.png
|
208
207
|
- docker-compose.yml
|
208
|
+
- docs/how-it-works.excalidraw
|
209
|
+
- docs/how-it-works.png
|
210
|
+
- docs/load-test-1.png
|
211
|
+
- docs/load-test.md
|
209
212
|
- lib/pg_online_schema_change.rb
|
210
213
|
- lib/pg_online_schema_change/cli.rb
|
211
214
|
- lib/pg_online_schema_change/client.rb
|