backfiller 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/README.md +86 -10
- data/backfiller.gemspec +1 -1
- data/lib/backfiller.rb +1 -1
- data/lib/backfiller/runner.rb +8 -5
- metadata +3 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: e03f52ddf2d8dbe1a7aada95079d1bbbeb6588e0a361122062a6c7e1c871d01d
|
4
|
+
data.tar.gz: d6b0f6b92fe6da8ac84338ff76435f16a6eea506ce42e762a594f2e81af05542
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d2d62553aa42545079e829e93c58d01171047b6874112e123fe2f4bc0bb0bf5117c5334bd5b527600d216bc3997ed137b8f5ffded9327e068fde057f006b909c
|
7
|
+
data.tar.gz: ab0c2aa054baabd9afd9b94bdd5cf9b3313420ef41971c374b3c4571cacf551ebef04e7dc0488f389a6f179a766fea7cb15bf4133a1b58d274b79331127353df
|
data/README.md
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+

|
2
|
+
|
1
3
|
# Backfiller
|
2
4
|
|
3
5
|
The backfill machine for null database columns.
|
@@ -14,10 +16,9 @@ This gem maybe handly for `no-downtime` deployment especially when you need to f
|
|
14
16
|
* add a migration that disallows null values (null: false)
|
15
17
|
* deploy code that starts using new column
|
16
18
|
|
17
|
-
|
18
19
|
## Concept
|
19
20
|
|
20
|
-
|
21
|
+
The idea is to prepare all data in the selection method on the database server, fetch it using the CURSOR feature, and then build simple UPDATE queries.
|
21
22
|
This way we minimize DB server resource usage and lock only one record (atomic update).
|
22
23
|
We use two connections to database:
|
23
24
|
* master - to create a cursor in a transaction and fetch data in batches.
|
@@ -27,7 +28,8 @@ Even if backfill process crashes you may resolve issue and run it again to proce
|
|
27
28
|
|
28
29
|
## Connection adapters
|
29
30
|
|
30
|
-
Curently it
|
31
|
+
Currently it supports the following ActiveRecord connection adapters:
|
32
|
+
* PostgreSQL
|
31
33
|
|
32
34
|
## Installation
|
33
35
|
|
@@ -51,10 +53,13 @@ Assume we we want to backfill `profiles.name` column from `users.first_name`, `u
|
|
51
53
|
|
52
54
|
Create a backfill task in `db/backfill/profile_name.rb` and define the required methods:
|
53
55
|
|
56
|
+
#### Single worker execution query
|
57
|
+
|
54
58
|
```ruby
|
55
59
|
class Backfill::ProfileName
|
60
|
+
|
56
61
|
def select_sql
|
57
|
-
|
62
|
+
<<~SQL
|
58
63
|
SELECT
|
59
64
|
profile.id AS profile_id,
|
60
65
|
CONCAT(users.first_name, ' ', users.last_name) AS profile_name
|
@@ -66,31 +71,102 @@ class Backfill::ProfileName
|
|
66
71
|
SQL
|
67
72
|
end
|
68
73
|
|
69
|
-
def
|
70
|
-
|
74
|
+
def execute_sql(connection, row)
|
75
|
+
<<~SQL
|
71
76
|
UPDATE profiles SET
|
72
77
|
name = #{connection.quote(row['profile_name'])}
|
73
|
-
|
74
|
-
id = #{connection.quote(row[
|
78
|
+
WHERE
|
79
|
+
id = #{connection.quote(row['profile_id'])}
|
75
80
|
SQL
|
76
81
|
end
|
82
|
+
|
83
|
+
end
|
84
|
+
```
|
85
|
+
|
86
|
+
#### Multiple worker execution queries
|
87
|
+
|
88
|
+
```ruby
|
89
|
+
class Backfill::ProfileName
|
90
|
+
|
91
|
+
def select_sql
|
92
|
+
<<~SQL
|
93
|
+
SELECT
|
94
|
+
profile.id AS profile_id,
|
95
|
+
CONCAT(users.first_name, ' ', users.last_name) AS profile_name
|
96
|
+
FROM profiles
|
97
|
+
INNER JOIN users ON
|
98
|
+
users.id = profiles.user_id
|
99
|
+
WHERE
|
100
|
+
profiles.name IS NULL
|
101
|
+
SQL
|
102
|
+
end
|
103
|
+
|
104
|
+
def execute_sql(connection, row)
|
105
|
+
[
|
106
|
+
'BEGIN',
|
107
|
+
<<~SQL,
|
108
|
+
UPDATE profiles SET
|
109
|
+
name = #{connection.quote(row['profile_name'])}
|
110
|
+
WHERE
|
111
|
+
id = #{connection.quote(row['profile_id'])} AND
|
112
|
+
(SELECT pg_try_advisory_xact_lock(12345678)') = TRUE
|
113
|
+
SQL
|
114
|
+
'COMMIT'
|
115
|
+
]
|
116
|
+
end
|
117
|
+
|
77
118
|
end
|
119
|
+
|
78
120
|
```
|
79
121
|
|
122
|
+
#### Custom row processing
|
123
|
+
|
124
|
+
```ruby
|
125
|
+
class Backfill::ProfileName
|
126
|
+
|
127
|
+
def select_sql
|
128
|
+
<<~SQL
|
129
|
+
SELECT
|
130
|
+
profile.id AS profile_id,
|
131
|
+
CONCAT(users.first_name, ' ', users.last_name) AS profile_name
|
132
|
+
FROM profiles
|
133
|
+
INNER JOIN users ON
|
134
|
+
users.id = profiles.user_id
|
135
|
+
WHERE
|
136
|
+
profiles.name IS NULL
|
137
|
+
SQL
|
138
|
+
end
|
139
|
+
|
140
|
+
def process_row(connection, row)
|
141
|
+
connection.execute 'BEGIN'
|
142
|
+
if connection.select_value 'SELECT pg_try_advisory_xact_lock(12345678)'
|
143
|
+
connection.execute <<~SQL
|
144
|
+
INSERT INTO contacts(
|
145
|
+
full_name
|
146
|
+
)
|
147
|
+
VALUES(
|
148
|
+
#{connection.quote(row['profile_name'])},
|
149
|
+
)
|
150
|
+
SQL
|
151
|
+
connection.execute 'COMMIT'
|
152
|
+
end
|
153
|
+
|
154
|
+
end
|
155
|
+
|
156
|
+
```
|
80
157
|
And then just run rake task:
|
81
158
|
|
82
159
|
```bash
|
83
160
|
$ rails db:backfill[profile_name]
|
84
161
|
```
|
85
162
|
|
86
|
-
|
87
163
|
## Configuration
|
88
164
|
|
89
165
|
For a Rails application, backfiller is initialized with the following options:
|
90
166
|
|
91
167
|
* task_directory: `RAILS_ROOT/db/backfill`
|
92
168
|
* task_namespace: `Backfill`
|
93
|
-
* batch_size
|
169
|
+
* batch_size: `1_000`
|
94
170
|
* connection_pool: `ApplicationRecord.connection_pool`
|
95
171
|
* logger: `ApplicationRecord.logger`
|
96
172
|
|
data/backfiller.gemspec
CHANGED
data/lib/backfiller.rb
CHANGED
data/lib/backfiller/runner.rb
CHANGED
@@ -4,12 +4,14 @@ module Backfiller
|
|
4
4
|
attr_reader \
|
5
5
|
:task,
|
6
6
|
:connection_pool,
|
7
|
-
:batch_size
|
7
|
+
:batch_size,
|
8
|
+
:process_method
|
8
9
|
|
9
10
|
def initialize(task_name)
|
10
11
|
@task = build_task(task_name)
|
11
12
|
@connection_pool = @task.respond_to?(:connection_pool) ? @task.connection_pool : Backfiller.connection_pool
|
12
13
|
@batch_size = @task.respond_to?(:batch_size) ? @task.batch_size : Backfiller.batch_size
|
14
|
+
@process_method = @task.respond_to?(:process_row) ? @task.method(:process_row) : self.method(:process_row)
|
13
15
|
end
|
14
16
|
|
15
17
|
def run
|
@@ -17,7 +19,7 @@ module Backfiller
|
|
17
19
|
worker_connection = acquire_connection
|
18
20
|
|
19
21
|
fetch_each(master_connection) do |row|
|
20
|
-
|
22
|
+
process_method.call(worker_connection, row)
|
21
23
|
end
|
22
24
|
|
23
25
|
release_connection(master_connection)
|
@@ -80,9 +82,10 @@ module Backfiller
|
|
80
82
|
end
|
81
83
|
end
|
82
84
|
|
83
|
-
def
|
84
|
-
|
85
|
+
def process_row(connection, row)
|
86
|
+
Array(task.execute_sql(connection, row)).each do |sql|
|
87
|
+
connection.execute(sql)
|
88
|
+
end
|
85
89
|
end
|
86
|
-
|
87
90
|
end
|
88
91
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: backfiller
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andriy Yanko
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-06-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|
@@ -107,10 +107,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
107
107
|
- !ruby/object:Gem::Version
|
108
108
|
version: '0'
|
109
109
|
requirements: []
|
110
|
-
|
111
|
-
rubygems_version: 2.6.11
|
110
|
+
rubygems_version: 3.0.3
|
112
111
|
signing_key:
|
113
112
|
specification_version: 4
|
114
113
|
summary: Backfiller for null database columns
|
115
114
|
test_files: []
|
116
|
-
has_rdoc:
|