backfiller 0.0.1 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/README.md +86 -10
- data/backfiller.gemspec +1 -1
- data/lib/backfiller.rb +1 -1
- data/lib/backfiller/runner.rb +8 -5
- metadata +3 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: e03f52ddf2d8dbe1a7aada95079d1bbbeb6588e0a361122062a6c7e1c871d01d
|
4
|
+
data.tar.gz: d6b0f6b92fe6da8ac84338ff76435f16a6eea506ce42e762a594f2e81af05542
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d2d62553aa42545079e829e93c58d01171047b6874112e123fe2f4bc0bb0bf5117c5334bd5b527600d216bc3997ed137b8f5ffded9327e068fde057f006b909c
|
7
|
+
data.tar.gz: ab0c2aa054baabd9afd9b94bdd5cf9b3313420ef41971c374b3c4571cacf551ebef04e7dc0488f389a6f179a766fea7cb15bf4133a1b58d274b79331127353df
|
data/README.md
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
![Backfill machine](https://railsware.github.io/backfiller/assets/backfill_machine.jpg)
|
2
|
+
|
1
3
|
# Backfiller
|
2
4
|
|
3
5
|
The backfill machine for null database columns.
|
@@ -14,10 +16,9 @@ This gem maybe handly for `no-downtime` deployment especially when you need to f
|
|
14
16
|
* add a migration that disallows null values (null: false)
|
15
17
|
* deploy code that starts using the new column
|
16
18
|
|
17
|
-
|
18
19
|
## Concept
|
19
20
|
|
20
|
-
|
21
|
+
The idea is to prepare all data in the selection method on the database server, fetch it using the CURSOR feature, and then build simple UPDATE queries.
|
21
22
|
This way we minimize database server resource usage and lock only one record (atomic update).
|
22
23
|
We use two connections to the database:
|
23
24
|
* master - to create a cursor in a transaction and fetch data in batches.
|
@@ -27,7 +28,8 @@ Even if backfill process crashes you may resolve issue and run it again to proce
|
|
27
28
|
|
28
29
|
## Connection adapters
|
29
30
|
|
30
|
-
Curently it
|
31
|
+
Currently it supports the following ActiveRecord connection adapters:
|
32
|
+
* PostgreSQL
|
31
33
|
|
32
34
|
## Installation
|
33
35
|
|
@@ -51,10 +53,13 @@ Assume we we want to backfill `profiles.name` column from `users.first_name`, `u
|
|
51
53
|
|
52
54
|
Create a backfill task in `db/backfill/profile_name.rb` and define the required methods:
|
53
55
|
|
56
|
+
#### Single worker execution query
|
57
|
+
|
54
58
|
```ruby
|
55
59
|
class Backfill::ProfileName
|
60
|
+
|
56
61
|
def select_sql
|
57
|
-
|
62
|
+
<<~SQL
|
58
63
|
SELECT
|
59
64
|
profiles.id AS profile_id,
|
60
65
|
CONCAT(users.first_name, ' ', users.last_name) AS profile_name
|
@@ -66,31 +71,102 @@ class Backfill::ProfileName
|
|
66
71
|
SQL
|
67
72
|
end
|
68
73
|
|
69
|
-
def
|
70
|
-
|
74
|
+
def execute_sql(connection, row)
|
75
|
+
<<~SQL
|
71
76
|
UPDATE profiles SET
|
72
77
|
name = #{connection.quote(row['profile_name'])}
|
73
|
-
|
74
|
-
id = #{connection.quote(row[
|
78
|
+
WHERE
|
79
|
+
id = #{connection.quote(row['profile_id'])}
|
75
80
|
SQL
|
76
81
|
end
|
82
|
+
|
83
|
+
end
|
84
|
+
```
|
85
|
+
|
86
|
+
#### Multiple worker execution queries
|
87
|
+
|
88
|
+
```ruby
|
89
|
+
class Backfill::ProfileName
|
90
|
+
|
91
|
+
def select_sql
|
92
|
+
<<~SQL
|
93
|
+
SELECT
|
94
|
+
profiles.id AS profile_id,
|
95
|
+
CONCAT(users.first_name, ' ', users.last_name) AS profile_name
|
96
|
+
FROM profiles
|
97
|
+
INNER JOIN users ON
|
98
|
+
users.id = profiles.user_id
|
99
|
+
WHERE
|
100
|
+
profiles.name IS NULL
|
101
|
+
SQL
|
102
|
+
end
|
103
|
+
|
104
|
+
def execute_sql(connection, row)
|
105
|
+
[
|
106
|
+
'BEGIN',
|
107
|
+
<<~SQL,
|
108
|
+
UPDATE profiles SET
|
109
|
+
name = #{connection.quote(row['profile_name'])}
|
110
|
+
WHERE
|
111
|
+
id = #{connection.quote(row['profile_id'])} AND
|
112
|
+
(SELECT pg_try_advisory_xact_lock(12345678)) = TRUE
|
113
|
+
SQL
|
114
|
+
'COMMIT'
|
115
|
+
]
|
116
|
+
end
|
117
|
+
|
77
118
|
end
|
119
|
+
|
78
120
|
```
|
79
121
|
|
122
|
+
#### Custom row processing
|
123
|
+
|
124
|
+
```ruby
|
125
|
+
class Backfill::ProfileName
|
126
|
+
|
127
|
+
def select_sql
|
128
|
+
<<~SQL
|
129
|
+
SELECT
|
130
|
+
profiles.id AS profile_id,
|
131
|
+
CONCAT(users.first_name, ' ', users.last_name) AS profile_name
|
132
|
+
FROM profiles
|
133
|
+
INNER JOIN users ON
|
134
|
+
users.id = profiles.user_id
|
135
|
+
WHERE
|
136
|
+
profiles.name IS NULL
|
137
|
+
SQL
|
138
|
+
end
|
139
|
+
|
140
|
+
def process_row(connection, row)
|
141
|
+
connection.execute 'BEGIN'
|
142
|
+
if connection.select_value 'SELECT pg_try_advisory_xact_lock(12345678)'
|
143
|
+
connection.execute <<~SQL
|
144
|
+
INSERT INTO contacts(
|
145
|
+
full_name
|
146
|
+
)
|
147
|
+
VALUES(
|
148
|
+
#{connection.quote(row['profile_name'])}
|
149
|
+
)
|
150
|
+
SQL
|
151
|
+
connection.execute 'COMMIT'
|
152
|
+
end
|
153
|
+
|
154
|
+
end
|
155
|
+
|
156
|
+
```
|
80
157
|
And then just run rake task:
|
81
158
|
|
82
159
|
```bash
|
83
160
|
$ rails db:backfill[profile_name]
|
84
161
|
```
|
85
162
|
|
86
|
-
|
87
163
|
## Configuration
|
88
164
|
|
89
165
|
For a Rails application, backfiller is initialized with the following options:
|
90
166
|
|
91
167
|
* task_directory: `RAILS_ROOT/db/backfill`
|
92
168
|
* task_namespace: `Backfill`
|
93
|
-
* batch_size
|
169
|
+
* batch_size: `1_000`
|
94
170
|
* connection_pool: `ApplicationRecord.connection_pool`
|
95
171
|
* logger: `ApplicationRecord.logger`
|
96
172
|
|
data/backfiller.gemspec
CHANGED
data/lib/backfiller.rb
CHANGED
data/lib/backfiller/runner.rb
CHANGED
@@ -4,12 +4,14 @@ module Backfiller
|
|
4
4
|
attr_reader \
|
5
5
|
:task,
|
6
6
|
:connection_pool,
|
7
|
-
:batch_size
|
7
|
+
:batch_size,
|
8
|
+
:process_method
|
8
9
|
|
9
10
|
def initialize(task_name)
|
10
11
|
@task = build_task(task_name)
|
11
12
|
@connection_pool = @task.respond_to?(:connection_pool) ? @task.connection_pool : Backfiller.connection_pool
|
12
13
|
@batch_size = @task.respond_to?(:batch_size) ? @task.batch_size : Backfiller.batch_size
|
14
|
+
@process_method = @task.respond_to?(:process_row) ? @task.method(:process_row) : self.method(:process_row)
|
13
15
|
end
|
14
16
|
|
15
17
|
def run
|
@@ -17,7 +19,7 @@ module Backfiller
|
|
17
19
|
worker_connection = acquire_connection
|
18
20
|
|
19
21
|
fetch_each(master_connection) do |row|
|
20
|
-
|
22
|
+
process_method.call(worker_connection, row)
|
21
23
|
end
|
22
24
|
|
23
25
|
release_connection(master_connection)
|
@@ -80,9 +82,10 @@ module Backfiller
|
|
80
82
|
end
|
81
83
|
end
|
82
84
|
|
83
|
-
def
|
84
|
-
|
85
|
+
def process_row(connection, row)
|
86
|
+
Array(task.execute_sql(connection, row)).each do |sql|
|
87
|
+
connection.execute(sql)
|
88
|
+
end
|
85
89
|
end
|
86
|
-
|
87
90
|
end
|
88
91
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: backfiller
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andriy Yanko
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-06-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|
@@ -107,10 +107,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
107
107
|
- !ruby/object:Gem::Version
|
108
108
|
version: '0'
|
109
109
|
requirements: []
|
110
|
-
|
111
|
-
rubygems_version: 2.6.11
|
110
|
+
rubygems_version: 3.0.3
|
112
111
|
signing_key:
|
113
112
|
specification_version: 4
|
114
113
|
summary: Backfiller for null database columns
|
115
114
|
test_files: []
|
116
|
-
has_rdoc:
|