backfiller 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +12 -0
- data/.rspec +2 -0
- data/Gemfile +6 -0
- data/README.md +117 -0
- data/Rakefile +6 -0
- data/backfiller.gemspec +27 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/lib/backfiller.rb +5 -0
- data/lib/backfiller/configuration.rb +27 -0
- data/lib/backfiller/cursor.rb +16 -0
- data/lib/backfiller/cursor/postgresql.rb +27 -0
- data/lib/backfiller/railtie.rb +28 -0
- data/lib/backfiller/runner.rb +88 -0
- data/lib/backfiller/tasks/db.rake +10 -0
- metadata +116 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 3c3e9ee72c70a3b89caba10d6073a87058290680
|
4
|
+
data.tar.gz: 22071f36c634b6353787cac65a8c2984e8eb1ade
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 3931af99bdc3771c1636c24e4ce1374ca0af0739090c6f56defd530f4fa26f6cd3ac497feb9cd2824fa91a7f8c6a2c4e3cef0b64fb51b4d76cd4f13d90c033c1
|
7
|
+
data.tar.gz: c6bd78c26fd968f0df688bb89b52d22dc7681e9f781825300c4d5183310db95aaab56a6e1efc3b0ed8f0419ec20dd271e2b46d4114f2fbba89aed424e266debb
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,117 @@
|
|
1
|
+
# Backfiller
|
2
|
+
|
3
|
+
The backfill machine for null database columns.
|
4
|
+
This gem may be handy for `no-downtime` deployments, especially when you need to fill columns in a table with a huge number of records without locking the table.
|
5
|
+
|
6
|
+
## Typical no-downtime and non-locking cycle
|
7
|
+
|
8
|
+
* add migration that adds new column (null: true)
|
9
|
+
* deploy and run migration task
|
10
|
+
* deploy code that starts filling new column in corresponding flows
|
11
|
+
* add backfill task
|
12
|
+
* deploy and run backfill task
|
13
|
+
* [optional] add migration that invokes the backfill task so that all environments stay consistent (except the production environment, because we already backfilled data there)
|
14
|
+
* add migration that disallows null values (null: false)
|
15
|
+
* deploy code that starts using new column
|
16
|
+
|
17
|
+
|
18
|
+
## Concept
|
19
|
+
|
20
|
+
The idea is to prepare all data in the selection method on the database server, fetch it using a CURSOR, and then build simple UPDATE queries.
|
21
|
+
This way we minimize DB server resource usage and lock only one record at a time (atomic update).
|
22
|
+
We use two connections to database:
|
23
|
+
* master - to create a cursor in a transaction and fetch data in batches.
|
24
|
+
* worker - to execute small atomic update queries (no wrapper transaction)
|
25
|
+
|
26
|
+
Even if the backfill process crashes, you can resolve the issue and run it again to process the remaining data.
|
27
|
+
|
28
|
+
## Connection adapters
|
29
|
+
|
30
|
+
Currently it supports only the PostgreSQL ActiveRecord adapter.
|
31
|
+
|
32
|
+
## Installation
|
33
|
+
|
34
|
+
Add this line to your application's Gemfile:
|
35
|
+
|
36
|
+
```ruby
|
37
|
+
gem 'backfiller'
|
38
|
+
```
|
39
|
+
|
40
|
+
And then execute:
|
41
|
+
|
42
|
+
$ bundle
|
43
|
+
|
44
|
+
Or install it yourself as:
|
45
|
+
|
46
|
+
$ gem install backfiller
|
47
|
+
|
48
|
+
## Usage
|
49
|
+
|
50
|
+
Assume we want to backfill the `profiles.name` column from the `users.first_name` and `users.last_name` columns.
|
51
|
+
|
52
|
+
Create a backfill task in `db/backfill/profile_name.rb` and define the required methods:
|
53
|
+
|
54
|
+
```ruby
|
55
|
+
class Backfill::ProfileName
|
56
|
+
def select_sql
|
57
|
+
<<-SQL.strip_heredoc
|
58
|
+
SELECT
|
59
|
+
profiles.id AS profile_id,
|
60
|
+
CONCAT(users.first_name, ' ', users.last_name) AS profile_name
|
61
|
+
FROM profiles
|
62
|
+
INNER JOIN users ON
|
63
|
+
users.id = profiles.user_id
|
64
|
+
WHERE
|
65
|
+
profiles.name IS NULL
|
66
|
+
SQL
|
67
|
+
end
|
68
|
+
|
69
|
+
def update_sql(connection, row)
|
70
|
+
<<-SQL.strip_heredoc
|
71
|
+
UPDATE profiles SET
|
72
|
+
name = #{connection.quote(row['profile_name'])}
|
73
|
+
WHERE
|
74
|
+
id = #{connection.quote(row['profile_id'])}
|
75
|
+
SQL
|
76
|
+
end
|
77
|
+
end
|
78
|
+
```
|
79
|
+
|
80
|
+
And then just run rake task:
|
81
|
+
|
82
|
+
```bash
|
83
|
+
$ rails db:backfill[profile_name]
|
84
|
+
```
|
85
|
+
|
86
|
+
|
87
|
+
## Configuration
|
88
|
+
|
89
|
+
For a Rails application, backfiller is initialized with the following options:
|
90
|
+
|
91
|
+
* task_directory: `RAILS_ROOT/db/backfill`
|
92
|
+
* task_namespace: `Backfill`
|
93
|
+
* batch_size: `1_000`
|
94
|
+
* connection_pool: `ApplicationRecord.connection_pool`
|
95
|
+
* logger: `ApplicationRecord.logger`
|
96
|
+
|
97
|
+
You may change it globally via `config/initializers/backfiller.rb`:
|
98
|
+
|
99
|
+
```ruby
|
100
|
+
Backfiller.configure do |config|
|
101
|
+
config.foo = bar
|
102
|
+
end
|
103
|
+
```
|
104
|
+
|
105
|
+
Or specify some options in certain backfill task
|
106
|
+
|
107
|
+
```ruby
|
108
|
+
class Backfill::Foo
|
109
|
+
def batch_size
|
110
|
+
100
|
111
|
+
end
|
112
|
+
end
|
113
|
+
```
|
114
|
+
|
115
|
+
## Authors
|
116
|
+
|
117
|
+
* [Andriy Yanko](http://ayanko.github.io)
|
data/Rakefile
ADDED
data/backfiller.gemspec
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# coding: utf-8
# Gem specification for backfiller.

# Put the gem's own lib/ directory on the load path (once).
lib_dir = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)

Gem::Specification.new do |spec|
  spec.name     = "backfiller"
  spec.version  = "0.0.1"
  spec.authors  = ["Andriy Yanko"]
  spec.email    = ["andriy.yanko@railsware.com"]

  spec.summary  = "Backfiller for null database columns"
  spec.homepage = "https://github.com/railsware/backfiller"
  spec.license  = "MIT"

  # Package every git-tracked file except those under test/, spec/ or features/.
  tracked_files = `git ls-files -z`.split("\x0")
  spec.files = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }

  # Executables are the base names of packaged files that live under exe/.
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}).map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  spec.add_dependency "activerecord", ">= 5.0.0"

  spec.add_development_dependency "bundler", "~> 1.15"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec", "~> 3.0"
end
|
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Development console: loads the bundle and the backfiller gem, then starts IRB.
require "bundler/setup"
require "backfiller"

# Fixtures and/or initialization code can be added here to make experimenting
# with the gem easier. A different console can be used instead of IRB.

# To use Pry, add it to the Gemfile and uncomment:
# require "pry"
# Pry.start

require "irb"
IRB.start(__FILE__)
|
data/bin/setup
ADDED
data/lib/backfiller.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# Top-level namespace holding the global Backfiller settings and entry points.
module Backfiller
  class << self
    # Global settings. Each one is a plain read/write attribute on the module.
    attr_accessor :task_directory,
                  :task_namespace,
                  :connection_pool,
                  :batch_size,
                  :logger

    # Yields the Backfiller module itself so settings can be assigned in a block.
    def configure
      yield(self)
    end

    # Builds a Backfiller::Runner for +task_name+ and runs it.
    def run(task_name)
      runner = Backfiller::Runner.new(task_name)
      runner.run
    end

    # Writes +message+ at info level, prefixed with "[Backfiller]".
    # Does nothing (and returns nil) when no logger is configured.
    def log(message)
      return unless logger

      logger.info("[Backfiller] #{message}")
    end
  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'backfiller/cursor/postgresql'
|
2
|
+
|
3
|
+
module Backfiller
  # Factory for adapter-specific cursor objects.
  module Cursor
    # Picks the cursor implementation that matches +connection+ and instantiates
    # it with +connection+ followed by +args+.
    #
    # Raises a RuntimeError for any connection that is not a PostgreSQL adapter.
    def self.new(connection, *args)
      cursor_class =
        case connection
        when ActiveRecord::ConnectionAdapters::PostgreSQLAdapter
          Backfiller::Cursor::Postgresql
        else
          raise "Unsupported connection #{connection.inspect}"
        end

      cursor_class.new(connection, *args)
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Backfiller
  module Cursor
    # Thin wrapper that issues DECLARE / FETCH / CLOSE statements for a named
    # cursor over a given connection.
    class Postgresql
      attr_reader :connection

      # connection - object the SQL statements are sent to
      # name       - cursor name interpolated into every statement
      # query      - SELECT statement the cursor is declared for
      def initialize(connection, name, query)
        @connection = connection
        @name       = name
        @query      = query
      end

      # Declares the cursor for the stored query.
      def open
        declare_sql = "DECLARE #{@name} NO SCROLL CURSOR WITHOUT HOLD FOR #{@query}"
        connection.execute(declare_sql)
      end

      # Fetches the next +count+ rows from the cursor via +select_all+.
      def fetch(count)
        fetch_sql = "FETCH #{count} FROM #{@name}"
        connection.select_all(fetch_sql)
      end

      # Closes the cursor.
      def close
        close_sql = "CLOSE #{@name}"
        connection.execute(close_sql)
      end
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module Backfiller
  # Rails integration: registers the rake task and installs default settings.
  class Railtie < Rails::Railtie
    rake_tasks do
      load 'backfiller/tasks/db.rake'
    end

    # Default settings for a Rails application.
    initializer 'backfiller.configure' do
      Backfiller.configure do |config|
        config.task_directory = Rails.root.join('db', 'backfill')
        config.task_namespace = 'backfill'
        config.batch_size     = 1_000

        # Use ApplicationRecord when it is defined, otherwise ActiveRecord::Base.
        config.connection_pool =
          if defined?(ApplicationRecord)
            ApplicationRecord.connection_pool
          else
            ActiveRecord::Base.connection_pool
          end

        config.logger =
          if defined?(ApplicationRecord)
            ApplicationRecord.logger
          else
            ActiveRecord::Base.logger
          end
      end
    end

    # Make sure the namespace module for task classes exists once the
    # application has finished initializing.
    config.after_initialize do
      namespace_name = Backfiller.task_namespace.classify

      unless Object.const_defined?(namespace_name)
        Object.const_set(namespace_name, Module.new)
      end
    end
  end
end
|
@@ -0,0 +1,88 @@
|
|
1
|
+
module Backfiller
  # Runs a single backfill task: rows produced by the task's +select_sql+ are
  # read through a cursor on one connection ("master") and, for every row, the
  # statement built by the task's +update_sql+ is executed on a second
  # connection ("worker").
  class Runner
    attr_reader :task, :connection_pool, :batch_size

    # task_name - name of the task file (without extension) inside
    #             Backfiller.task_directory; the task class name is derived
    #             from Backfiller.task_namespace and this name.
    #
    # The task may override the global connection pool and batch size by
    # responding to +connection_pool+ / +batch_size+.
    def initialize(task_name)
      @task = build_task(task_name)
      @connection_pool = @task.respond_to?(:connection_pool) ? @task.connection_pool : Backfiller.connection_pool
      @batch_size = @task.respond_to?(:batch_size) ? @task.batch_size : Backfiller.batch_size
    end

    # Checks out two connections, streams every selected row into the update
    # step, and always checks the connections back in, even when the select or
    # an update raises, so a failed run does not leak pool connections.
    def run
      master_connection = acquire_connection
      worker_connection = acquire_connection

      fetch_each(master_connection) do |row|
        update_row(worker_connection, row)
      end
    ensure
      # Either variable is nil when its checkout itself failed.
      release_connection(master_connection) if master_connection
      release_connection(worker_connection) if worker_connection
    end

    private

    # Requires the task file and instantiates the task class.
    def build_task(task_name)
      Backfiller.log "Build #{task_name} task"
      require File.join(Backfiller.task_directory, task_name)
      "#{Backfiller.task_namespace}/#{task_name}".classify.constantize.new
    end

    ###########################################################################

    def acquire_connection
      connection_pool.checkout
    end

    def release_connection(connection)
      connection_pool.checkin(connection)
    end

    ###########################################################################

    def build_cursor(connection)
      Backfiller::Cursor.new(connection, 'backfill_cursor', task.select_sql)
    end

    # Opens the cursor inside a transaction on +master_connection+, yields
    # every fetched row to +block+, then closes the cursor.
    def fetch_each(master_connection, &block)
      cursor = build_cursor(master_connection)

      cursor.connection.transaction do
        Backfiller.log "Open cursor"
        cursor.open

        Backfiller.log "Start fetch loop"
        fetch_loop(cursor, &block)

        Backfiller.log "Close cursor"
        cursor.close
      end
    end

    # Fetches batches of +batch_size+ rows until an empty batch is returned,
    # logging the running total after each batch.
    def fetch_loop(cursor, &block)
      count = 0

      loop do
        result = cursor.fetch(batch_size)

        break if result.empty?

        result.each do |row|
          block.call(row)
          count += 1
        end

        Backfiller.log "Processed #{count}"
      end
    end

    # Executes the task's UPDATE statement for one row.
    def update_row(connection, row)
      connection.execute task.update_sql(connection, row)
    end
  end
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
namespace :db do
  # Usage: rails db:backfill[task_name]
  desc 'Run database backfill task'
  task :backfill, [:name] => :environment do |_task, args|
    task_name = args[:name]
    raise 'Please specify backfill task name' unless task_name

    # Set the configured logger (if any) to info level before running.
    Backfiller.logger.level = :info if Backfiller.logger

    Backfiller.run(task_name)
  end
end
|
metadata
ADDED
@@ -0,0 +1,116 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: backfiller
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Andriy Yanko
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2017-09-30 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: activerecord
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 5.0.0
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 5.0.0
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: bundler
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.15'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.15'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '10.0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '10.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rspec
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '3.0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '3.0'
|
69
|
+
description:
|
70
|
+
email:
|
71
|
+
- andriy.yanko@railsware.com
|
72
|
+
executables: []
|
73
|
+
extensions: []
|
74
|
+
extra_rdoc_files: []
|
75
|
+
files:
|
76
|
+
- ".gitignore"
|
77
|
+
- ".rspec"
|
78
|
+
- Gemfile
|
79
|
+
- README.md
|
80
|
+
- Rakefile
|
81
|
+
- backfiller.gemspec
|
82
|
+
- bin/console
|
83
|
+
- bin/setup
|
84
|
+
- lib/backfiller.rb
|
85
|
+
- lib/backfiller/configuration.rb
|
86
|
+
- lib/backfiller/cursor.rb
|
87
|
+
- lib/backfiller/cursor/postgresql.rb
|
88
|
+
- lib/backfiller/railtie.rb
|
89
|
+
- lib/backfiller/runner.rb
|
90
|
+
- lib/backfiller/tasks/db.rake
|
91
|
+
homepage: https://github.com/railsware/backfiller
|
92
|
+
licenses:
|
93
|
+
- MIT
|
94
|
+
metadata: {}
|
95
|
+
post_install_message:
|
96
|
+
rdoc_options: []
|
97
|
+
require_paths:
|
98
|
+
- lib
|
99
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
105
|
+
requirements:
|
106
|
+
- - ">="
|
107
|
+
- !ruby/object:Gem::Version
|
108
|
+
version: '0'
|
109
|
+
requirements: []
|
110
|
+
rubyforge_project:
|
111
|
+
rubygems_version: 2.6.11
|
112
|
+
signing_key:
|
113
|
+
specification_version: 4
|
114
|
+
summary: Backfiller for null database columns
|
115
|
+
test_files: []
|
116
|
+
has_rdoc:
|