backfiller 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +12 -0
- data/.rspec +2 -0
- data/Gemfile +6 -0
- data/README.md +117 -0
- data/Rakefile +6 -0
- data/backfiller.gemspec +27 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/lib/backfiller.rb +5 -0
- data/lib/backfiller/configuration.rb +27 -0
- data/lib/backfiller/cursor.rb +16 -0
- data/lib/backfiller/cursor/postgresql.rb +27 -0
- data/lib/backfiller/railtie.rb +28 -0
- data/lib/backfiller/runner.rb +88 -0
- data/lib/backfiller/tasks/db.rake +10 -0
- metadata +116 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 3c3e9ee72c70a3b89caba10d6073a87058290680
|
4
|
+
data.tar.gz: 22071f36c634b6353787cac65a8c2984e8eb1ade
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 3931af99bdc3771c1636c24e4ce1374ca0af0739090c6f56defd530f4fa26f6cd3ac497feb9cd2824fa91a7f8c6a2c4e3cef0b64fb51b4d76cd4f13d90c033c1
|
7
|
+
data.tar.gz: c6bd78c26fd968f0df688bb89b52d22dc7681e9f781825300c4d5183310db95aaab56a6e1efc3b0ed8f0419ec20dd271e2b46d4114f2fbba89aed424e266debb
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,117 @@
|
|
1
|
+
# Backfiller
|
2
|
+
|
3
|
+
The backfill machine for null database columns.
|
4
|
+
This gem may be handy for `no-downtime` deployments, especially when you need to fill columns of a table with a huge number of records without locking the table.
|
5
|
+
|
6
|
+
## Typical no-downtime and non-locking cycle
|
7
|
+
|
8
|
+
* add migration that adds new column (null: true)
|
9
|
+
* deploy and run migration task
|
10
|
+
* deploy code that starts filling new column in corresponding flows
|
11
|
+
* add backfill task
|
12
|
+
* deploy and run backfill task
|
13
|
+
* [optional] add migration that invokes backfill task so as to keep all environments consistent (except production environment because we already backfilled data)
|
14
|
+
* add migration that disallows null values (null: false)
|
15
|
+
* deploy code that starts using new column
|
16
|
+
|
17
|
+
|
18
|
+
## Concept
|
19
|
+
|
20
|
+
The idea is to prepare all data in the selection method on the database server, fetch it using a CURSOR, and then build simple UPDATE queries.
|
21
|
+
This way we minimize database server resource usage and lock only one record at a time (atomic update).
|
22
|
+
We use two connections to database:
|
23
|
+
* master - to create a cursor in a transaction and fetch data in batches.
|
24
|
+
* worker - to execute small atomic update queries (no wrapper transaction)
|
25
|
+
|
26
|
+
Even if the backfill process crashes, you may resolve the issue and run it again to process the remaining data.
|
27
|
+
|
28
|
+
## Connection adapters
|
29
|
+
|
30
|
+
Currently it supports only the PostgreSQL ActiveRecord adapter.
|
31
|
+
|
32
|
+
## Installation
|
33
|
+
|
34
|
+
Add this line to your application's Gemfile:
|
35
|
+
|
36
|
+
```ruby
|
37
|
+
gem 'backfiller'
|
38
|
+
```
|
39
|
+
|
40
|
+
And then execute:
|
41
|
+
|
42
|
+
$ bundle
|
43
|
+
|
44
|
+
Or install it yourself as:
|
45
|
+
|
46
|
+
$ gem install backfiller
|
47
|
+
|
48
|
+
## Usage
|
49
|
+
|
50
|
+
Assume we want to backfill the `profiles.name` column from the `users.first_name` and `users.last_name` columns.
|
51
|
+
|
52
|
+
Create a backfill task in `db/backfill/profile_name.rb` and define the required methods:
|
53
|
+
|
54
|
+
```ruby
|
55
|
+
class Backfill::ProfileName
|
56
|
+
def select_sql
|
57
|
+
<<-SQL.strip_heredoc
|
58
|
+
SELECT
|
59
|
+
profiles.id AS profile_id,
|
60
|
+
CONCAT(users.first_name, ' ', users.last_name) AS profile_name
|
61
|
+
FROM profiles
|
62
|
+
INNER JOIN users ON
|
63
|
+
users.id = profiles.user_id
|
64
|
+
WHERE
|
65
|
+
profiles.name IS NULL
|
66
|
+
SQL
|
67
|
+
end
|
68
|
+
|
69
|
+
def update_sql(connection, row)
|
70
|
+
<<-SQL.strip_heredoc
|
71
|
+
UPDATE profiles SET
|
72
|
+
name = #{connection.quote(row['profile_name'])}
|
73
|
+
WHERE
|
74
|
+
id = #{connection.quote(row['profile_id'])}
|
75
|
+
SQL
|
76
|
+
end
|
77
|
+
end
|
78
|
+
```
|
79
|
+
|
80
|
+
And then just run rake task:
|
81
|
+
|
82
|
+
```bash
|
83
|
+
$ rails db:backfill[profile_name]
|
84
|
+
```
|
85
|
+
|
86
|
+
|
87
|
+
## Configuration
|
88
|
+
|
89
|
+
For a Rails application, backfiller is initialized with the following options:
|
90
|
+
|
91
|
+
* task_directory: `RAILS_ROOT/db/backfill`
|
92
|
+
* task_namespace: `Backfill`
|
93
|
+
* batch_size: `1_000`
|
94
|
+
* connection_pool: `ApplicationRecord.connection_pool`
|
95
|
+
* logger: `ApplicationRecord.logger`
|
96
|
+
|
97
|
+
You may change it globally via `config/initializers/backfiller.rb`:
|
98
|
+
|
99
|
+
```ruby
|
100
|
+
Backfiller.configure do |config|
|
101
|
+
config.foo = bar
|
102
|
+
end
|
103
|
+
```
|
104
|
+
|
105
|
+
Or specify some options in certain backfill task
|
106
|
+
|
107
|
+
```ruby
|
108
|
+
class Backfill::Foo
|
109
|
+
def batch_size
|
110
|
+
100
|
111
|
+
end
|
112
|
+
end
|
113
|
+
```
|
114
|
+
|
115
|
+
## Authors
|
116
|
+
|
117
|
+
* [Andriy Yanko](http://ayanko.github.io)
|
data/Rakefile
ADDED
data/backfiller.gemspec
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# coding: utf-8
# Gem specification for backfiller.

# Put the gem's own lib/ directory on the load path, unless it is already there.
lib_dir = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)

Gem::Specification.new do |spec|
  # Identity
  spec.name     = "backfiller"
  spec.version  = "0.0.1"
  spec.authors  = ["Andriy Yanko"]
  spec.email    = ["andriy.yanko@railsware.com"]

  spec.summary  = %q{Backfiller for null database columns}
  spec.homepage = "https://github.com/railsware/backfiller"
  spec.license  = "MIT"

  # Package every git-tracked file except those under test/, spec/ or features/.
  tracked_files = `git ls-files -z`.split("\x0")
  spec.files    = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }

  # Executables are whatever packaged files live under exe/.
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # Runtime dependency
  spec.add_dependency "activerecord", ">= 5.0.0"

  # Development-only dependencies
  spec.add_development_dependency "bundler", "~> 1.15"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec", "~> 3.0"
end
|
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "backfiller"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "irb"
|
14
|
+
IRB.start(__FILE__)
|
data/bin/setup
ADDED
data/lib/backfiller.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# Top-level namespace of the gem. Holds the module-wide settings and the
# entry points used to run a backfill task and to write log messages.
module Backfiller
  class << self
    # Module-level settings. Each one is a plain accessor and stays nil
    # until something assigns it (for example inside a .configure block).
    attr_accessor :task_directory,
                  :task_namespace,
                  :connection_pool,
                  :batch_size,
                  :logger

    # Yields the Backfiller module itself so that settings can be assigned
    # on it inside the block. Returns whatever the block returns.
    def configure
      yield(self)
    end

    # Builds a Backfiller::Runner for +task_name+ and runs it.
    def run(task_name)
      runner = Backfiller::Runner.new(task_name)
      runner.run
    end

    # Sends +message+, prefixed with "[Backfiller] ", to the configured
    # logger at info level. Does nothing (returns nil) when no logger is set.
    def log(message)
      return unless logger

      logger.info("[Backfiller] #{message}")
    end
  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'backfiller/cursor/postgresql'
|
2
|
+
|
3
|
+
module Backfiller
  # Factory namespace for database cursors.
  module Cursor
    # Builds the cursor implementation that matches +connection+.
    #
    # @param connection the database connection the cursor will run on
    # @param args remaining arguments, forwarded unchanged to the cursor class
    # @return [Backfiller::Cursor::Postgresql] for a PostgreSQL adapter connection
    # @raise [RuntimeError] "Unsupported connection ..." for any other connection
    def self.new(connection, *args)
      # The adapter constant only exists once the PostgreSQL adapter has been
      # loaded. Guard the lookup so that other setups get the intended
      # "Unsupported connection" error rather than a NameError.
      if defined?(::ActiveRecord::ConnectionAdapters::PostgreSQLAdapter) &&
         connection.is_a?(::ActiveRecord::ConnectionAdapters::PostgreSQLAdapter)
        return Backfiller::Cursor::Postgresql.new(connection, *args)
      end

      raise "Unsupported connection #{connection.inspect}"
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Backfiller
  module Cursor
    # Thin wrapper that issues the DECLARE / FETCH / CLOSE statements for one
    # named cursor on a given connection.
    class Postgresql
      attr_reader :connection

      # @param connection connection the statements are sent through
      # @param name [String] cursor name interpolated into every statement
      # @param query [String] SELECT statement the cursor is declared for
      def initialize(connection, name, query)
        @connection = connection
        @name = name
        @query = query
      end

      # Declares the cursor for the stored query.
      def open
        declare_sql = "DECLARE #{@name} NO SCROLL CURSOR WITHOUT HOLD FOR #{@query}"
        connection.execute(declare_sql)
      end

      # Fetches the next +count+ rows from the cursor and returns whatever
      # the connection's select_all returns.
      def fetch(count)
        fetch_sql = "FETCH #{count} FROM #{@name}"
        connection.select_all(fetch_sql)
      end

      # Closes the cursor.
      def close
        close_sql = "CLOSE #{@name}"
        connection.execute(close_sql)
      end
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module Backfiller
  # Rails integration: registers the gem's rake task, applies the default
  # configuration and makes sure the task namespace module exists.
  class Railtie < Rails::Railtie
    rake_tasks { load 'backfiller/tasks/db.rake' }

    initializer 'backfiller.configure' do
      # Use ApplicationRecord when it is defined, ActiveRecord::Base otherwise.
      record_class = defined?(ApplicationRecord) ? ApplicationRecord : ActiveRecord::Base

      Backfiller.configure do |settings|
        settings.task_directory  = Rails.root.join('db', 'backfill')
        settings.task_namespace  = 'backfill'
        settings.batch_size      = 1_000
        settings.connection_pool = record_class.connection_pool
        settings.logger          = record_class.logger
      end
    end

    config.after_initialize do
      # Define an empty top-level module named after the task namespace
      # unless a constant with that name already exists.
      namespace_name = Backfiller.task_namespace.classify

      unless Object.const_defined?(namespace_name)
        Object.const_set(namespace_name, Module.new)
      end
    end
  end
end
|
@@ -0,0 +1,88 @@
|
|
1
|
+
module Backfiller
  # Runs one backfill task. Rows are read in batches through a cursor on a
  # "master" connection (inside a transaction) and each row is turned into
  # an UPDATE statement executed on a separate "worker" connection.
  class Runner
    attr_reader :task, :connection_pool, :batch_size

    # Loads and instantiates the task, then resolves the connection pool and
    # batch size. A task that responds to +connection_pool+ or +batch_size+
    # overrides the corresponding Backfiller-level setting.
    #
    # @param task_name [String] task file name (without extension) inside
    #   Backfiller.task_directory
    def initialize(task_name)
      @task = build_task(task_name)
      @connection_pool = @task.respond_to?(:connection_pool) ? @task.connection_pool : Backfiller.connection_pool
      @batch_size = @task.respond_to?(:batch_size) ? @task.batch_size : Backfiller.batch_size
    end

    # Checks out two connections, streams every selected row through
    # +update_row+ and checks the connections back in.
    #
    # The check-in happens in +ensure+ so that a failing fetch or update does
    # not leave the connections checked out of the pool; the original error
    # still propagates to the caller.
    def run
      master_connection = acquire_connection
      worker_connection = acquire_connection

      fetch_each(master_connection) do |row|
        update_row(worker_connection, row)
      end
    ensure
      # Either variable is nil when its checkout itself raised.
      release_connection(master_connection) if master_connection
      release_connection(worker_connection) if worker_connection
    end

    private

    # Requires the task file and instantiates the task class whose name is
    # derived from "<task_namespace>/<task_name>".
    # NOTE(review): the name goes through String#classify - presumably the
    # ActiveSupport inflection, which also singularizes; confirm for plural
    # task names.
    def build_task(task_name)
      Backfiller.log "Build #{task_name} task"
      require File.join(Backfiller.task_directory, task_name)
      "#{Backfiller.task_namespace}/#{task_name}".classify.constantize.new
    end

    ###########################################################################

    # Takes a connection out of the pool.
    def acquire_connection
      connection_pool.checkout
    end

    # Returns +connection+ to the pool.
    def release_connection(connection)
      connection_pool.checkin(connection)
    end

    ###########################################################################

    # Builds a cursor named 'backfill_cursor' over the task's select query.
    def build_cursor(connection)
      Backfiller::Cursor.new(connection, 'backfill_cursor', task.select_sql)
    end

    # Opens the cursor inside a transaction on the master connection, yields
    # every fetched row to +block+ and closes the cursor afterwards.
    def fetch_each(master_connection, &block)
      cursor = build_cursor(master_connection)

      cursor.connection.transaction do
        Backfiller.log "Open cursor"
        cursor.open

        Backfiller.log "Start fetch loop"
        fetch_loop(cursor, &block)

        Backfiller.log "Close cursor"
        cursor.close
      end
    end

    # Fetches +batch_size+ rows at a time until an empty batch comes back,
    # calling +block+ for each row and logging the running total per batch.
    def fetch_loop(cursor, &block)
      count = 0

      loop do
        result = cursor.fetch(batch_size)

        break if result.empty?

        result.each do |row|
          block.call(row)
          count += 1
        end

        Backfiller.log "Processed #{count}"
      end
    end

    # Executes the task's UPDATE statement for +row+ on +connection+.
    def update_row(connection, row)
      connection.execute task.update_sql(connection, row)
    end
  end
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
namespace :db do
  desc 'Run database backfill task'
  # Usage: db:backfill[name] - runs the backfill task called +name+.
  task :backfill, [:name] => :environment do |_task, args|
    task_name = args[:name]
    raise 'Please specify backfill task name' unless task_name

    # Set the configured logger (if any) to info level before running.
    backfill_logger = Backfiller.logger
    backfill_logger.level = :info if backfill_logger

    Backfiller.run(task_name)
  end
end
|
metadata
ADDED
@@ -0,0 +1,116 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: backfiller
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Andriy Yanko
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2017-09-30 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: activerecord
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 5.0.0
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 5.0.0
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: bundler
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.15'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.15'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '10.0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '10.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rspec
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '3.0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '3.0'
|
69
|
+
description:
|
70
|
+
email:
|
71
|
+
- andriy.yanko@railsware.com
|
72
|
+
executables: []
|
73
|
+
extensions: []
|
74
|
+
extra_rdoc_files: []
|
75
|
+
files:
|
76
|
+
- ".gitignore"
|
77
|
+
- ".rspec"
|
78
|
+
- Gemfile
|
79
|
+
- README.md
|
80
|
+
- Rakefile
|
81
|
+
- backfiller.gemspec
|
82
|
+
- bin/console
|
83
|
+
- bin/setup
|
84
|
+
- lib/backfiller.rb
|
85
|
+
- lib/backfiller/configuration.rb
|
86
|
+
- lib/backfiller/cursor.rb
|
87
|
+
- lib/backfiller/cursor/postgresql.rb
|
88
|
+
- lib/backfiller/railtie.rb
|
89
|
+
- lib/backfiller/runner.rb
|
90
|
+
- lib/backfiller/tasks/db.rake
|
91
|
+
homepage: https://github.com/railsware/backfiller
|
92
|
+
licenses:
|
93
|
+
- MIT
|
94
|
+
metadata: {}
|
95
|
+
post_install_message:
|
96
|
+
rdoc_options: []
|
97
|
+
require_paths:
|
98
|
+
- lib
|
99
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
105
|
+
requirements:
|
106
|
+
- - ">="
|
107
|
+
- !ruby/object:Gem::Version
|
108
|
+
version: '0'
|
109
|
+
requirements: []
|
110
|
+
rubyforge_project:
|
111
|
+
rubygems_version: 2.6.11
|
112
|
+
signing_key:
|
113
|
+
specification_version: 4
|
114
|
+
summary: Backfiller for null database columns
|
115
|
+
test_files: []
|
116
|
+
has_rdoc:
|