backfiller 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 3c3e9ee72c70a3b89caba10d6073a87058290680
4
+ data.tar.gz: 22071f36c634b6353787cac65a8c2984e8eb1ade
5
+ SHA512:
6
+ metadata.gz: 3931af99bdc3771c1636c24e4ce1374ca0af0739090c6f56defd530f4fa26f6cd3ac497feb9cd2824fa91a7f8c6a2c4e3cef0b64fb51b4d76cd4f13d90c033c1
7
+ data.tar.gz: c6bd78c26fd968f0df688bb89b52d22dc7681e9f781825300c4d5183310db95aaab56a6e1efc3b0ed8f0419ec20dd271e2b46d4114f2fbba89aed424e266debb
data/.gitignore ADDED
@@ -0,0 +1,12 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+
11
+ # rspec failure tracking
12
+ .rspec_status
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source "https://rubygems.org"
2
+
3
+ git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
4
+
5
+ # Specify your gem's dependencies in backfiller.gemspec
6
+ gemspec
data/README.md ADDED
@@ -0,0 +1,117 @@
1
+ # Backfiller
2
+
3
+ The backfill machine for null database columns.
4
+ This gem may be handy for `no-downtime` deployments, especially when you need to fill columns for a table with a huge number of records without locking the table.
5
+
6
+ ## Typical no-downtime and non-locking cycle
7
+
8
+ * add migration that adds new column (null: true)
9
+ * deploy and run migration task
10
+ * deploy code that starts filling new column in corresponding flows
11
+ * add backfill task
12
+ * deploy and run backfill task
13
+ * [optional] add migration that invokes the backfill task so as to keep all environments consistent (except the production environment, because we already backfilled data there)
14
+ * add migration that disallows null values (null: false)
15
+ * deploy code that starts using new column
16
+
17
+
18
+ ## Concept
19
+
20
+ The idea is to prepare all data with a selection query on the database server, fetch it using a CURSOR, and then build simple UPDATE queries.
21
+ This way we minimize db server resource usage and lock only one record at a time (atomic update).
22
+ We use two connections to database:
23
+ * master - to create a cursor in a transaction and fetch data in batches.
24
+ * worker - to execute small atomic update queries (no wrapper transaction)
25
+
26
+ Even if backfill process crashes you may resolve issue and run it again to process remaining amount of data.
27
+
28
+ ## Connection adapters
29
+
30
+ Currently it supports only the PostgreSQL ActiveRecord adapter.
31
+
32
+ ## Installation
33
+
34
+ Add this line to your application's Gemfile:
35
+
36
+ ```ruby
37
+ gem 'backfiller'
38
+ ```
39
+
40
+ And then execute:
41
+
42
+ $ bundle
43
+
44
+ Or install it yourself as:
45
+
46
+ $ gem install backfiller
47
+
48
+ ## Usage
49
+
50
+ Assume we want to backfill the `profiles.name` column from the `users.first_name` and `users.last_name` columns.
51
+
52
+ Create a backfill task in `db/backfill/profile_name.rb` and define the required methods:
53
+
54
+ ```ruby
55
+ class Backfill::ProfileName
56
+ def select_sql
57
+ <<-SQL.strip_heredoc
58
+ SELECT
59
+         profiles.id AS profile_id,
60
+ CONCAT(users.first_name, ' ', users.last_name) AS profile_name
61
+ FROM profiles
62
+ INNER JOIN users ON
63
+ users.id = profiles.user_id
64
+ WHERE
65
+ profiles.name IS NULL
66
+ SQL
67
+ end
68
+
69
+ def update_sql(connection, row)
70
+ <<-SQL.strip_heredoc
71
+ UPDATE profiles SET
72
+ name = #{connection.quote(row['profile_name'])}
73
+ WHERE
74
+         id = #{connection.quote(row['profile_id'])}
75
+ SQL
76
+ end
77
+ end
78
+ ```
79
+
80
+ And then just run rake task:
81
+
82
+ ```bash
83
+ $ rails db:backfill[profile_name]
84
+ ```
85
+
86
+
87
+ ## Configuration
88
+
89
+ For a Rails application, backfiller is initialized with the following options:
90
+
91
+ * task_directory: `RAILS_ROOT/db/backfill`
92
+ * task_namespace: `Backfill`
93
+ * batch_size: `1_000`
94
+ * connection_pool: `ApplicationRecord.connection_pool`
95
+ * logger: `ApplicationRecord.logger`
96
+
97
+ You may change it globally via `config/initializers/backfiller.rb`:
98
+
99
+ ```ruby
100
+ Backfiller.configure do |config|
101
+ config.foo = bar
102
+ end
103
+ ```
104
+
105
+ Or override some options in a specific backfill task:
106
+
107
+ ```ruby
108
+ class Backfill::Foo
109
+ def batch_size
110
+ 100
111
+ end
112
+ end
113
+ ```
114
+
115
+ ## Authors
116
+
117
+ * [Andriy Yanko](http://ayanko.github.io)
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
@@ -0,0 +1,27 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path("../lib", __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+
5
+ Gem::Specification.new do |spec|
6
+ spec.name = "backfiller"
7
+ spec.version = "0.0.1"
8
+ spec.authors = ["Andriy Yanko"]
9
+ spec.email = ["andriy.yanko@railsware.com"]
10
+
11
+ spec.summary = %q{Backfiller for null database columns}
12
+ spec.homepage = "https://github.com/railsware/backfiller"
13
+ spec.license = "MIT"
14
+
15
+ spec.files = `git ls-files -z`.split("\x0").reject do |f|
16
+ f.match(%r{^(test|spec|features)/})
17
+ end
18
+ spec.bindir = "exe"
19
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
20
+ spec.require_paths = ["lib"]
21
+
22
+ spec.add_dependency "activerecord", ">= 5.0.0"
23
+
24
+ spec.add_development_dependency "bundler", "~> 1.15"
25
+ spec.add_development_dependency "rake", "~> 10.0"
26
+ spec.add_development_dependency "rspec", "~> 3.0"
27
+ end
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "backfiller"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
data/lib/backfiller.rb ADDED
@@ -0,0 +1,5 @@
1
+ require 'backfiller/configuration'
2
+ require 'backfiller/cursor'
3
+ require 'backfiller/runner'
4
+
5
+ require 'backfiller/railtie' if defined?(Rails)
@@ -0,0 +1,27 @@
1
# Gem-wide settings and entry points.
#
# All settings live directly on the module as plain accessors; `configure`
# simply yields the module so they can be assigned inside a block.
module Backfiller
  class << self
    # task_directory  - directory the runner requires task files from
    # task_namespace  - namespace used to build the task class name
    # connection_pool - pool the runner checks connections out of
    # batch_size      - number of rows requested per cursor fetch
    # logger          - optional logger; when unset, `log` is a no-op
    attr_accessor :task_directory,
                  :task_namespace,
                  :connection_pool,
                  :batch_size,
                  :logger
  end

  # Yields the module itself so settings can be assigned in a block.
  def self.configure
    yield self
  end

  # Builds a runner for the named task and executes it.
  def self.run(task_name)
    runner = Backfiller::Runner.new(task_name)
    runner.run
  end

  # Writes an info-level message with the "[Backfiller]" prefix, but only
  # when a logger has been configured.
  def self.log(message)
    return unless logger

    logger.info("[Backfiller] #{message}")
  end
end
@@ -0,0 +1,16 @@
1
+ require 'backfiller/cursor/postgresql'
2
+
3
module Backfiller
  # Factory for adapter-specific cursor implementations.
  module Cursor

    # Builds the cursor implementation matching the given connection.
    #
    # connection - database connection; only PostgreSQL adapter connections
    #              are supported
    # args       - remaining arguments, forwarded unchanged to the cursor
    #              class constructor
    #
    # Returns a Backfiller::Cursor::Postgresql instance.
    # Raises RuntimeError ("Unsupported connection ...") for anything else.
    def self.new(connection, *args)
      if postgresql?(connection)
        Backfiller::Cursor::Postgresql.new(connection, *args)
      else
        raise "Unsupported connection #{connection.inspect}"
      end
    end

    # True when the PostgreSQL adapter class is loaded and the connection is
    # an instance of it. The `defined?` guard matters: referencing the adapter
    # constant directly raises NameError when that adapter has not been
    # loaded, which would hide the "Unsupported connection" error.
    def self.postgresql?(connection)
      return false unless defined?(ActiveRecord::ConnectionAdapters::PostgreSQLAdapter)

      connection.is_a?(ActiveRecord::ConnectionAdapters::PostgreSQLAdapter)
    end
    private_class_method :postgresql?

  end
end
@@ -0,0 +1,27 @@
1
module Backfiller
  module Cursor
    # Cursor wrapper that issues DECLARE / FETCH / CLOSE statements through
    # the given connection. The cursor is declared WITHOUT HOLD; the caller
    # in this gem opens it inside a transaction block.
    class Postgresql
      attr_reader :connection

      # connection - object responding to `execute` and `select_all`
      # name       - cursor name, interpolated as-is into the statements
      # query      - SELECT statement the cursor iterates over
      def initialize(connection, name, query)
        @connection = connection
        @name = name
        @query = query
      end

      # Declares the cursor for the configured query.
      def open
        connection.execute(declare_statement)
      end

      # Fetches up to `count` rows from the cursor; returns whatever the
      # connection's `select_all` returns.
      def fetch(count)
        connection.select_all(fetch_statement(count))
      end

      # Closes the cursor.
      def close
        connection.execute(close_statement)
      end

      private

      def declare_statement
        "DECLARE #{@name} NO SCROLL CURSOR WITHOUT HOLD FOR #{@query}"
      end

      def fetch_statement(count)
        "FETCH #{count} FROM #{@name}"
      end

      def close_statement
        "CLOSE #{@name}"
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
module Backfiller
  # Rails integration: registers the rake task, installs the default
  # configuration and makes sure the task namespace module exists.
  class Railtie < Rails::Railtie
    rake_tasks { load 'backfiller/tasks/db.rake' }

    initializer 'backfiller.configure' do
      # Prefer the application's base model when it is defined; otherwise
      # fall back to ActiveRecord::Base for both the pool and the logger.
      base_model = defined?(ApplicationRecord) ? ApplicationRecord : ActiveRecord::Base

      Backfiller.configure do |settings|
        settings.task_directory  = Rails.root.join('db', 'backfill')
        settings.task_namespace  = 'backfill'
        settings.batch_size      = 1_000
        settings.connection_pool = base_model.connection_pool
        settings.logger          = base_model.logger
      end
    end

    config.after_initialize do
      # Define the (classified) namespace module up front so task files can
      # reopen it with the compact `class Namespace::Task` form.
      namespace_name = Backfiller.task_namespace.classify
      Object.const_set(namespace_name, Module.new) unless Object.const_defined?(namespace_name)
    end
  end
end
@@ -0,0 +1,88 @@
1
module Backfiller
  # Runs a single backfill task: rows are streamed from a cursor on one
  # connection ("master") and each row is turned into an update statement
  # executed on a second connection ("worker").
  class Runner

    attr_reader \
      :task,
      :connection_pool,
      :batch_size

    # task_name - name of the task file (relative to Backfiller.task_directory)
    #
    # The task object may override `connection_pool` and `batch_size`;
    # otherwise the global Backfiller settings are used.
    def initialize(task_name)
      @task = build_task(task_name)
      @connection_pool = @task.respond_to?(:connection_pool) ? @task.connection_pool : Backfiller.connection_pool
      @batch_size = @task.respond_to?(:batch_size) ? @task.batch_size : Backfiller.batch_size
    end

    # Checks out two connections, streams every selected row through
    # `update_row`, and always returns the connections to the pool.
    #
    # Any error raised while fetching or updating propagates to the caller,
    # but only after every connection that was checked out has been checked
    # back in, so a failed run does not leak pool connections.
    def run
      master_connection = nil
      worker_connection = nil

      master_connection = acquire_connection
      worker_connection = acquire_connection

      fetch_each(master_connection) do |row|
        update_row(worker_connection, row)
      end
    ensure
      # Either variable may still be nil if the corresponding checkout failed.
      release_connection(master_connection) if master_connection
      release_connection(worker_connection) if worker_connection
    end

    private

    # Requires the task file and instantiates the task class named
    # "<task_namespace>/<task_name>" (classified).
    def build_task(task_name)
      Backfiller.log "Build #{task_name} task"
      require File.join(Backfiller.task_directory, task_name)
      "#{Backfiller.task_namespace}/#{task_name}".classify.constantize.new
    end

    ###########################################################################

    def acquire_connection
      connection_pool.checkout
    end

    def release_connection(connection)
      connection_pool.checkin(connection)
    end

    ###########################################################################

    # Builds a cursor over the task's select statement on the given connection.
    def build_cursor(connection)
      Backfiller::Cursor.new(connection, 'backfill_cursor', task.select_sql)
    end

    # Opens the cursor inside a transaction on the master connection, yields
    # every fetched row to the block, then closes the cursor.
    def fetch_each(master_connection, &block)
      cursor = build_cursor(master_connection)

      cursor.connection.transaction do
        Backfiller.log "Open cursor"
        cursor.open

        Backfiller.log "Start fetch loop"
        fetch_loop(cursor, &block)

        Backfiller.log "Close cursor"
        cursor.close
      end
    end

    # Fetches batches of `batch_size` rows until an empty batch is returned,
    # logging the running total after each batch.
    def fetch_loop(cursor, &block)
      count = 0

      loop do
        result = cursor.fetch(batch_size)

        break if result.empty?

        result.each do |row|
          block.call(row)
          count += 1
        end

        Backfiller.log "Processed #{count}"
      end
    end

    # Executes the task-provided update statement for one row.
    def update_row(connection, row)
      connection.execute task.update_sql(connection, row)
    end

  end
end
@@ -0,0 +1,10 @@
1
namespace :db do
  # Usage: rails db:backfill[task_name]
  # Loads the environment, switches the configured logger (if any) to info
  # level and runs the named backfill task.
  desc 'Run database backfill task'
  task :backfill, [:name] => :environment do |_task, arguments|
    task_name = arguments[:name]
    raise 'Please specify backfill task name' unless task_name

    logger = Backfiller.logger
    logger.level = :info if logger

    Backfiller.run(task_name)
  end
end
metadata ADDED
@@ -0,0 +1,116 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: backfiller
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Andriy Yanko
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2017-09-30 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: activerecord
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 5.0.0
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 5.0.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.15'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.15'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '10.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '10.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '3.0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '3.0'
69
+ description:
70
+ email:
71
+ - andriy.yanko@railsware.com
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:
76
+ - ".gitignore"
77
+ - ".rspec"
78
+ - Gemfile
79
+ - README.md
80
+ - Rakefile
81
+ - backfiller.gemspec
82
+ - bin/console
83
+ - bin/setup
84
+ - lib/backfiller.rb
85
+ - lib/backfiller/configuration.rb
86
+ - lib/backfiller/cursor.rb
87
+ - lib/backfiller/cursor/postgresql.rb
88
+ - lib/backfiller/railtie.rb
89
+ - lib/backfiller/runner.rb
90
+ - lib/backfiller/tasks/db.rake
91
+ homepage: https://github.com/railsware/backfiller
92
+ licenses:
93
+ - MIT
94
+ metadata: {}
95
+ post_install_message:
96
+ rdoc_options: []
97
+ require_paths:
98
+ - lib
99
+ required_ruby_version: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ required_rubygems_version: !ruby/object:Gem::Requirement
105
+ requirements:
106
+ - - ">="
107
+ - !ruby/object:Gem::Version
108
+ version: '0'
109
+ requirements: []
110
+ rubyforge_project:
111
+ rubygems_version: 2.6.11
112
+ signing_key:
113
+ specification_version: 4
114
+ summary: Backfiller for null database columns
115
+ test_files: []
116
+ has_rdoc: