backfiller 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 3c3e9ee72c70a3b89caba10d6073a87058290680
4
+ data.tar.gz: 22071f36c634b6353787cac65a8c2984e8eb1ade
5
+ SHA512:
6
+ metadata.gz: 3931af99bdc3771c1636c24e4ce1374ca0af0739090c6f56defd530f4fa26f6cd3ac497feb9cd2824fa91a7f8c6a2c4e3cef0b64fb51b4d76cd4f13d90c033c1
7
+ data.tar.gz: c6bd78c26fd968f0df688bb89b52d22dc7681e9f781825300c4d5183310db95aaab56a6e1efc3b0ed8f0419ec20dd271e2b46d4114f2fbba89aed424e266debb
data/.gitignore ADDED
@@ -0,0 +1,12 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+
11
+ # rspec failure tracking
12
+ .rspec_status
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source "https://rubygems.org"
2
+ # Register a :github git source that maps a repo name to its HTTPS GitHub URL.
3
+ git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
4
+
5
+ # Specify your gem's dependencies in backfiller.gemspec
6
+ gemspec
data/README.md ADDED
@@ -0,0 +1,117 @@
1
+ # Backfiller
2
+
3
+ The backfill machine for null database columns.
4
+ This gem may be handy for `no-downtime` deployments, especially when you need to fill columns of a table with a huge number of records without locking the table.
5
+
6
+ ## Typical no-downtime and non-locking cycle
7
+
8
+ * add a migration that adds the new column (null: true)
9
+ * deploy and run migration task
10
+ * deploy code that starts filling new column in corresponding flows
11
+ * add backfill task
12
+ * deploy and run the backfill task
13
+ * [optional] add a migration that invokes the backfill task so as to keep all environments consistent (except the production environment, because we have already backfilled the data there)
14
+ * add a migration that disallows null values (null: false)
15
+ * deploy code that starts using new column
16
+
17
+
18
+ ## Concept
19
+
20
+ The idea is to prepare all data in a selection query on the database server, fetch it using a CURSOR, and then build simple UPDATE queries.
21
+ This way we minimize database server resource usage and lock only one record at a time (atomic update).
22
+ We use two connections to database:
23
+ * master - creates a cursor in a transaction and fetches data in batches.
24
+ * worker - executes small atomic update queries (no wrapping transaction)
25
+
26
+ Even if the backfill process crashes, you can resolve the issue and run it again to process the remaining data.
27
+
28
+ ## Connection adapters
29
+
30
+ Currently it supports only the PostgreSQL ActiveRecord adapter.
31
+
32
+ ## Installation
33
+
34
+ Add this line to your application's Gemfile:
35
+
36
+ ```ruby
37
+ gem 'backfiller'
38
+ ```
39
+
40
+ And then execute:
41
+
42
+ $ bundle
43
+
44
+ Or install it yourself as:
45
+
46
+ $ gem install backfiller
47
+
48
+ ## Usage
49
+
50
+ Assume we want to backfill the `profiles.name` column from the `users.first_name` and `users.last_name` columns.
51
+
52
+ Create a backfill task in `db/backfill/profile_name.rb` and define the required methods:
53
+
54
+ ```ruby
55
+ class Backfill::ProfileName
56
+ def select_sql
57
+ <<-SQL.strip_heredoc
58
+ SELECT
59
+ profiles.id AS profile_id,
60
+ CONCAT(users.first_name, ' ', users.last_name) AS profile_name
61
+ FROM profiles
62
+ INNER JOIN users ON
63
+ users.id = profiles.user_id
64
+ WHERE
65
+ profiles.name IS NULL
66
+ SQL
67
+ end
68
+
69
+ def update_sql(connection, row)
70
+ <<-SQL.strip_heredoc
71
+ UPDATE profiles SET
72
+ name = #{connection.quote(row['profile_name'])}
73
+ WHERE
74
+ id = #{connection.quote(row['profile_id'])}
75
+ SQL
76
+ end
77
+ end
78
+ ```
79
+
80
+ And then just run rake task:
81
+
82
+ ```bash
83
+ $ rails db:backfill[profile_name]
84
+ ```
85
+
86
+
87
+ ## Configuration
88
+
89
+ For a Rails application, backfiller is initialized with the following options:
90
+
91
+ * task_directory: `RAILS_ROOT/db/backfill`
92
+ * task_namespace: `Backfill`
93
+ * batch_size: `1_000`
94
+ * connection_pool: `ApplicationRecord.connection_pool`
95
+ * logger: `ApplicationRecord.logger`
96
+
97
+ You may change it globally via `config/initializers/backfiller.rb`:
98
+
99
+ ```ruby
100
+ Backfiller.configure do |config|
101
+ config.batch_size = 500
102
+ end
103
+ ```
104
+
105
+ Or override some options in a specific backfill task:
106
+
107
+ ```ruby
108
+ class Backfill::Foo
109
+ def batch_size
110
+ 100
111
+ end
112
+ end
113
+ ```
114
+
115
+ ## Authors
116
+
117
+ * [Andriy Yanko](http://ayanko.github.io)
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+ # Define the `spec` Rake task through RSpec's rake task helper.
4
+ RSpec::Core::RakeTask.new(:spec)
5
+ # Make `spec` the default Rake task.
6
+ task :default => :spec
@@ -0,0 +1,27 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path("../lib", __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ # The guard above prepends this gem's lib/ directory to the load path only once.
5
+ Gem::Specification.new do |spec|
6
+ spec.name = "backfiller"
7
+ spec.version = "0.0.1"
8
+ spec.authors = ["Andriy Yanko"]
9
+ spec.email = ["andriy.yanko@railsware.com"]
10
+
11
+ spec.summary = %q{Backfiller for null database columns}
12
+ spec.homepage = "https://github.com/railsware/backfiller"
13
+ spec.license = "MIT"
14
+ # Package every git-tracked file except those under test/, spec/ or features/.
15
+ spec.files = `git ls-files -z`.split("\x0").reject do |f|
16
+ f.match(%r{^(test|spec|features)/})
17
+ end
18
+ spec.bindir = "exe"
19
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
20
+ spec.require_paths = ["lib"]
21
+ # Runtime dependency.
22
+ spec.add_dependency "activerecord", ">= 5.0.0"
23
+ # Development-only dependencies.
24
+ spec.add_development_dependency "bundler", "~> 1.15"
25
+ spec.add_development_dependency "rake", "~> 10.0"
26
+ spec.add_development_dependency "rspec", "~> 3.0"
27
+ end
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "backfiller"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+ # Start an IRB session with bundler/setup and backfiller already required.
13
+ require "irb"
14
+ IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail  # stop on errors, on unset variables and on failed pipeline stages
3
+ IFS=$'\n\t'  # split words on newline and tab only
4
+ set -vx  # echo each command as it is read and as it is executed
5
+ # Install the gem's dependencies with Bundler.
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
data/lib/backfiller.rb ADDED
@@ -0,0 +1,5 @@
1
+ require 'backfiller/configuration'
2
+ require 'backfiller/cursor'
3
+ require 'backfiller/runner'
4
+
5
+ require 'backfiller/railtie' if defined?(Rails)
@@ -0,0 +1,27 @@
1
+ module Backfiller
2
+   # Module-level settings and entry points, defined as singleton methods.
3
+   class << self
4
+     def configure # yields the Backfiller module itself so settings can be assigned on it
5
+       yield self
6
+     end
7
+     # Directory that task files are required from.
8
+     attr_accessor :task_directory
9
+     # Namespace combined with the task name to resolve the task class.
10
+     attr_accessor :task_namespace
11
+     # Default pool that the runner checks connections out of.
12
+     attr_accessor :connection_pool
13
+     # Default number of rows fetched from the cursor per batch.
14
+     attr_accessor :batch_size
15
+     # Logger used by .log; nothing is logged while it is nil.
16
+     attr_accessor :logger
17
+     # Builds a Backfiller::Runner for task_name and runs it.
18
+     def run(task_name)
19
+       Backfiller::Runner.new(task_name).run
20
+     end
21
+     # Logs message at info level, prefixed with "[Backfiller] ", when a logger is set.
22
+     def log(message)
23
+       logger.info "[Backfiller] #{message}" if logger
24
+     end
25
+   end
26
+
27
+ end
@@ -0,0 +1,16 @@
1
+ require 'backfiller/cursor/postgresql'
2
+
3
+ module Backfiller
4
+   module Cursor
5
+     # Factory: builds the cursor implementation that matches the connection's adapter class.
6
+     def self.new(connection, *args) # args are passed through unchanged to the cursor class
7
+       case connection
8
+       when ActiveRecord::ConnectionAdapters::PostgreSQLAdapter
9
+         Backfiller::Cursor::Postgresql.new(connection, *args)
10
+       else
11
+         raise "Unsupported connection #{connection.inspect}" # RuntimeError for every other adapter
12
+       end
13
+     end
14
+
15
+   end
16
+ end
@@ -0,0 +1,27 @@
1
+ module Backfiller
2
+   module Cursor
3
+     class Postgresql
4
+       # Drives a named SQL cursor through DECLARE / FETCH / CLOSE statements.
5
+       attr_reader :connection
6
+       # name and query are interpolated into the SQL text as-is (no quoting is applied here).
7
+       def initialize(connection, name, query)
8
+         @connection = connection
9
+         @name = name
10
+         @query = query
11
+       end
12
+       # Declares the cursor for the query (NO SCROLL, WITHOUT HOLD).
13
+       def open
14
+         @connection.execute "DECLARE #{@name} NO SCROLL CURSOR WITHOUT HOLD FOR #{@query}"
15
+       end
16
+       # Issues FETCH for count rows and returns whatever select_all returns.
17
+       def fetch(count)
18
+         @connection.select_all "FETCH #{count} FROM #{@name}"
19
+       end
20
+       # Closes the cursor.
21
+       def close
22
+         @connection.execute "CLOSE #{@name}"
23
+       end
24
+
25
+     end
26
+   end
27
+ end
@@ -0,0 +1,28 @@
1
+ module Backfiller
2
+   class Railtie < Rails::Railtie
3
+     # Load the gem's rake file (backfiller/tasks/db.rake) together with the application's rake tasks.
4
+     rake_tasks do
5
+       load 'backfiller/tasks/db.rake'
6
+     end
7
+     # Install the default settings.
8
+     initializer 'backfiller.configure' do
9
+       Backfiller.configure do |config|
10
+         config.task_directory = Rails.root.join('db', 'backfill')
11
+         # Stored lower-case; it is passed through classify wherever a constant name is needed.
12
+         config.task_namespace = 'backfill'
13
+
14
+         config.batch_size = 1_000
15
+         # Use ApplicationRecord's pool and logger when that constant is defined, else ActiveRecord::Base's.
16
+         config.connection_pool = defined?(ApplicationRecord) ? ApplicationRecord.connection_pool : ActiveRecord::Base.connection_pool
17
+
18
+         config.logger = defined?(ApplicationRecord) ? ApplicationRecord.logger : ActiveRecord::Base.logger
19
+       end
20
+     end
21
+     # After initialization, define the namespace module on Object unless it already exists.
22
+     config.after_initialize do
23
+       task_module = Backfiller.task_namespace.classify
24
+       Object.const_set(task_module, Module.new) unless Object.const_defined?(task_module)
25
+     end
26
+
27
+   end
28
+ end
@@ -0,0 +1,88 @@
1
+ module Backfiller
2
+ class Runner
3
+     # Readers for the task instance and the resolved connection pool and batch size.
4
+     attr_reader \
5
+       :task,
6
+       :connection_pool,
7
+       :batch_size
8
+     # Builds the task, then takes connection_pool and batch_size from the task when it responds to them, otherwise from the Backfiller-wide settings.
9
+     def initialize(task_name)
10
+       @task = build_task(task_name)
11
+       @connection_pool = @task.respond_to?(:connection_pool) ? @task.connection_pool : Backfiller.connection_pool
12
+       @batch_size = @task.respond_to?(:batch_size) ? @task.batch_size : Backfiller.batch_size
13
+     end
14
+
15
+ def run
16
+ master_connection = acquire_connection
17
+ worker_connection = acquire_connection
18
+
19
+ fetch_each(master_connection) do |row|
20
+ update_row(worker_connection, row)
21
+ end
22
+
23
+ release_connection(master_connection)
24
+ release_connection(worker_connection)
25
+ end
26
+
27
+ private
28
+     # Requires <task_directory>/<task_name>, then instantiates the class named by "<task_namespace>/<task_name>".
29
+     def build_task(task_name)
30
+       Backfiller.log "Build #{task_name} task"
31
+       require File.join(Backfiller.task_directory, task_name)
32
+       "#{Backfiller.task_namespace}/#{task_name}".classify.constantize.new # NOTE(review): classify singularizes, so a plural task_name resolves to a singular class name; confirm intended
33
+     end
34
+
35
+ ###########################################################################
36
+     # Checks a connection out of connection_pool.
37
+     def acquire_connection
38
+       connection_pool.checkout
39
+     end
40
+     # Checks the given connection back in to connection_pool.
41
+     def release_connection(connection)
42
+       connection_pool.checkin(connection)
43
+     end
44
+
45
+ ###########################################################################
46
+     # Builds a cursor named 'backfill_cursor' over the task's select_sql on the given connection.
47
+     def build_cursor(connection)
48
+       Backfiller::Cursor.new(connection, 'backfill_cursor', task.select_sql)
49
+     end
50
+     # Opens a cursor on master_connection inside a transaction and passes every fetched row to the block.
51
+     def fetch_each(master_connection, &block)
52
+       cursor = build_cursor(master_connection)
53
+       # Open, fetch and close all happen inside this single transaction.
54
+       cursor.connection.transaction do
55
+         Backfiller.log "Open cursor"
56
+         cursor.open
57
+
58
+         Backfiller.log "Start fetch loop"
59
+         fetch_loop(cursor, &block)
60
+
61
+         Backfiller.log "Close cursor"
62
+         cursor.close
63
+       end
64
+     end
65
+     # Fetches batch_size rows at a time, calls the block for each row and logs the running total after every batch.
66
+     def fetch_loop(cursor, &block)
67
+       count = 0
68
+
69
+       loop do
70
+         result = cursor.fetch(batch_size)
71
+         # Stop as soon as a fetch returns no rows.
72
+         break if result.empty?
73
+
74
+         result.each do |row|
75
+           block.call(row)
76
+           count += 1
77
+         end
78
+
79
+         Backfiller.log "Processed #{count}"
80
+       end
81
+     end
82
+     # Executes the SQL returned by task.update_sql for one row on the given connection.
83
+     def update_row(connection, row)
84
+       connection.execute task.update_sql(connection, row)
85
+     end
86
+
87
+ end
88
+ end
@@ -0,0 +1,10 @@
1
+ namespace :db do
2
+   # Defines db:backfill[name]; the :environment task runs first.
3
+   desc 'Run database backfill task'
4
+   task :backfill, [:name] => :environment do |t, args|
5
+     raise 'Please specify backfill task name' unless args[:name]
6
+     Backfiller.logger.level = :info if Backfiller.logger # NOTE(review): sets the level on the configured logger object itself; confirm this is intended if that logger is shared
7
+     Backfiller.run(args[:name])
8
+   end
9
+
10
+ end
metadata ADDED
@@ -0,0 +1,116 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: backfiller
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Andriy Yanko
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2017-09-30 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: activerecord
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 5.0.0
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 5.0.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.15'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.15'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '10.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '10.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '3.0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '3.0'
69
+ description:
70
+ email:
71
+ - andriy.yanko@railsware.com
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:
76
+ - ".gitignore"
77
+ - ".rspec"
78
+ - Gemfile
79
+ - README.md
80
+ - Rakefile
81
+ - backfiller.gemspec
82
+ - bin/console
83
+ - bin/setup
84
+ - lib/backfiller.rb
85
+ - lib/backfiller/configuration.rb
86
+ - lib/backfiller/cursor.rb
87
+ - lib/backfiller/cursor/postgresql.rb
88
+ - lib/backfiller/railtie.rb
89
+ - lib/backfiller/runner.rb
90
+ - lib/backfiller/tasks/db.rake
91
+ homepage: https://github.com/railsware/backfiller
92
+ licenses:
93
+ - MIT
94
+ metadata: {}
95
+ post_install_message:
96
+ rdoc_options: []
97
+ require_paths:
98
+ - lib
99
+ required_ruby_version: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ required_rubygems_version: !ruby/object:Gem::Requirement
105
+ requirements:
106
+ - - ">="
107
+ - !ruby/object:Gem::Version
108
+ version: '0'
109
+ requirements: []
110
+ rubyforge_project:
111
+ rubygems_version: 2.6.11
112
+ signing_key:
113
+ specification_version: 4
114
+ summary: Backfiller for null database columns
115
+ test_files: []
116
+ has_rdoc: