backfiller 0.1.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d546e155a0790fdb0190454b6f59ccfa7847000ea93e64c785b7af932446c2aa
4
- data.tar.gz: f8fb83e301f507909783ac29eb405c80d7497c318915a6af2c3df803f2a85ce2
3
+ metadata.gz: 18b60be56e9153bca870e126330f666518c012c885c2e3d86d5c24625b395780
4
+ data.tar.gz: 06e10fa60228916766f7740231add1e1acc0c3f7dec1b9a1055785d908941a29
5
5
  SHA512:
6
- metadata.gz: 6741e63a19b01bbc57829817f558838ccbe31adc18ec53fcad2115b23cba122a396e4981d0b773ad46a3af3d531771efd29fbca5d20a700a9a849a31a196b2d4
7
- data.tar.gz: 0bf707056834e42d4e94fcf50477d121eb4e2fb8b4b31a98e13eb5be926a7cb32d1260726fd10f14d43ed0c3631bafc5788257cc835dab551cba57d084250fc8
6
+ metadata.gz: 0315e6720c7ec5c15904696843caef797a5beceb2771934b01dcc61fddbbb4f65c28fb9b4a60ad94ea2c1d31161facc64f5016a8a236f506ea02f560b94e9262
7
+ data.tar.gz: 5a085ff3f03022926e6e188f1900f3697658ca78489e45273fce7fc6d6a78e93708c481c7bcfd09ddf049fe223c04f8ac6f625d7dc0fa103282070b1297d23f0
data/.gitignore CHANGED
@@ -7,6 +7,7 @@
7
7
  /pkg/
8
8
  /spec/reports/
9
9
  /tmp/
10
+ log/*.log
10
11
 
11
12
  # rspec failure tracking
12
13
  .rspec_status
data/.rspec CHANGED
@@ -1,2 +1,3 @@
1
+ --require spec_helper
1
2
  --format documentation
2
3
  --color
data/.rubocop.yml ADDED
@@ -0,0 +1,16 @@
1
+ AllCops:
2
+ NewCops: enable
3
+ SuggestExtensions: false
4
+
5
+ Style/Documentation:
6
+ Enabled: false
7
+
8
+ Style/AccessorGrouping:
9
+ Enabled: false
10
+
11
+ Metrics/BlockLength:
12
+ Exclude:
13
+ - 'spec/**/*_spec.rb'
14
+
15
+ Metrics/MethodLength:
16
+ Max: 20
data/.travis.yml ADDED
@@ -0,0 +1,14 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.7.2
4
+ gemfile:
5
+ - Gemfile
6
+ cache: bundler
7
+ before_install:
8
+ - gem install bundler
9
+ before_script:
10
+ - psql -c 'create database test;' -U postgres
11
+ script:
12
+ - bundle exec rake ci
13
+ services:
14
+ - postgresql
data/Changelog.md CHANGED
@@ -1,5 +1,13 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.2.0
4
+
5
+ * Upgrade local gems
6
+ * Add rubocop
7
+ * Add specs
8
+ * Configure travis
9
+ * Add `cursor_threshold` feature
10
+
3
11
  ## 0.1.1
4
12
 
5
13
  * Support Rails 6.0.0
data/Gemfile CHANGED
@@ -1,6 +1,5 @@
1
- source "https://rubygems.org"
1
+ # frozen_string_literal: true
2
2
 
3
- git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
3
+ source 'https://rubygems.org'
4
4
 
5
- # Specify your gem's dependencies in backfiller.gemspec
6
5
  gemspec
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  ![Backfill machine](https://railsware.github.io/backfiller/assets/backfill_machine.jpg)
2
2
 
3
- # Backfiller
3
+ # Backfiller [![Build Status](https://travis-ci.com/railsware/backfiller.svg?branch=master)](https://travis-ci.com/railsware/backfiller)
4
4
 
5
5
  The backfill machine for null database columns.
6
6
  This gem may be handy for `no-downtime` deployment, especially when you need to fill columns for a table with a huge amount of records without locking the table.
data/Rakefile CHANGED
@@ -1,6 +1,11 @@
1
- require "bundler/gem_tasks"
2
- require "rspec/core/rake_task"
1
+ # frozen_string_literal: true
2
+
3
+ require 'bundler/gem_tasks'
4
+ require 'rspec/core/rake_task'
5
+ require 'rubocop/rake_task'
3
6
 
4
7
  RSpec::Core::RakeTask.new(:spec)
8
+ RuboCop::RakeTask.new
5
9
 
6
- task :default => :spec
10
+ desc 'CI build'
11
+ task ci: %i[spec rubocop]
data/backfiller.gemspec CHANGED
@@ -1,27 +1,33 @@
1
- # coding: utf-8
2
- lib = File.expand_path("../lib", __FILE__)
1
+ # frozen_string_literal: true
2
+
3
+ lib = File.expand_path('lib', __dir__)
3
4
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
5
 
5
6
  Gem::Specification.new do |spec|
6
- spec.name = "backfiller"
7
- spec.version = "0.1.1"
8
- spec.authors = ["Andriy Yanko"]
9
- spec.email = ["andriy.yanko@railsware.com"]
7
+ spec.name = 'backfiller'
8
+ spec.version = '0.2.0'
9
+ spec.authors = ['Andriy Yanko']
10
+ spec.email = ['andriy.yanko@railsware.com']
11
+
12
+ spec.summary = 'Backfiller for null database columns'
13
+ spec.homepage = 'https://github.com/railsware/backfiller'
14
+ spec.license = 'MIT'
10
15
 
11
- spec.summary = %q{Backfiller for null database columns}
12
- spec.homepage = "https://github.com/railsware/backfiller"
13
- spec.license = "MIT"
16
+ spec.required_ruby_version = '>= 2.7.0'
14
17
 
15
- spec.files = `git ls-files -z`.split("\x0").reject do |f|
18
+ spec.files = `git ls-files -z`.split("\x0").reject do |f|
16
19
  f.match(%r{^(test|spec|features)/})
17
20
  end
18
- spec.bindir = "exe"
19
- spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
20
- spec.require_paths = ["lib"]
21
+ spec.bindir = 'exe'
22
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
23
+ spec.require_paths = ['lib']
24
+
25
+ spec.add_dependency 'activerecord', '>= 6.0.0'
21
26
 
22
- spec.add_dependency "activerecord", ">= 5.0.0"
27
+ spec.add_development_dependency 'bundler', '~> 2.2.0'
28
+ spec.add_development_dependency 'rake', '~> 13.0.0'
29
+ spec.add_development_dependency 'rspec', '~> 3.10.0'
30
+ spec.add_development_dependency 'rubocop', '~> 1.18.0'
23
31
 
24
- spec.add_development_dependency "bundler", "~> 1.15"
25
- spec.add_development_dependency "rake", "~> 10.0"
26
- spec.add_development_dependency "rspec", "~> 3.0"
32
+ spec.add_development_dependency 'pg', '~> 1.2.0'
27
33
  end
data/bin/console CHANGED
@@ -1,14 +1,8 @@
1
1
  #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
2
3
 
3
- require "bundler/setup"
4
- require "backfiller"
4
+ require 'bundler/setup'
5
+ require 'backfiller'
5
6
 
6
- # You can add fixtures and/or initialization code here to make experimenting
7
- # with your gem easier. You can also use a different console, if you like.
8
-
9
- # (If you use this, don't forget to add pry to your Gemfile!)
10
- # require "pry"
11
- # Pry.start
12
-
13
- require "irb"
7
+ require 'irb'
14
8
  IRB.start(__FILE__)
data/db/backfill/.keep ADDED
File without changes
data/lib/backfiller.rb CHANGED
@@ -1,5 +1,7 @@
1
- require 'backfiller/configuration'
2
- require 'backfiller/cursor'
3
- require 'backfiller/runner'
1
+ # frozen_string_literal: true
4
2
 
5
- require 'backfiller/railtie' if defined?(Rails::Railtie)
3
+ require_relative 'backfiller/configuration'
4
+ require_relative 'backfiller/cursor'
5
+ require_relative 'backfiller/runner'
6
+
7
+ require_relative 'backfiller/railtie' if defined?(Rails::Railtie)
@@ -1,25 +1,36 @@
1
- module Backfiller
1
+ # frozen_string_literal: true
2
2
 
3
+ module Backfiller
3
4
  class << self
4
5
  def configure
5
6
  yield self
6
7
  end
7
8
 
9
+ # directory for backfill ruby classes
8
10
  attr_accessor :task_directory
9
11
 
12
+ # ruby module of backfill classes
10
13
  attr_accessor :task_namespace
11
14
 
15
+ # Max size of records in one cursor fetch
12
16
  attr_accessor :batch_size
13
17
 
18
+ # Size of processed records after which cursor will be re-opened
19
+ attr_accessor :cursor_threshold
20
+
21
+ # Logger
14
22
  attr_accessor :logger
15
23
 
24
+ # @param task_name [String] name of backfill task file
16
25
  def run(task_name)
17
26
  Backfiller::Runner.new(task_name).run
18
27
  end
19
28
 
29
+ # @param message [String] log message
20
30
  def log(message)
21
- logger.info "[Backfiller] #{message}" if logger
31
+ return unless logger
32
+
33
+ logger.info "[Backfiller] #{message}"
22
34
  end
23
35
  end
24
-
25
36
  end
@@ -1,8 +1,9 @@
1
- require 'backfiller/cursor/postgresql'
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'cursor/postgresql'
2
4
 
3
5
  module Backfiller
4
6
  module Cursor
5
-
6
7
  def self.new(connection, *args)
7
8
  case connection
8
9
  when ActiveRecord::ConnectionAdapters::PostgreSQLAdapter
@@ -11,6 +12,5 @@ module Backfiller
11
12
  raise "Unsupported connection #{connection.inspect}"
12
13
  end
13
14
  end
14
-
15
15
  end
16
16
  end
@@ -1,7 +1,8 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Backfiller
2
4
  module Cursor
3
5
  class Postgresql
4
-
5
6
  attr_reader :connection
6
7
 
7
8
  def initialize(connection, name, query)
@@ -10,6 +11,25 @@ module Backfiller
10
11
  @query = query
11
12
  end
12
13
 
14
+ # Open cursor, call block and close cursor in transaction.
15
+ #
16
+ # @return [Object] yielded block result.
17
+ def transaction
18
+ result = nil
19
+
20
+ @connection.transaction do
21
+ Backfiller.log 'Open cursor'
22
+ open
23
+
24
+ result = yield
25
+
26
+ Backfiller.log 'Close cursor'
27
+ close
28
+ end
29
+
30
+ result
31
+ end
32
+
13
33
  def open
14
34
  @connection.execute "DECLARE #{@name} NO SCROLL CURSOR WITHOUT HOLD FOR #{@query}"
15
35
  end
@@ -21,7 +41,6 @@ module Backfiller
21
41
  def close
22
42
  @connection.execute "CLOSE #{@name}"
23
43
  end
24
-
25
44
  end
26
45
  end
27
46
  end
@@ -1,6 +1,7 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Backfiller
2
4
  class Railtie < Rails::Railtie
3
-
4
5
  rake_tasks do
5
6
  load 'backfiller/tasks/db.rake'
6
7
  end
@@ -13,6 +14,8 @@ module Backfiller
13
14
 
14
15
  config.batch_size = 1_000
15
16
 
17
+ config.cursor_threshold = 100_000
18
+
16
19
  config.logger = defined?(ApplicationRecord) ? ApplicationRecord.logger : ActiveRecord::Base.logger
17
20
  end
18
21
  end
@@ -21,6 +24,5 @@ module Backfiller
21
24
  task_module = Backfiller.task_namespace.classify
22
25
  Object.const_set(task_module, Module.new) unless Object.const_defined?(task_module)
23
26
  end
24
-
25
27
  end
26
28
  end
@@ -1,24 +1,52 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Backfiller
2
4
  class Runner
3
-
4
5
  attr_reader \
5
6
  :task,
6
7
  :connection_pool,
7
8
  :batch_size,
9
+ :cursor_threshold,
8
10
  :process_method
9
11
 
10
12
  def initialize(task_name)
11
13
  @task = build_task(task_name)
12
14
  @connection_pool = @task.respond_to?(:connection_pool) ? @task.connection_pool : default_connection_pool
13
15
  @batch_size = @task.respond_to?(:batch_size) ? @task.batch_size : Backfiller.batch_size
14
- @process_method = @task.respond_to?(:process_row) ? @task.method(:process_row) : self.method(:process_row)
16
+ @cursor_threshold = @task.respond_to?(:cursor_threshold) ? @task.cursor_threshold : Backfiller.cursor_threshold
17
+ @process_method = @task.respond_to?(:process_row) ? @task.method(:process_row) : method(:process_row)
15
18
  end
16
19
 
20
+ # It uses two connections from pool:
21
+ # * master [M] - reads data using cursor in transaction
22
+ # * worker [W] - changes data based on record read from master
23
+ #
24
+ # @example
25
+ # [M] BEGIN
26
+ # [M] DECLARE backfill_cursor NO SCROLL CURSOR WITHOUT HOLD FOR SELECT * FROM users
27
+ # // Start fetch and process loop:
28
+ # [M] FETCH 1000 backfill_cursor
29
+ # [W] UPDATE users SET full_name = '...' where id = 1
30
+ # [W] ...
31
+ # [W] UPDATE users SET full_name = '...' where id = 1000
32
+ # [M] FETCH 1000 backfill_cursor
33
+ # [W] UPDATE users SET full_name = '...' where id = 1001
34
+ # [W] ...
35
+ # [W] UPDATE users SET full_name = '...' where id = 2000
36
+ # // Records per cursor transaction threshold reached. Reopen transaction.
37
+ # [M] CLOSE backfill_cursor
38
+ # [M] COMMIT
39
+ # [M] BEGIN
40
+ # [M] DECLARE backfill_cursor NO SCROLL CURSOR WITHOUT HOLD FOR SELECT * FROM users
41
+ # [M] FETCH 1000 backfill_cursor
42
+ # // The end of cursor reached. Break cursor loop and exit.
43
+ # [M] CLOSE backfill_cursor
44
+ # [M] COMMIT
17
45
  def run
18
46
  master_connection = acquire_connection
19
47
  worker_connection = acquire_connection
20
48
 
21
- fetch_each(master_connection) do |row|
49
+ run_cursor_loop(master_connection) do |row|
22
50
  process_method.call(worker_connection, row)
23
51
  end
24
52
 
@@ -50,32 +78,34 @@ module Backfiller
50
78
 
51
79
  ###########################################################################
52
80
 
53
- def build_cursor(connection)
54
- Backfiller::Cursor.new(connection, 'backfill_cursor', task.select_sql)
55
- end
81
+ # Run loop that re-opens the cursor transaction once the threshold is reached
82
+ def run_cursor_loop(connection, &block)
83
+ Backfiller.log 'Start cursor loop'
56
84
 
57
- def fetch_each(master_connection, &block)
58
- cursor = build_cursor(master_connection)
85
+ total_count = 0
86
+ cursor = build_cursor(connection)
59
87
 
60
- cursor.connection.transaction do
61
- Backfiller.log "Open cursor"
62
- cursor.open
88
+ loop do
89
+ finished, count = cursor.transaction do
90
+ run_fetch_loop(cursor, &block)
91
+ end
63
92
 
64
- Backfiller.log "Start fetch loop"
65
- fetch_loop(cursor, &block)
93
+ total_count += count
66
94
 
67
- Backfiller.log "Close cursor"
68
- cursor.close
95
+ Backfiller.log "Total processed #{total_count}"
96
+ break if finished
69
97
  end
70
98
  end
71
99
 
72
- def fetch_loop(cursor, &block)
100
+ # @return [Array<Boolean, Integer>] finished_status/processed_count
101
+ def run_fetch_loop(cursor, &block)
102
+ Backfiller.log 'Start fetch loop'
73
103
  count = 0
74
104
 
75
105
  loop do
76
106
  result = cursor.fetch(batch_size)
77
107
 
78
- break if result.empty?
108
+ return [true, count] if result.empty?
79
109
 
80
110
  result.each do |row|
81
111
  block.call(row)
@@ -83,9 +113,19 @@ module Backfiller
83
113
  end
84
114
 
85
115
  Backfiller.log "Processed #{count}"
116
+
117
+ return [false, count] if count > cursor_threshold
86
118
  end
87
119
  end
88
120
 
121
+ ###########################################################################
122
+
123
+ # Build cursor object that will use master connection.
124
+ def build_cursor(connection)
125
+ Backfiller::Cursor.new(connection, 'backfill_cursor', task.select_sql)
126
+ end
127
+
128
+ # Process row using worker connection.
89
129
  def process_row(connection, row)
90
130
  Array(task.execute_sql(connection, row)).each do |sql|
91
131
  connection.execute(sql)
@@ -1,10 +1,11 @@
1
- namespace :db do
1
+ # frozen_string_literal: true
2
2
 
3
+ namespace :db do
3
4
  desc 'Run database backfill task'
4
- task :backfill, [:name] => :environment do |t, args|
5
+ task :backfill, [:name] => :environment do |_, args|
5
6
  raise 'Please specify backfill task name' unless args[:name]
7
+
6
8
  Backfiller.logger.level = :info if Backfiller.logger
7
9
  Backfiller.run(args[:name])
8
10
  end
9
-
10
11
  end
data/log/.keep ADDED
File without changes
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: backfiller
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andriy Yanko
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-10-31 00:00:00.000000000 Z
11
+ date: 2021-07-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activerecord
@@ -16,57 +16,85 @@ dependencies:
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: 5.0.0
19
+ version: 6.0.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: 5.0.0
26
+ version: 6.0.0
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: bundler
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '1.15'
33
+ version: 2.2.0
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '1.15'
40
+ version: 2.2.0
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: rake
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: '10.0'
47
+ version: 13.0.0
48
48
  type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '10.0'
54
+ version: 13.0.0
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: rspec
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
59
  - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: '3.0'
61
+ version: 3.10.0
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
- version: '3.0'
69
- description:
68
+ version: 3.10.0
69
+ - !ruby/object:Gem::Dependency
70
+ name: rubocop
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: 1.18.0
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: 1.18.0
83
+ - !ruby/object:Gem::Dependency
84
+ name: pg
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: 1.2.0
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: 1.2.0
97
+ description:
70
98
  email:
71
99
  - andriy.yanko@railsware.com
72
100
  executables: []
@@ -75,6 +103,8 @@ extra_rdoc_files: []
75
103
  files:
76
104
  - ".gitignore"
77
105
  - ".rspec"
106
+ - ".rubocop.yml"
107
+ - ".travis.yml"
78
108
  - Changelog.md
79
109
  - Gemfile
80
110
  - README.md
@@ -82,6 +112,7 @@ files:
82
112
  - backfiller.gemspec
83
113
  - bin/console
84
114
  - bin/setup
115
+ - db/backfill/.keep
85
116
  - lib/backfiller.rb
86
117
  - lib/backfiller/configuration.rb
87
118
  - lib/backfiller/cursor.rb
@@ -89,11 +120,12 @@ files:
89
120
  - lib/backfiller/railtie.rb
90
121
  - lib/backfiller/runner.rb
91
122
  - lib/backfiller/tasks/db.rake
123
+ - log/.keep
92
124
  homepage: https://github.com/railsware/backfiller
93
125
  licenses:
94
126
  - MIT
95
127
  metadata: {}
96
- post_install_message:
128
+ post_install_message:
97
129
  rdoc_options: []
98
130
  require_paths:
99
131
  - lib
@@ -101,15 +133,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
101
133
  requirements:
102
134
  - - ">="
103
135
  - !ruby/object:Gem::Version
104
- version: '0'
136
+ version: 2.7.0
105
137
  required_rubygems_version: !ruby/object:Gem::Requirement
106
138
  requirements:
107
139
  - - ">="
108
140
  - !ruby/object:Gem::Version
109
141
  version: '0'
110
142
  requirements: []
111
- rubygems_version: 3.0.3
112
- signing_key:
143
+ rubygems_version: 3.1.4
144
+ signing_key:
113
145
  specification_version: 4
114
146
  summary: Backfiller for null database columns
115
147
  test_files: []