acts_as_scrubbable 1.0.2 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA256:
3
- metadata.gz: a11d2fbbda7f3984de9911cba7b449d3f6c6720b17430f6511cf46407cd8a5c9
4
- data.tar.gz: 69a93697794fc1c7abd9b2acd2f9c068639b04f03391b5cc028492ebdebf44a1
2
+ SHA1:
3
+ metadata.gz: ead6fbeb0cee99c7046ad13c00423805d1099550
4
+ data.tar.gz: e31b55515b4cef3df1d78e482ec37745fdcf518c
5
5
  SHA512:
6
- metadata.gz: 4b40343ddb3c0b7f764fd48167d3ce3c0dd2cc91f2221652f515849effd1b6da67040fe376e2543240a0efd391aa91eb0347d9e2780b184bee500ac5cad2a319
7
- data.tar.gz: 18c59e0bb668fe98bbf584297d56fb41b14d2bef2e3f4e9e052b330e012a61b5eb9988ab493233db953959776390e529218bff2e0bc8203033712a48054cad7f
6
+ metadata.gz: 7c56b941e491ed7e7d76e8d1abaeaddebbea42e6d1d6b023f122698f0c5d6483b6959a0dbb45007b5ef0443401d12fb0b37d7c975282675a7acefdf332603783
7
+ data.tar.gz: 6a13305d96f630c4f37494d92ce64a83dbba5d19e0a6528460bb34616d80ba13b172803e3d112fbf5d1a6db113fdc145605f47c1808395a07f811f2814f09660
@@ -37,7 +37,7 @@ module ActsAsScrubbable
37
37
  :middle_name => -> { Faker::Name.name },
38
38
  :name => -> { Faker::Name.name },
39
39
  :email => -> { Faker::Internet.email },
40
- :name_title => -> { Faker::Name.title },
40
+ :name_title => -> { defined?(Faker::Job) ? Faker::Job.title : Faker::Name.title }, # parens required: a bare defined? swallows the whole ternary and yields "expression"
41
41
  :company_name => -> { Faker::Company.name },
42
42
  :street_address => -> { Faker::Address.street_address },
43
43
  :secondary_address => -> { Faker::Address.secondary_address },
@@ -0,0 +1,84 @@
1
+ require "parallel"
2
+
3
+ module ActsAsScrubbable
4
+ class ParallelTableScrubber
5
+ def initialize(ar_class)
6
+ @ar_class = ar_class
7
+ end
8
+
9
+ def scrub(num_batches:)
10
+ # Removing any find or initialize callbacks from model
11
+ ar_class.reset_callbacks(:initialize)
12
+ ar_class.reset_callbacks(:find)
13
+
14
+ queries = parallel_queries(ar_class: ar_class, num_batches: num_batches)
15
+ scrubbed_count = Parallel.map(queries) { |query|
16
+ scrubbed_count = 0
17
+ ActiveRecord::Base.connection_pool.with_connection do
18
+ relation = ar_class
19
+ relation = relation.send(:scrubbable_scope) if ar_class.respond_to?(:scrubbable_scope)
20
+ relation.where(query).find_in_batches(batch_size: 1000) do |batch|
21
+ ActiveRecord::Base.transaction do
22
+ batch.each do |obj|
23
+ obj.scrub!
24
+ scrubbed_count += 1
25
+ end
26
+ end
27
+ end
28
+ end
29
+ scrubbed_count
30
+ }.reduce(:+)
31
+ end
32
+
33
+ private
34
+
35
+ attr_reader :ar_class
36
+
37
+ # create even ID ranges for the table
38
+ def parallel_queries(ar_class:, num_batches:)
39
+ raise "Model is missing id column" if ar_class.columns.none? { |column| column.name == "id" }
40
+
41
+ if ar_class.respond_to?(:scrubbable_scope)
42
+ num_records = ar_class.send(:scrubbable_scope).count
43
+ else
44
+ num_records = ar_class.count
45
+ end
46
+ return [] if num_records == 0 # no records to import
47
+
48
+ record_window_size, modulus = num_records.divmod(num_batches)
49
+ if record_window_size < 1
50
+ record_window_size = 1
51
+ modulus = 0
52
+ end
53
+
54
+ start_id = next_id(ar_class: ar_class, offset: 0)
55
+ queries = num_batches.times.each_with_object([]) do |_, queries|
56
+ next unless start_id
57
+
58
+ end_id = next_id(ar_class: ar_class, id: start_id, offset: record_window_size-1)
59
+ if modulus > 0
60
+ end_id = next_id(ar_class: ar_class, id: end_id)
61
+ modulus -= 1
62
+ end
63
+ queries << {id: start_id..end_id} if end_id
64
+ start_id = next_id(ar_class: ar_class, id: end_id || start_id)
65
+ end
66
+
67
+ # just in case new records are added since we started, extend the end ID
68
+ queries[-1] = ["#{ar_class.quoted_table_name}.id >= ?", queries[-1][:id].begin] if queries.any?
69
+
70
+ queries
71
+ end
72
+
73
+ def next_id(ar_class:, id: nil, offset: 1)
74
+ if ar_class.respond_to?(:scrubbable_scope)
75
+ collection = ar_class.send(:scrubbable_scope)
76
+ else
77
+ collection = ar_class.all
78
+ end
79
+ collection.reorder(:id)
80
+ collection = collection.where("#{ar_class.quoted_table_name}.id >= :id", id: id) if id
81
+ collection.offset(offset).limit(1).pluck(:id).first
82
+ end
83
+ end
84
+ end
@@ -3,64 +3,55 @@ require 'rake'
3
3
 
4
4
  namespace :scrub do
5
5
 
6
- desc "scrub all"
6
+ desc "scrub all scrubbable tables"
7
7
  task all: :environment do
8
-
9
8
  require 'highline/import'
10
9
  require 'term/ansicolor'
11
10
  require 'logger'
12
11
  require 'parallel'
13
12
 
14
-
15
13
  include Term::ANSIColor
16
14
 
17
- @logger = Logger.new($stdout)
18
- @logger.formatter = proc do |severity, datetime, progname, msg|
15
+ logger = Logger.new($stdout)
16
+ logger.formatter = proc do |severity, datetime, progname, msg|
19
17
  "#{datetime}: [#{severity}] - #{msg}\n"
20
18
  end
21
19
 
22
20
  db_host = ActiveRecord::Base.connection_config[:host]
23
21
  db_name = ActiveRecord::Base.connection_config[:database]
24
22
 
25
- @logger.warn "Please verify the information below to continue".red
26
- @logger.warn "Host: ".red + " #{db_host}".white
27
- @logger.warn "Database: ".red + "#{db_name}".white
23
+ logger.warn "Please verify the information below to continue".red
24
+ logger.warn "Host: ".red + " #{db_host}".white
25
+ logger.warn "Database: ".red + "#{db_name}".white
28
26
 
29
27
  unless ENV["SKIP_CONFIRM"] == "true"
30
-
31
28
  answer = ask("Type '#{db_host}' to continue. \n".red + '-> '.white)
32
29
  unless answer == db_host
33
- @logger.error "exiting ...".red
30
+ logger.error "exiting ...".red
34
31
  exit
35
32
  end
36
33
  end
37
34
 
38
- @logger.warn "Scrubbing classes".red
35
+ logger.warn "Scrubbing classes".red
39
36
 
40
37
  Rails.application.eager_load! # make sure all the classes are loaded
41
38
 
42
- @total_scrubbed = 0
43
-
44
39
  ar_classes = ActiveRecord::Base.descendants.select{|d| d.scrubbable? }.sort_by{|d| d.to_s }
45
40
 
46
-
47
- # if the ENV variable is set
48
-
49
- unless ENV["SCRUB_CLASSES"].blank?
41
+ if ENV["SCRUB_CLASSES"].present?
50
42
  class_list = ENV["SCRUB_CLASSES"].split(",")
51
43
  class_list = class_list.map {|_class_str| _class_str.constantize }
52
44
  ar_classes = ar_classes & class_list
53
45
  end
54
46
 
55
- @logger.info "Srubbable Classes: #{ar_classes.join(', ')}".white
47
+ logger.info "Scrubbable Classes: #{ar_classes.join(', ')}".white
56
48
 
57
49
  Parallel.each(ar_classes) do |ar_class|
58
-
59
50
  # Removing any find or initialize callbacks from model
60
51
  ar_class.reset_callbacks(:initialize)
61
52
  ar_class.reset_callbacks(:find)
62
53
 
63
- @logger.info "Scrubbing #{ar_class} ...".green
54
+ logger.info "Scrubbing #{ar_class} ...".green
64
55
 
65
56
  scrubbed_count = 0
66
57
 
@@ -81,16 +72,59 @@ namespace :scrub do
81
72
  end
82
73
  end
83
74
 
84
- @logger.info "#{scrubbed_count} #{ar_class} objects scrubbed".blue
75
+ logger.info "#{scrubbed_count} #{ar_class} objects scrubbed".blue
85
76
  end
86
77
  ActiveRecord::Base.connection.verify!
87
78
 
88
79
  if ENV["SKIP_AFTERHOOK"].blank?
89
- @logger.info "Running after hook".red
80
+ logger.info "Running after hook".red
90
81
  ActsAsScrubbable.execute_after_hook
91
82
  end
92
83
 
93
- @logger.info "Scrub Complete!".white
84
+ logger.info "Scrub Complete!".white
85
+ end
86
+
87
+ desc "Scrub one table"
88
+ task :model, [:ar_class] => :environment do |_, args|
89
+ require 'highline/import'
90
+ require 'term/ansicolor'
91
+ require 'logger'
92
+ require 'acts_as_scrubbable/parallel_table_scrubber'
93
+
94
+ include Term::ANSIColor
95
+
96
+ logger = Logger.new($stdout)
97
+ logger.formatter = proc do |severity, datetime, progname, msg|
98
+ "#{datetime}: [#{severity}] - #{msg}\n"
99
+ end
100
+
101
+ db_host = ActiveRecord::Base.connection_config[:host]
102
+ db_name = ActiveRecord::Base.connection_config[:database]
103
+
104
+ logger.warn "Please verify the information below to continue".red
105
+ logger.warn "Host: ".red + " #{db_host}".white
106
+ logger.warn "Database: ".red + "#{db_name}".white
107
+
108
+ unless ENV["SKIP_CONFIRM"] == "true"
109
+ answer = ask("Type '#{db_host}' to continue. \n".red + '-> '.white)
110
+ unless answer == db_host
111
+ logger.error "exiting ...".red
112
+ exit
113
+ end
114
+ end
115
+
116
+ Rails.application.eager_load! # make sure all the classes are loaded
117
+
118
+ ar_class = args[:ar_class].constantize
119
+ logger.info "Scrubbing #{ar_class} ...".green
120
+
121
+ num_batches = Integer(ENV.fetch("SCRUB_BATCHES", "256"))
122
+ scrubbed_count = ActsAsScrubbable::ParallelTableScrubber.new(ar_class).scrub(num_batches: num_batches)
123
+
124
+ logger.info "#{scrubbed_count} #{ar_class} objects scrubbed".blue
125
+ ActiveRecord::Base.connection.verify!
126
+
127
+ logger.info "Scrub Complete!".white
94
128
  end
95
129
  end
96
130
 
@@ -1,3 +1,3 @@
1
1
  module ActsAsScrubbable
2
- VERSION = '1.0.2'
2
+ VERSION = '1.1.0'
3
3
  end
@@ -2,8 +2,23 @@ ActiveRecord::Schema.define(version: 20150421224501) do
2
2
 
3
3
  create_table "scrubbable_models", force: true do |t|
4
4
  t.string "first_name"
5
+ t.string "last_name"
6
+ t.string "middle_name"
7
+ t.string "name"
8
+ t.string "email"
9
+ t.string "title"
10
+ t.string "company_name"
5
11
  t.string "address1"
12
+ t.string "address2"
13
+ t.string "zip_code"
14
+ t.string "state"
15
+ t.string "state_short"
16
+ t.string "city"
6
17
  t.string "lat"
18
+ t.string "lon"
19
+ t.string "username"
20
+ t.boolean "active"
21
+ t.string "school"
7
22
  end
8
23
 
9
24
  end
@@ -5,9 +5,35 @@ RSpec.describe ActsAsScrubbable::Scrub do
5
5
  describe '.scrub' do
6
6
 
7
7
  # update_columns cannot be run on a new record
8
- subject{ ScrubbableModel.new }
8
+ subject { ScrubbableModel.new }
9
9
  before(:each) { subject.save }
10
10
 
11
+ it 'scrubs all columns' do
12
+ subject.attributes = {
13
+ first_name: "Ted",
14
+ last_name: "Lowe",
15
+ middle_name: "Cassidy",
16
+ name: "Miss Vincenzo Smitham",
17
+ email: "trentdibbert@wiza.com",
18
+ title: "Internal Consultant",
19
+ company_name: "Greenfelder, Collier and Lesch",
20
+ address1: "86780 Watsica Flats",
21
+ address2: "Apt. 913",
22
+ zip_code: "49227",
23
+ state: "Ohio",
24
+ state_short: "OH",
25
+ city: "Port Hildegard",
26
+ lat: -79.5855309778974,
27
+ lon: 13.517352691513906,
28
+ username: "oscar.hermann",
29
+ active: false,
30
+ school: "Eastern Lebsack",
31
+ }
32
+ expect {
33
+ subject.scrub!
34
+ }.not_to raise_error
35
+ end
36
+
11
37
  it 'changes the first_name attribute when scrub is run' do
12
38
  subject.first_name = "Ted"
13
39
  allow(Faker::Name).to receive(:first_name).and_return("John")
@@ -15,7 +15,24 @@ end
15
15
  class NonScrubbableModel < ActiveRecord::Base; end
16
16
 
17
17
  class ScrubbableModel < ActiveRecord::Base
18
- acts_as_scrubbable :first_name, :address1 => :street_address, :lat => :latitude
18
+ acts_as_scrubbable :first_name,
19
+ :last_name,
20
+ :middle_name,
21
+ :name,
22
+ :email,
23
+ :company_name,
24
+ :zip_code,
25
+ :state,
26
+ :city,
27
+ :username,
28
+ :school,
29
+ :title => :name_title,
30
+ :address1 => :street_address,
31
+ :address2 => :secondary_address,
32
+ :state_short => :state_abbr,
33
+ :lat => :latitude,
34
+ :lon => :longitude,
35
+ :active => :boolean
19
36
  attr_accessor :scrubbing_begun, :scrubbing_finished
20
37
  set_callback :scrub, :before do
21
38
  self.scrubbing_begun = true
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: acts_as_scrubbable
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.2
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Samer Masry
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-08-22 00:00:00.000000000 Z
11
+ date: 2019-06-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -226,6 +226,7 @@ files:
226
226
  - README.md
227
227
  - acts_as_scrubbable.gemspec
228
228
  - lib/acts_as_scrubbable.rb
229
+ - lib/acts_as_scrubbable/parallel_table_scrubber.rb
229
230
  - lib/acts_as_scrubbable/scrub.rb
230
231
  - lib/acts_as_scrubbable/scrubbable.rb
231
232
  - lib/acts_as_scrubbable/tasks.rb
@@ -256,7 +257,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
256
257
  version: '0'
257
258
  requirements: []
258
259
  rubyforge_project:
259
- rubygems_version: 2.7.6
260
+ rubygems_version: 2.6.14
260
261
  signing_key:
261
262
  specification_version: 4
262
263
  summary: Scrubbing data made easy