acts_as_scrubbable 1.0.2 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/lib/acts_as_scrubbable.rb +1 -1
- data/lib/acts_as_scrubbable/parallel_table_scrubber.rb +84 -0
- data/lib/acts_as_scrubbable/tasks.rb +57 -23
- data/lib/acts_as_scrubbable/version.rb +1 -1
- data/spec/db/schema.rb +15 -0
- data/spec/lib/acts_as_scrubbable/scrub_spec.rb +27 -1
- data/spec/support/database.rb +18 -1
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: ead6fbeb0cee99c7046ad13c00423805d1099550
|
4
|
+
data.tar.gz: e31b55515b4cef3df1d78e482ec37745fdcf518c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7c56b941e491ed7e7d76e8d1abaeaddebbea42e6d1d6b023f122698f0c5d6483b6959a0dbb45007b5ef0443401d12fb0b37d7c975282675a7acefdf332603783
|
7
|
+
data.tar.gz: 6a13305d96f630c4f37494d92ce64a83dbba5d19e0a6528460bb34616d80ba13b172803e3d112fbf5d1a6db113fdc145605f47c1808395a07f811f2814f09660
|
data/lib/acts_as_scrubbable.rb
CHANGED
@@ -37,7 +37,7 @@ module ActsAsScrubbable
|
|
37
37
|
:middle_name => -> { Faker::Name.name },
|
38
38
|
:name => -> { Faker::Name.name },
|
39
39
|
:email => -> { Faker::Internet.email },
|
40
|
-
:name_title => -> { Faker::Name.title },
|
40
|
+
# `defined?` binds more loosely than `?:`, so the constant check must be
# parenthesized; otherwise the whole ternary becomes the operand of
# `defined?` and the lambda returns the String "expression".
# Uses Faker::Job.title when Faker::Job is defined, else Faker::Name.title.
:name_title => -> { defined?(Faker::Job) ? Faker::Job.title : Faker::Name.title },
|
41
41
|
:company_name => -> { Faker::Company.name },
|
42
42
|
:street_address => -> { Faker::Address.street_address },
|
43
43
|
:secondary_address => -> { Faker::Address.secondary_address },
|
@@ -0,0 +1,84 @@
|
|
1
|
+
require "parallel"
|
2
|
+
|
3
|
+
module ActsAsScrubbable
  # Scrubs one ActiveRecord model by splitting its rows into consecutive id
  # ranges and passing each range to Parallel.map.
  class ParallelTableScrubber
    # @param ar_class [Class] the model class whose rows will be scrubbed
    def initialize(ar_class)
      @ar_class = ar_class
    end

    # Scrubs every record of the model (restricted to `scrubbable_scope` when
    # the model responds to it).
    #
    # Each block run by Parallel.map checks out a connection through
    # ActiveRecord::Base.connection_pool.with_connection and wraps every
    # batch of up to 1000 records in a single transaction.
    #
    # @param num_batches [Integer] number of id ranges to split the table into
    # @return [Integer] total number of records scrubbed (0 when there are none)
    # @raise [ArgumentError] if num_batches is not an Integer >= 1
    # @raise [RuntimeError] if the model has no "id" column
    def scrub(num_batches:)
      # A zero batch count would divide by zero and a negative one would
      # silently scrub nothing, so reject both before touching the model.
      unless num_batches.is_a?(Integer) && num_batches >= 1
        raise ArgumentError, "num_batches must be a positive integer (got #{num_batches.inspect})"
      end

      # Removing any find or initialize callbacks from model
      ar_class.reset_callbacks(:initialize)
      ar_class.reset_callbacks(:find)

      queries = parallel_queries(ar_class: ar_class, num_batches: num_batches)
      per_query_counts = Parallel.map(queries) do |query|
        # Distinct name so the counter is local to this block instead of
        # aliasing a variable of the enclosing method.
        query_count = 0
        ActiveRecord::Base.connection_pool.with_connection do
          relation = ar_class
          relation = relation.send(:scrubbable_scope) if ar_class.respond_to?(:scrubbable_scope)
          relation.where(query).find_in_batches(batch_size: 1000) do |batch|
            ActiveRecord::Base.transaction do
              batch.each do |obj|
                obj.scrub!
                query_count += 1
              end
            end
          end
        end
        query_count
      end

      # Seed with 0 so an empty query list yields 0 rather than nil.
      per_query_counts.reduce(0, :+)
    end

    private

    attr_reader :ar_class

    # Builds the `where` conditions that split the table into id ranges of
    # roughly equal record counts.
    #
    # @param ar_class [Class] the model class
    # @param num_batches [Integer] desired number of ranges
    # @return [Array] `{id: first..last}` hashes; the final element is an
    #   open-ended `["<table>.id >= ?", first]` condition. Empty when the
    #   model has no records.
    # @raise [RuntimeError] if the model has no "id" column
    def parallel_queries(ar_class:, num_batches:)
      raise "Model is missing id column" if ar_class.columns.none? { |column| column.name == "id" }

      num_records =
        if ar_class.respond_to?(:scrubbable_scope)
          ar_class.send(:scrubbable_scope).count
        else
          ar_class.count
        end
      return [] if num_records == 0 # no records to import

      # The first `modulus` ranges take one extra record each.
      record_window_size, modulus = num_records.divmod(num_batches)
      if record_window_size < 1
        # Fewer records than batches: one record per range.
        record_window_size = 1
        modulus = 0
      end

      start_id = next_id(ar_class: ar_class, offset: 0)
      queries = []
      num_batches.times do
        # Once no further id exists, none of the remaining iterations can
        # produce a range.
        break unless start_id

        end_id = next_id(ar_class: ar_class, id: start_id, offset: record_window_size - 1)
        # Only extend a window whose end was found; a nil id would make
        # next_id drop its lower bound and restart from the top of the table.
        if modulus > 0 && end_id
          end_id = next_id(ar_class: ar_class, id: end_id)
          modulus -= 1
        end
        queries << { id: start_id..end_id } if end_id
        start_id = next_id(ar_class: ar_class, id: end_id || start_id)
      end

      # just in case new records are added since we started, extend the end ID
      queries[-1] = ["#{ar_class.quoted_table_name}.id >= ?", queries[-1][:id].begin] if queries.any?

      queries
    end

    # Finds the id located `offset` rows after the first row whose id is
    # >= `id` (or after the first row overall when `id` is nil), in id order.
    #
    # @param ar_class [Class] the model class
    # @param id [Integer, nil] inclusive lower bound for the search
    # @param offset [Integer] number of rows to skip from the lower bound
    # @return [Integer, nil] the id found, or nil when no such row exists
    def next_id(ar_class:, id: nil, offset: 1)
      collection =
        if ar_class.respond_to?(:scrubbable_scope)
          ar_class.send(:scrubbable_scope)
        else
          ar_class.all
        end
      # reorder returns a new relation, so its result has to be kept for the
      # OFFSET below to be applied in id order.
      collection = collection.reorder(:id)
      collection = collection.where("#{ar_class.quoted_table_name}.id >= :id", id: id) if id
      collection.offset(offset).limit(1).pluck(:id).first
    end
  end
end
|
@@ -3,64 +3,55 @@ require 'rake'
|
|
3
3
|
|
4
4
|
namespace :scrub do
|
5
5
|
|
6
|
-
desc "scrub all"
|
6
|
+
desc "scrub all scrubbable tables"
|
7
7
|
task all: :environment do
|
8
|
-
|
9
8
|
require 'highline/import'
|
10
9
|
require 'term/ansicolor'
|
11
10
|
require 'logger'
|
12
11
|
require 'parallel'
|
13
12
|
|
14
|
-
|
15
13
|
include Term::ANSIColor
|
16
14
|
|
17
|
-
|
18
|
-
|
15
|
+
logger = Logger.new($stdout)
|
16
|
+
logger.formatter = proc do |severity, datetime, progname, msg|
|
19
17
|
"#{datetime}: [#{severity}] - #{msg}\n"
|
20
18
|
end
|
21
19
|
|
22
20
|
db_host = ActiveRecord::Base.connection_config[:host]
|
23
21
|
db_name = ActiveRecord::Base.connection_config[:database]
|
24
22
|
|
25
|
-
|
26
|
-
|
27
|
-
|
23
|
+
logger.warn "Please verify the information below to continue".red
|
24
|
+
logger.warn "Host: ".red + " #{db_host}".white
|
25
|
+
logger.warn "Database: ".red + "#{db_name}".white
|
28
26
|
|
29
27
|
unless ENV["SKIP_CONFIRM"] == "true"
|
30
|
-
|
31
28
|
answer = ask("Type '#{db_host}' to continue. \n".red + '-> '.white)
|
32
29
|
unless answer == db_host
|
33
|
-
|
30
|
+
logger.error "exiting ...".red
|
34
31
|
exit
|
35
32
|
end
|
36
33
|
end
|
37
34
|
|
38
|
-
|
35
|
+
logger.warn "Scrubbing classes".red
|
39
36
|
|
40
37
|
Rails.application.eager_load! # make sure all the classes are loaded
|
41
38
|
|
42
|
-
@total_scrubbed = 0
|
43
|
-
|
44
39
|
ar_classes = ActiveRecord::Base.descendants.select{|d| d.scrubbable? }.sort_by{|d| d.to_s }
|
45
40
|
|
46
|
-
|
47
|
-
# if the ENV variable is set
|
48
|
-
|
49
|
-
unless ENV["SCRUB_CLASSES"].blank?
|
41
|
+
if ENV["SCRUB_CLASSES"].present?
|
50
42
|
class_list = ENV["SCRUB_CLASSES"].split(",")
|
51
43
|
class_list = class_list.map {|_class_str| _class_str.constantize }
|
52
44
|
ar_classes = ar_classes & class_list
|
53
45
|
end
|
54
46
|
|
55
|
-
|
47
|
+
logger.info "Scrubbable Classes: #{ar_classes.join(', ')}".white
|
56
48
|
|
57
49
|
Parallel.each(ar_classes) do |ar_class|
|
58
|
-
|
59
50
|
# Removing any find or initialize callbacks from model
|
60
51
|
ar_class.reset_callbacks(:initialize)
|
61
52
|
ar_class.reset_callbacks(:find)
|
62
53
|
|
63
|
-
|
54
|
+
logger.info "Scrubbing #{ar_class} ...".green
|
64
55
|
|
65
56
|
scrubbed_count = 0
|
66
57
|
|
@@ -81,16 +72,59 @@ namespace :scrub do
|
|
81
72
|
end
|
82
73
|
end
|
83
74
|
|
84
|
-
|
75
|
+
logger.info "#{scrubbed_count} #{ar_class} objects scrubbed".blue
|
85
76
|
end
|
86
77
|
ActiveRecord::Base.connection.verify!
|
87
78
|
|
88
79
|
if ENV["SKIP_AFTERHOOK"].blank?
|
89
|
-
|
80
|
+
logger.info "Running after hook".red
|
90
81
|
ActsAsScrubbable.execute_after_hook
|
91
82
|
end
|
92
83
|
|
93
|
-
|
84
|
+
logger.info "Scrub Complete!".white
|
85
|
+
end
|
86
|
+
|
87
|
+
desc "Scrub one table"
# Scrubs a single model, e.g. `rake scrub:model[ModelName]`.
# Environment variables read by this task:
#   SKIP_CONFIRM  - "true" skips the interactive host confirmation
#   SCRUB_BATCHES - number of id ranges handed to ParallelTableScrubber (default "256")
task :model, [:ar_class] => :environment do |_, args|
  require 'highline/import'
  require 'term/ansicolor'
  require 'logger'
  require 'acts_as_scrubbable/parallel_table_scrubber'

  include Term::ANSIColor

  logger = Logger.new($stdout)
  logger.formatter = proc do |severity, datetime, _progname, msg|
    "#{datetime}: [#{severity}] - #{msg}\n"
  end

  # Validate the task argument before asking the operator to confirm the
  # target database; without it the task would fail on nil.constantize only
  # after the confirmation prompt.
  model_name = args[:ar_class].to_s.strip
  if model_name.empty?
    logger.error "Missing model name. Usage: rake scrub:model[ModelName]".red
    exit 1
  end

  db_host = ActiveRecord::Base.connection_config[:host]
  db_name = ActiveRecord::Base.connection_config[:database]

  logger.warn "Please verify the information below to continue".red
  logger.warn "Host: ".red + " #{db_host}".white
  logger.warn "Database: ".red + "#{db_name}".white

  unless ENV["SKIP_CONFIRM"] == "true"
    # The operator must type the database host back to proceed.
    answer = ask("Type '#{db_host}' to continue. \n".red + '-> '.white)
    unless answer == db_host
      logger.error "exiting ...".red
      exit
    end
  end

  Rails.application.eager_load! # make sure all the classes are loaded

  ar_class = model_name.constantize
  logger.info "Scrubbing #{ar_class} ...".green

  # Integer() raises on a non-numeric SCRUB_BATCHES instead of coercing it to 0.
  num_batches = Integer(ENV.fetch("SCRUB_BATCHES", "256"))
  scrubbed_count = ActsAsScrubbable::ParallelTableScrubber.new(ar_class).scrub(num_batches: num_batches)

  logger.info "#{scrubbed_count} #{ar_class} objects scrubbed".blue
  ActiveRecord::Base.connection.verify!

  logger.info "Scrub Complete!".white
end
|
95
129
|
end
|
96
130
|
|
data/spec/db/schema.rb
CHANGED
@@ -2,8 +2,23 @@ ActiveRecord::Schema.define(version: 20150421224501) do
|
|
2
2
|
|
3
3
|
create_table "scrubbable_models", force: true do |t|
|
4
4
|
t.string "first_name"
|
5
|
+
t.string "last_name"
|
6
|
+
t.string "middle_name"
|
7
|
+
t.string "name"
|
8
|
+
t.string "email"
|
9
|
+
t.string "title"
|
10
|
+
t.string "company_name"
|
5
11
|
t.string "address1"
|
12
|
+
t.string "address2"
|
13
|
+
t.string "zip_code"
|
14
|
+
t.string "state"
|
15
|
+
t.string "state_short"
|
16
|
+
t.string "city"
|
6
17
|
t.string "lat"
|
18
|
+
t.string "lon"
|
19
|
+
t.string "username"
|
20
|
+
t.boolean "active"
|
21
|
+
t.string "school"
|
7
22
|
end
|
8
23
|
|
9
24
|
end
|
@@ -5,9 +5,35 @@ RSpec.describe ActsAsScrubbable::Scrub do
|
|
5
5
|
describe '.scrub' do
|
6
6
|
|
7
7
|
# update_columns cannot be run on a new record
|
8
|
-
subject{ ScrubbableModel.new }
|
8
|
+
subject { ScrubbableModel.new }
|
9
9
|
before(:each) { subject.save }
|
10
10
|
|
11
|
+
it 'scrubs all columns' do
  # One value per column of the model, assigned on the already-saved subject.
  populated_columns = {
    first_name: "Ted",
    last_name: "Lowe",
    middle_name: "Cassidy",
    name: "Miss Vincenzo Smitham",
    email: "trentdibbert@wiza.com",
    title: "Internal Consultant",
    company_name: "Greenfelder, Collier and Lesch",
    address1: "86780 Watsica Flats",
    address2: "Apt. 913",
    zip_code: "49227",
    state: "Ohio",
    state_short: "OH",
    city: "Port Hildegard",
    lat: -79.5855309778974,
    lon: 13.517352691513906,
    username: "oscar.hermann",
    active: false,
    school: "Eastern Lebsack",
  }
  subject.attributes = populated_columns

  # The example only checks that scrubbing completes without raising.
  expect { subject.scrub! }.not_to raise_error
end
|
36
|
+
|
11
37
|
it 'changes the first_name attribute when scrub is run' do
|
12
38
|
subject.first_name = "Ted"
|
13
39
|
allow(Faker::Name).to receive(:first_name).and_return("John")
|
data/spec/support/database.rb
CHANGED
@@ -15,7 +15,24 @@ end
|
|
15
15
|
class NonScrubbableModel < ActiveRecord::Base; end
|
16
16
|
|
17
17
|
class ScrubbableModel < ActiveRecord::Base
|
18
|
-
acts_as_scrubbable :first_name,
|
18
|
+
acts_as_scrubbable :first_name,
|
19
|
+
:last_name,
|
20
|
+
:middle_name,
|
21
|
+
:name,
|
22
|
+
:email,
|
23
|
+
:company_name,
|
24
|
+
:zip_code,
|
25
|
+
:state,
|
26
|
+
:city,
|
27
|
+
:username,
|
28
|
+
:school,
|
29
|
+
:title => :name_title,
|
30
|
+
:address1 => :street_address,
|
31
|
+
:address2 => :secondary_address,
|
32
|
+
:state_short => :state_abbr,
|
33
|
+
:lat => :latitude,
|
34
|
+
:lon => :longitude,
|
35
|
+
:active => :boolean
|
19
36
|
attr_accessor :scrubbing_begun, :scrubbing_finished
|
20
37
|
set_callback :scrub, :before do
|
21
38
|
self.scrubbing_begun = true
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: acts_as_scrubbable
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Samer Masry
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-06-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -226,6 +226,7 @@ files:
|
|
226
226
|
- README.md
|
227
227
|
- acts_as_scrubbable.gemspec
|
228
228
|
- lib/acts_as_scrubbable.rb
|
229
|
+
- lib/acts_as_scrubbable/parallel_table_scrubber.rb
|
229
230
|
- lib/acts_as_scrubbable/scrub.rb
|
230
231
|
- lib/acts_as_scrubbable/scrubbable.rb
|
231
232
|
- lib/acts_as_scrubbable/tasks.rb
|
@@ -256,7 +257,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
256
257
|
version: '0'
|
257
258
|
requirements: []
|
258
259
|
rubyforge_project:
|
259
|
-
rubygems_version: 2.
|
260
|
+
rubygems_version: 2.6.14
|
260
261
|
signing_key:
|
261
262
|
specification_version: 4
|
262
263
|
summary: Scrubbing data made easy
|