acts_as_scrubbable 1.0.2 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/lib/acts_as_scrubbable.rb +1 -1
- data/lib/acts_as_scrubbable/parallel_table_scrubber.rb +84 -0
- data/lib/acts_as_scrubbable/tasks.rb +57 -23
- data/lib/acts_as_scrubbable/version.rb +1 -1
- data/spec/db/schema.rb +15 -0
- data/spec/lib/acts_as_scrubbable/scrub_spec.rb +27 -1
- data/spec/support/database.rb +18 -1
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: ead6fbeb0cee99c7046ad13c00423805d1099550
|
4
|
+
data.tar.gz: e31b55515b4cef3df1d78e482ec37745fdcf518c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7c56b941e491ed7e7d76e8d1abaeaddebbea42e6d1d6b023f122698f0c5d6483b6959a0dbb45007b5ef0443401d12fb0b37d7c975282675a7acefdf332603783
|
7
|
+
data.tar.gz: 6a13305d96f630c4f37494d92ce64a83dbba5d19e0a6528460bb34616d80ba13b172803e3d112fbf5d1a6db113fdc145605f47c1808395a07f811f2814f09660
|
data/lib/acts_as_scrubbable.rb
CHANGED
@@ -37,7 +37,7 @@ module ActsAsScrubbable
|
|
37
37
|
:middle_name => -> { Faker::Name.name },
|
38
38
|
:name => -> { Faker::Name.name },
|
39
39
|
:email => -> { Faker::Internet.email },
|
40
|
-
:name_title => -> { Faker::Name.title },
|
40
|
+
# `defined?` binds more loosely than `?:`, so its argument must be parenthesized;
# unparenthesized, the whole ternary is the operand and the lambda returns "expression".
:name_title => -> { defined?(Faker::Job) ? Faker::Job.title : Faker::Name.title },
|
41
41
|
:company_name => -> { Faker::Company.name },
|
42
42
|
:street_address => -> { Faker::Address.street_address },
|
43
43
|
:secondary_address => -> { Faker::Address.secondary_address },
|
@@ -0,0 +1,84 @@
|
|
1
|
+
require "parallel"
|
2
|
+
|
3
|
+
module ActsAsScrubbable
  # Scrubs the records of a single model by splitting the table into
  # contiguous `id` ranges and handing each range to `Parallel.map`.
  class ParallelTableScrubber
    # @param ar_class [Class] model class to scrub; it must expose an "id"
    #   column and the ActiveRecord query methods used below
    def initialize(ar_class)
      @ar_class = ar_class
    end

    # Scrubs every record of the model (restricted to `scrubbable_scope` when
    # the model responds to it).
    #
    # @param num_batches [Integer] number of ID ranges to split the table into
    # @return [Integer] total number of records scrubbed (0 for an empty table)
    # @raise [RuntimeError] if the model has no "id" column
    # @raise [ArgumentError] if the table has records and num_batches is not a
    #   positive Integer
    def scrub(num_batches:)
      # Removing any find or initialize callbacks from model
      ar_class.reset_callbacks(:initialize)
      ar_class.reset_callbacks(:find)

      queries = parallel_queries(ar_class: ar_class, num_batches: num_batches)
      counts = Parallel.map(queries) do |query|
        # Counter local to this unit of work; it deliberately does not reuse
        # any variable of the enclosing method.
        worker_count = 0
        ActiveRecord::Base.connection_pool.with_connection do
          base_relation(ar_class).where(query).find_in_batches(batch_size: 1000) do |batch|
            ActiveRecord::Base.transaction do
              batch.each do |obj|
                obj.scrub!
                worker_count += 1
              end
            end
          end
        end
        worker_count
      end

      # Explicit initial value so an empty query list yields 0 instead of nil.
      counts.reduce(0, :+)
    end

    private

    attr_reader :ar_class

    # Relation all work is based on: the model's `scrubbable_scope` when it
    # responds to that method, otherwise every record.
    def base_relation(ar_class)
      if ar_class.respond_to?(:scrubbable_scope)
        ar_class.send(:scrubbable_scope)
      else
        ar_class.all
      end
    end

    # Builds `where` conditions covering the table in roughly even ID ranges.
    #
    # @return [Array] conditions for `where`; every entry is `{id: first..last}`
    #   except the final one, which is an open-ended `id >= ?` condition
    def parallel_queries(ar_class:, num_batches:)
      raise "Model is missing id column" if ar_class.columns.none? { |column| column.name == "id" }

      num_records = base_relation(ar_class).count
      return [] if num_records == 0 # no records to import

      unless num_batches.is_a?(Integer) && num_batches > 0
        raise ArgumentError, "num_batches must be a positive Integer, got #{num_batches.inspect}"
      end

      record_window_size, modulus = num_records.divmod(num_batches)
      if record_window_size < 1
        # More batches than records: one record per range, no remainder to spread.
        record_window_size = 1
        modulus = 0
      end

      start_id = next_id(ar_class: ar_class, offset: 0)
      queries = num_batches.times.each_with_object([]) do |_, ranges|
        next unless start_id

        end_id = next_id(ar_class: ar_class, id: start_id, offset: record_window_size - 1)
        # Spread the remainder by widening the first `modulus` ranges by one
        # record. Skip when end_id is nil: next_id(id: nil) would otherwise
        # restart the lookup from the top of the table.
        if modulus > 0 && end_id
          end_id = next_id(ar_class: ar_class, id: end_id)
          modulus -= 1
        end
        ranges << {id: start_id..end_id} if end_id
        start_id = next_id(ar_class: ar_class, id: end_id || start_id)
      end

      # just in case new records are added since we started, extend the end ID
      queries[-1] = ["#{ar_class.quoted_table_name}.id >= ?", queries[-1][:id].begin] if queries.any?

      queries
    end

    # Returns the id found `offset` rows after the first row whose id is
    # >= `id` (or after the first row overall when `id` is nil), walking the
    # relation in ascending id order; nil when no such row exists.
    def next_id(ar_class:, id: nil, offset: 1)
      # reorder returns a new relation; it must be assigned or the offset below
      # is applied to an arbitrarily ordered result set.
      collection = base_relation(ar_class).reorder(:id)
      collection = collection.where("#{ar_class.quoted_table_name}.id >= :id", id: id) if id
      collection.offset(offset).limit(1).pluck(:id).first
    end
  end
end
|
@@ -3,64 +3,55 @@ require 'rake'
|
|
3
3
|
|
4
4
|
namespace :scrub do
|
5
5
|
|
6
|
-
desc "scrub all"
|
6
|
+
desc "scrub all scrubbable tables"
|
7
7
|
task all: :environment do
|
8
|
-
|
9
8
|
require 'highline/import'
|
10
9
|
require 'term/ansicolor'
|
11
10
|
require 'logger'
|
12
11
|
require 'parallel'
|
13
12
|
|
14
|
-
|
15
13
|
include Term::ANSIColor
|
16
14
|
|
17
|
-
|
18
|
-
|
15
|
+
logger = Logger.new($stdout)
|
16
|
+
logger.formatter = proc do |severity, datetime, progname, msg|
|
19
17
|
"#{datetime}: [#{severity}] - #{msg}\n"
|
20
18
|
end
|
21
19
|
|
22
20
|
db_host = ActiveRecord::Base.connection_config[:host]
|
23
21
|
db_name = ActiveRecord::Base.connection_config[:database]
|
24
22
|
|
25
|
-
|
26
|
-
|
27
|
-
|
23
|
+
logger.warn "Please verify the information below to continue".red
|
24
|
+
logger.warn "Host: ".red + " #{db_host}".white
|
25
|
+
logger.warn "Database: ".red + "#{db_name}".white
|
28
26
|
|
29
27
|
unless ENV["SKIP_CONFIRM"] == "true"
|
30
|
-
|
31
28
|
answer = ask("Type '#{db_host}' to continue. \n".red + '-> '.white)
|
32
29
|
unless answer == db_host
|
33
|
-
|
30
|
+
logger.error "exiting ...".red
|
34
31
|
exit
|
35
32
|
end
|
36
33
|
end
|
37
34
|
|
38
|
-
|
35
|
+
logger.warn "Scrubbing classes".red
|
39
36
|
|
40
37
|
Rails.application.eager_load! # make sure all the classes are loaded
|
41
38
|
|
42
|
-
@total_scrubbed = 0
|
43
|
-
|
44
39
|
ar_classes = ActiveRecord::Base.descendants.select{|d| d.scrubbable? }.sort_by{|d| d.to_s }
|
45
40
|
|
46
|
-
|
47
|
-
# if the ENV variable is set
|
48
|
-
|
49
|
-
unless ENV["SCRUB_CLASSES"].blank?
|
41
|
+
if ENV["SCRUB_CLASSES"].present?
|
50
42
|
class_list = ENV["SCRUB_CLASSES"].split(",")
|
51
43
|
class_list = class_list.map {|_class_str| _class_str.constantize }
|
52
44
|
ar_classes = ar_classes & class_list
|
53
45
|
end
|
54
46
|
|
55
|
-
|
47
|
+
logger.info "Scrubbable Classes: #{ar_classes.join(', ')}".white
|
56
48
|
|
57
49
|
Parallel.each(ar_classes) do |ar_class|
|
58
|
-
|
59
50
|
# Removing any find or initialize callbacks from model
|
60
51
|
ar_class.reset_callbacks(:initialize)
|
61
52
|
ar_class.reset_callbacks(:find)
|
62
53
|
|
63
|
-
|
54
|
+
logger.info "Scrubbing #{ar_class} ...".green
|
64
55
|
|
65
56
|
scrubbed_count = 0
|
66
57
|
|
@@ -81,16 +72,59 @@ namespace :scrub do
|
|
81
72
|
end
|
82
73
|
end
|
83
74
|
|
84
|
-
|
75
|
+
logger.info "#{scrubbed_count} #{ar_class} objects scrubbed".blue
|
85
76
|
end
|
86
77
|
ActiveRecord::Base.connection.verify!
|
87
78
|
|
88
79
|
if ENV["SKIP_AFTERHOOK"].blank?
|
89
|
-
|
80
|
+
logger.info "Running after hook".red
|
90
81
|
ActsAsScrubbable.execute_after_hook
|
91
82
|
end
|
92
83
|
|
93
|
-
|
84
|
+
logger.info "Scrub Complete!".white
|
85
|
+
end
|
86
|
+
|
87
|
+
desc "Scrub one table"
# Scrubs a single model, named by the task argument, through
# ActsAsScrubbable::ParallelTableScrubber.
#
# Environment variables read by this task:
#   SKIP_CONFIRM  - when exactly "true", the interactive host confirmation is skipped
#   SCRUB_BATCHES - number of ID ranges passed to the scrubber (default "256");
#                   must parse with Integer()
task :model, [:ar_class] => :environment do |_, args|
  require 'highline/import'
  require 'term/ansicolor'
  require 'logger'
  require 'acts_as_scrubbable/parallel_table_scrubber'

  include Term::ANSIColor

  # Fail fast with a usage hint: without this, a missing argument only shows
  # up after the confirmation prompt as a NoMethodError on nil.constantize.
  model_name = args[:ar_class].to_s.strip
  abort "Missing model name. Usage: rake scrub:model[ModelName]" if model_name.empty?

  logger = Logger.new($stdout)
  logger.formatter = proc do |severity, datetime, _progname, msg|
    "#{datetime}: [#{severity}] - #{msg}\n"
  end

  db_host = ActiveRecord::Base.connection_config[:host]
  db_name = ActiveRecord::Base.connection_config[:database]

  logger.warn "Please verify the information below to continue".red
  logger.warn "Host: ".red + " #{db_host}".white
  logger.warn "Database: ".red + "#{db_name}".white

  # The operator must type the database host back before anything is modified.
  unless ENV["SKIP_CONFIRM"] == "true"
    answer = ask("Type '#{db_host}' to continue. \n".red + '-> '.white)
    unless answer == db_host
      logger.error "exiting ...".red
      exit
    end
  end

  Rails.application.eager_load! # make sure all the classes are loaded

  ar_class = model_name.constantize
  logger.info "Scrubbing #{ar_class} ...".green

  num_batches = Integer(ENV.fetch("SCRUB_BATCHES", "256"))
  scrubbed_count = ActsAsScrubbable::ParallelTableScrubber.new(ar_class).scrub(num_batches: num_batches)

  logger.info "#{scrubbed_count} #{ar_class} objects scrubbed".blue
  ActiveRecord::Base.connection.verify!

  logger.info "Scrub Complete!".white
end
|
95
129
|
end
|
96
130
|
|
data/spec/db/schema.rb
CHANGED
@@ -2,8 +2,23 @@ ActiveRecord::Schema.define(version: 20150421224501) do
|
|
2
2
|
|
3
3
|
create_table "scrubbable_models", force: true do |t|
|
4
4
|
t.string "first_name"
|
5
|
+
t.string "last_name"
|
6
|
+
t.string "middle_name"
|
7
|
+
t.string "name"
|
8
|
+
t.string "email"
|
9
|
+
t.string "title"
|
10
|
+
t.string "company_name"
|
5
11
|
t.string "address1"
|
12
|
+
t.string "address2"
|
13
|
+
t.string "zip_code"
|
14
|
+
t.string "state"
|
15
|
+
t.string "state_short"
|
16
|
+
t.string "city"
|
6
17
|
t.string "lat"
|
18
|
+
t.string "lon"
|
19
|
+
t.string "username"
|
20
|
+
t.boolean "active"
|
21
|
+
t.string "school"
|
7
22
|
end
|
8
23
|
|
9
24
|
end
|
@@ -5,9 +5,35 @@ RSpec.describe ActsAsScrubbable::Scrub do
|
|
5
5
|
describe '.scrub' do
|
6
6
|
|
7
7
|
# update_columns cannot be run on a new record
|
8
|
-
subject{ ScrubbableModel.new }
|
8
|
+
subject { ScrubbableModel.new }
|
9
9
|
before(:each) { subject.save }
|
10
10
|
|
11
|
+
it 'scrubs all columns' do
  # Give every column declared in the spec schema a value, then check that
  # scrubbing the populated record does not raise.
  populated_attributes = {
    first_name: "Ted",
    last_name: "Lowe",
    middle_name: "Cassidy",
    name: "Miss Vincenzo Smitham",
    email: "trentdibbert@wiza.com",
    title: "Internal Consultant",
    company_name: "Greenfelder, Collier and Lesch",
    address1: "86780 Watsica Flats",
    address2: "Apt. 913",
    zip_code: "49227",
    state: "Ohio",
    state_short: "OH",
    city: "Port Hildegard",
    lat: -79.5855309778974,
    lon: 13.517352691513906,
    username: "oscar.hermann",
    active: false,
    school: "Eastern Lebsack",
  }
  subject.attributes = populated_attributes

  expect { subject.scrub! }.not_to raise_error
end
|
36
|
+
|
11
37
|
it 'changes the first_name attribute when scrub is run' do
|
12
38
|
subject.first_name = "Ted"
|
13
39
|
allow(Faker::Name).to receive(:first_name).and_return("John")
|
data/spec/support/database.rb
CHANGED
@@ -15,7 +15,24 @@ end
|
|
15
15
|
class NonScrubbableModel < ActiveRecord::Base; end
|
16
16
|
|
17
17
|
class ScrubbableModel < ActiveRecord::Base
|
18
|
-
acts_as_scrubbable :first_name,
|
18
|
+
acts_as_scrubbable :first_name,
|
19
|
+
:last_name,
|
20
|
+
:middle_name,
|
21
|
+
:name,
|
22
|
+
:email,
|
23
|
+
:company_name,
|
24
|
+
:zip_code,
|
25
|
+
:state,
|
26
|
+
:city,
|
27
|
+
:username,
|
28
|
+
:school,
|
29
|
+
:title => :name_title,
|
30
|
+
:address1 => :street_address,
|
31
|
+
:address2 => :secondary_address,
|
32
|
+
:state_short => :state_abbr,
|
33
|
+
:lat => :latitude,
|
34
|
+
:lon => :longitude,
|
35
|
+
:active => :boolean
|
19
36
|
attr_accessor :scrubbing_begun, :scrubbing_finished
|
20
37
|
set_callback :scrub, :before do
|
21
38
|
self.scrubbing_begun = true
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: acts_as_scrubbable
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.2
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Samer Masry
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-06-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -226,6 +226,7 @@ files:
|
|
226
226
|
- README.md
|
227
227
|
- acts_as_scrubbable.gemspec
|
228
228
|
- lib/acts_as_scrubbable.rb
|
229
|
+
- lib/acts_as_scrubbable/parallel_table_scrubber.rb
|
229
230
|
- lib/acts_as_scrubbable/scrub.rb
|
230
231
|
- lib/acts_as_scrubbable/scrubbable.rb
|
231
232
|
- lib/acts_as_scrubbable/tasks.rb
|
@@ -256,7 +257,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
256
257
|
version: '0'
|
257
258
|
requirements: []
|
258
259
|
rubyforge_project:
|
259
|
-
rubygems_version: 2.
|
260
|
+
rubygems_version: 2.6.14
|
260
261
|
signing_key:
|
261
262
|
specification_version: 4
|
262
263
|
summary: Scrubbing data made easy
|