acts_as_scrubbable 1.0.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.travis.yml +7 -0
- data/Gemfile +0 -2
- data/acts_as_scrubbable.gemspec +3 -3
- data/lib/acts_as_scrubbable.rb +1 -1
- data/lib/acts_as_scrubbable/parallel_table_scrubber.rb +84 -0
- data/lib/acts_as_scrubbable/tasks.rb +57 -23
- data/lib/acts_as_scrubbable/version.rb +1 -1
- data/spec/db/schema.rb +15 -0
- data/spec/lib/acts_as_scrubbable/scrub_spec.rb +27 -1
- data/spec/support/database.rb +19 -2
- metadata +14 -13
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: bb73960ded68e0d1a181ea4bf86300ae30dccb3e29330e63ddc8cae835662750
|
4
|
+
data.tar.gz: 534cdd721d48284f9d714093f307108f0c12ac13ef9636bd28b08745f984fcc9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8685741230ee6f515b9b5738dd5b32772b66f513f991f19fe95d0db2dfcb6414f3424d1a28218e4f2b5a285bb6a0b739ce6446e2bfd569023caf1dc432ec9924
|
7
|
+
data.tar.gz: 4883b89adccb21c37572a3e4aa98fba58b19765d356748875921ab877c6bcc4a201b9c798f38777d8ac99cd46c0b6c4d27aac2595657d03c6199fdcc012fe69b
|
data/.travis.yml
ADDED
data/Gemfile
CHANGED
data/acts_as_scrubbable.gemspec
CHANGED
@@ -13,9 +13,9 @@ Gem::Specification.new do |s|
|
|
13
13
|
s.license = "MIT"
|
14
14
|
s.required_ruby_version = '~> 2.0'
|
15
15
|
|
16
|
-
s.add_runtime_dependency 'activesupport' , '>= 4.1', '<
|
17
|
-
s.add_runtime_dependency 'activerecord' , '>= 4.1', '<
|
18
|
-
s.add_runtime_dependency 'railties' , '>= 4.1', '<
|
16
|
+
s.add_runtime_dependency 'activesupport' , '>= 4.1', '< 6'
|
17
|
+
s.add_runtime_dependency 'activerecord' , '>= 4.1', '< 6'
|
18
|
+
s.add_runtime_dependency 'railties' , '>= 4.1', '< 6'
|
19
19
|
s.add_runtime_dependency 'faker' , '>= 1.4'
|
20
20
|
s.add_runtime_dependency 'highline' , '>= 1.7'
|
21
21
|
s.add_runtime_dependency 'term-ansicolor' , '>= 1.3'
|
data/lib/acts_as_scrubbable.rb
CHANGED
@@ -37,7 +37,7 @@ module ActsAsScrubbable
|
|
37
37
|
:middle_name => -> { Faker::Name.name },
|
38
38
|
:name => -> { Faker::Name.name },
|
39
39
|
:email => -> { Faker::Internet.email },
|
40
|
-
:name_title => -> { Faker::Name.title },
|
40
|
+
:name_title => -> { defined?(Faker::Job) ? Faker::Job.title : Faker::Name.title }, # parens required: a bare `defined?` would take the whole ternary as its operand and always return "expression"
|
41
41
|
:company_name => -> { Faker::Company.name },
|
42
42
|
:street_address => -> { Faker::Address.street_address },
|
43
43
|
:secondary_address => -> { Faker::Address.secondary_address },
|
@@ -0,0 +1,84 @@
|
|
1
|
+
require "parallel"
|
2
|
+
|
3
|
+
module ActsAsScrubbable
  # Scrubs every row of one model by splitting the table into contiguous id
  # ranges and handing each range to Parallel.map.
  #
  # The model is expected to have an "id" column and to expose the
  # ActiveRecord-style query methods used below. If it responds to
  # +scrubbable_scope+, only rows in that scope are counted and scrubbed.
  class ParallelTableScrubber
    # @param ar_class [Class] the model whose rows will be scrubbed
    def initialize(ar_class)
      @ar_class = ar_class
    end

    # Scrubs the model's rows, spreading the work over up to +num_batches+
    # id ranges.
    #
    # @param num_batches [Integer] number of id ranges to split the table into
    # @return [Integer] number of records scrubbed (0 when nothing matched)
    # @raise [ArgumentError] if +num_batches+ is not a positive Integer
    # @raise [RuntimeError] if the model has no "id" column
    def scrub(num_batches:)
      # Validate before touching the model: 0 would otherwise surface as a
      # ZeroDivisionError and a negative value would silently scrub nothing.
      unless num_batches.is_a?(Integer) && num_batches > 0
        raise ArgumentError, "num_batches must be a positive integer, got #{num_batches.inspect}"
      end

      # Removing any find or initialize callbacks from model
      ar_class.reset_callbacks(:initialize)
      ar_class.reset_callbacks(:find)

      queries = parallel_queries(ar_class: ar_class, num_batches: num_batches)
      per_range_counts = Parallel.map(queries) { |query| scrub_range(query) }

      # Seed with 0 so an empty table yields 0 rather than nil.
      per_range_counts.reduce(0, :+)
    end

    private

    attr_reader :ar_class

    # Scrubs every record matching +query+ and returns how many were scrubbed.
    # Records are loaded 1000 at a time, one transaction per loaded batch.
    def scrub_range(query)
      count = 0
      ActiveRecord::Base.connection_pool.with_connection do
        base_relation(ar_class).where(query).find_in_batches(batch_size: 1000) do |batch|
          ActiveRecord::Base.transaction do
            batch.each do |record|
              record.scrub!
              count += 1
            end
          end
        end
      end
      count
    end

    # Relation all counting, id lookups and scrubbing start from: the model's
    # +scrubbable_scope+ when it defines one, otherwise every row.
    def base_relation(ar_class)
      if ar_class.respond_to?(:scrubbable_scope)
        ar_class.send(:scrubbable_scope)
      else
        ar_class.all
      end
    end

    # Builds up to +num_batches+ +where+ conditions covering roughly even id
    # ranges of the table. The remainder of the division is spread one extra
    # row at a time over the first ranges, and the final range is left
    # open-ended.
    #
    # @return [Array] +where+-compatible conditions; empty when there are no rows
    def parallel_queries(ar_class:, num_batches:)
      raise "Model is missing id column" if ar_class.columns.none? { |column| column.name == "id" }

      num_records = base_relation(ar_class).count
      return [] if num_records == 0 # nothing to scrub

      record_window_size, modulus = num_records.divmod(num_batches)
      if record_window_size < 1
        # Fewer rows than batches: one row per range, no remainder to spread.
        record_window_size = 1
        modulus = 0
      end

      ranges = []
      start_id = next_id(ar_class: ar_class, offset: 0)
      num_batches.times do
        break unless start_id

        end_id = next_id(ar_class: ar_class, id: start_id, offset: record_window_size - 1)
        if end_id && modulus > 0
          # Take one extra row for the remainder. Only do so when end_id exists:
          # calling next_id with a nil id would restart from the top of the table.
          end_id = next_id(ar_class: ar_class, id: end_id) || end_id
          modulus -= 1
        end
        ranges << { id: start_id..end_id } if end_id
        start_id = next_id(ar_class: ar_class, id: end_id || start_id)
      end

      # just in case new records are added since we started, extend the end ID
      ranges[-1] = ["#{ar_class.quoted_table_name}.id >= ?", ranges[-1][:id].begin] if ranges.any?

      ranges
    end

    # Returns the id found +offset+ rows after +id+ in id order (or after the
    # first row when +id+ is nil), or nil when there is no such row.
    def next_id(ar_class:, id: nil, offset: 1)
      collection = base_relation(ar_class).reorder(:id)
      collection = collection.where("#{ar_class.quoted_table_name}.id >= :id", id: id) if id
      collection.offset(offset).limit(1).pluck(:id).first
    end
  end
end
|
@@ -3,64 +3,55 @@ require 'rake'
|
|
3
3
|
|
4
4
|
namespace :scrub do
|
5
5
|
|
6
|
-
desc "scrub all"
|
6
|
+
desc "scrub all scrubbable tables"
|
7
7
|
task all: :environment do
|
8
|
-
|
9
8
|
require 'highline/import'
|
10
9
|
require 'term/ansicolor'
|
11
10
|
require 'logger'
|
12
11
|
require 'parallel'
|
13
12
|
|
14
|
-
|
15
13
|
include Term::ANSIColor
|
16
14
|
|
17
|
-
|
18
|
-
|
15
|
+
logger = Logger.new($stdout)
|
16
|
+
logger.formatter = proc do |severity, datetime, progname, msg|
|
19
17
|
"#{datetime}: [#{severity}] - #{msg}\n"
|
20
18
|
end
|
21
19
|
|
22
20
|
db_host = ActiveRecord::Base.connection_config[:host]
|
23
21
|
db_name = ActiveRecord::Base.connection_config[:database]
|
24
22
|
|
25
|
-
|
26
|
-
|
27
|
-
|
23
|
+
logger.warn "Please verify the information below to continue".red
|
24
|
+
logger.warn "Host: ".red + " #{db_host}".white
|
25
|
+
logger.warn "Database: ".red + "#{db_name}".white
|
28
26
|
|
29
27
|
unless ENV["SKIP_CONFIRM"] == "true"
|
30
|
-
|
31
28
|
answer = ask("Type '#{db_host}' to continue. \n".red + '-> '.white)
|
32
29
|
unless answer == db_host
|
33
|
-
|
30
|
+
logger.error "exiting ...".red
|
34
31
|
exit
|
35
32
|
end
|
36
33
|
end
|
37
34
|
|
38
|
-
|
35
|
+
logger.warn "Scrubbing classes".red
|
39
36
|
|
40
37
|
Rails.application.eager_load! # make sure all the classes are loaded
|
41
38
|
|
42
|
-
@total_scrubbed = 0
|
43
|
-
|
44
39
|
ar_classes = ActiveRecord::Base.descendants.select{|d| d.scrubbable? }.sort_by{|d| d.to_s }
|
45
40
|
|
46
|
-
|
47
|
-
# if the ENV variable is set
|
48
|
-
|
49
|
-
unless ENV["SCRUB_CLASSES"].blank?
|
41
|
+
if ENV["SCRUB_CLASSES"].present?
|
50
42
|
class_list = ENV["SCRUB_CLASSES"].split(",")
|
51
43
|
class_list = class_list.map {|_class_str| _class_str.constantize }
|
52
44
|
ar_classes = ar_classes & class_list
|
53
45
|
end
|
54
46
|
|
55
|
-
|
47
|
+
logger.info "Scrubbable Classes: #{ar_classes.join(', ')}".white
|
56
48
|
|
57
49
|
Parallel.each(ar_classes) do |ar_class|
|
58
|
-
|
59
50
|
# Removing any find or initialize callbacks from model
|
60
51
|
ar_class.reset_callbacks(:initialize)
|
61
52
|
ar_class.reset_callbacks(:find)
|
62
53
|
|
63
|
-
|
54
|
+
logger.info "Scrubbing #{ar_class} ...".green
|
64
55
|
|
65
56
|
scrubbed_count = 0
|
66
57
|
|
@@ -81,16 +72,59 @@ namespace :scrub do
|
|
81
72
|
end
|
82
73
|
end
|
83
74
|
|
84
|
-
|
75
|
+
logger.info "#{scrubbed_count} #{ar_class} objects scrubbed".blue
|
85
76
|
end
|
86
77
|
ActiveRecord::Base.connection.verify!
|
87
78
|
|
88
79
|
if ENV["SKIP_AFTERHOOK"].blank?
|
89
|
-
|
80
|
+
logger.info "Running after hook".red
|
90
81
|
ActsAsScrubbable.execute_after_hook
|
91
82
|
end
|
92
83
|
|
93
|
-
|
84
|
+
logger.info "Scrub Complete!".white
|
85
|
+
end
|
86
|
+
|
87
|
+
desc "Scrub one table"
# Scrubs a single model, named by the task argument, using
# ActsAsScrubbable::ParallelTableScrubber.
#
# Environment variables read by this task:
#   SKIP_CONFIRM  - set to "true" to skip the interactive host confirmation
#   SCRUB_BATCHES - number of id ranges handed to the scrubber (default "256")
task :model, [:ar_class] => :environment do |_, args|
  require 'highline/import'
  require 'term/ansicolor'
  require 'logger'
  require 'acts_as_scrubbable/parallel_table_scrubber'

  include Term::ANSIColor

  # Fail fast with a usage hint; otherwise a missing argument only surfaces
  # after the confirmation prompt as NoMethodError on nil.constantize.
  model_name = args[:ar_class].to_s.strip
  if model_name.empty?
    raise ArgumentError, "scrub:model requires a model class name, e.g. rake \"scrub:model[User]\""
  end

  logger = Logger.new($stdout)
  logger.formatter = proc do |severity, datetime, _progname, msg|
    "#{datetime}: [#{severity}] - #{msg}\n"
  end

  db_host = ActiveRecord::Base.connection_config[:host]
  db_name = ActiveRecord::Base.connection_config[:database]

  logger.warn "Please verify the information below to continue".red
  logger.warn "Host: ".red + " #{db_host}".white
  logger.warn "Database: ".red + "#{db_name}".white

  # Require the operator to type the database host back before proceeding.
  unless ENV["SKIP_CONFIRM"] == "true"
    answer = ask("Type '#{db_host}' to continue. \n".red + '-> '.white)
    unless answer == db_host
      logger.error "exiting ...".red
      exit
    end
  end

  Rails.application.eager_load! # make sure all the classes are loaded

  ar_class = model_name.constantize
  logger.info "Scrubbing #{ar_class} ...".green

  num_batches = Integer(ENV.fetch("SCRUB_BATCHES", "256"))
  scrubbed_count = ActsAsScrubbable::ParallelTableScrubber.new(ar_class).scrub(num_batches: num_batches)

  # to_i keeps the log line meaningful if the scrubber reports no count (nil).
  logger.info "#{scrubbed_count.to_i} #{ar_class} objects scrubbed".blue
  ActiveRecord::Base.connection.verify!

  logger.info "Scrub Complete!".white
end
|
95
129
|
end
|
96
130
|
|
data/spec/db/schema.rb
CHANGED
@@ -2,8 +2,23 @@ ActiveRecord::Schema.define(version: 20150421224501) do
|
|
2
2
|
|
3
3
|
create_table "scrubbable_models", force: true do |t|
|
4
4
|
t.string "first_name"
|
5
|
+
t.string "last_name"
|
6
|
+
t.string "middle_name"
|
7
|
+
t.string "name"
|
8
|
+
t.string "email"
|
9
|
+
t.string "title"
|
10
|
+
t.string "company_name"
|
5
11
|
t.string "address1"
|
12
|
+
t.string "address2"
|
13
|
+
t.string "zip_code"
|
14
|
+
t.string "state"
|
15
|
+
t.string "state_short"
|
16
|
+
t.string "city"
|
6
17
|
t.string "lat"
|
18
|
+
t.string "lon"
|
19
|
+
t.string "username"
|
20
|
+
t.boolean "active"
|
21
|
+
t.string "school"
|
7
22
|
end
|
8
23
|
|
9
24
|
end
|
@@ -5,9 +5,35 @@ RSpec.describe ActsAsScrubbable::Scrub do
|
|
5
5
|
describe '.scrub' do
|
6
6
|
|
7
7
|
# update_columns cannot be run on a new record
|
8
|
-
subject{ ScrubbableModel.new }
|
8
|
+
subject { ScrubbableModel.new }
|
9
9
|
before(:each) { subject.save }
|
10
10
|
|
11
|
+
it 'scrubs all columns' do
|
12
|
+
subject.attributes = {
|
13
|
+
first_name: "Ted",
|
14
|
+
last_name: "Lowe",
|
15
|
+
middle_name: "Cassidy",
|
16
|
+
name: "Miss Vincenzo Smitham",
|
17
|
+
email: "trentdibbert@wiza.com",
|
18
|
+
title: "Internal Consultant",
|
19
|
+
company_name: "Greenfelder, Collier and Lesch",
|
20
|
+
address1: "86780 Watsica Flats",
|
21
|
+
address2: "Apt. 913",
|
22
|
+
zip_code: "49227",
|
23
|
+
state: "Ohio",
|
24
|
+
state_short: "OH",
|
25
|
+
city: "Port Hildegard",
|
26
|
+
lat: -79.5855309778974,
|
27
|
+
lon: 13.517352691513906,
|
28
|
+
username: "oscar.hermann",
|
29
|
+
active: false,
|
30
|
+
school: "Eastern Lebsack",
|
31
|
+
}
|
32
|
+
expect {
|
33
|
+
subject.scrub!
|
34
|
+
}.not_to raise_error
|
35
|
+
end
|
36
|
+
|
11
37
|
it 'changes the first_name attribute when scrub is run' do
|
12
38
|
subject.first_name = "Ted"
|
13
39
|
allow(Faker::Name).to receive(:first_name).and_return("John")
|
data/spec/support/database.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require 'nulldb/rails'
|
2
2
|
require 'nulldb_rspec'
|
3
3
|
|
4
|
-
ActiveRecord::Base.configurations
|
4
|
+
ActiveRecord::Base.configurations.merge!("test" => {adapter: 'nulldb'})
|
5
5
|
|
6
6
|
NullDB.configure do |c|
|
7
7
|
c.project_root = './spec'
|
@@ -15,7 +15,24 @@ end
|
|
15
15
|
class NonScrubbableModel < ActiveRecord::Base; end
|
16
16
|
|
17
17
|
class ScrubbableModel < ActiveRecord::Base
|
18
|
-
acts_as_scrubbable :first_name,
|
18
|
+
acts_as_scrubbable :first_name,
|
19
|
+
:last_name,
|
20
|
+
:middle_name,
|
21
|
+
:name,
|
22
|
+
:email,
|
23
|
+
:company_name,
|
24
|
+
:zip_code,
|
25
|
+
:state,
|
26
|
+
:city,
|
27
|
+
:username,
|
28
|
+
:school,
|
29
|
+
:title => :name_title,
|
30
|
+
:address1 => :street_address,
|
31
|
+
:address2 => :secondary_address,
|
32
|
+
:state_short => :state_abbr,
|
33
|
+
:lat => :latitude,
|
34
|
+
:lon => :longitude,
|
35
|
+
:active => :boolean
|
19
36
|
attr_accessor :scrubbing_begun, :scrubbing_finished
|
20
37
|
set_callback :scrub, :before do
|
21
38
|
self.scrubbing_begun = true
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: acts_as_scrubbable
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Samer Masry
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-07-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -19,7 +19,7 @@ dependencies:
|
|
19
19
|
version: '4.1'
|
20
20
|
- - "<"
|
21
21
|
- !ruby/object:Gem::Version
|
22
|
-
version: '
|
22
|
+
version: '6'
|
23
23
|
type: :runtime
|
24
24
|
prerelease: false
|
25
25
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -29,7 +29,7 @@ dependencies:
|
|
29
29
|
version: '4.1'
|
30
30
|
- - "<"
|
31
31
|
- !ruby/object:Gem::Version
|
32
|
-
version: '
|
32
|
+
version: '6'
|
33
33
|
- !ruby/object:Gem::Dependency
|
34
34
|
name: activerecord
|
35
35
|
requirement: !ruby/object:Gem::Requirement
|
@@ -39,7 +39,7 @@ dependencies:
|
|
39
39
|
version: '4.1'
|
40
40
|
- - "<"
|
41
41
|
- !ruby/object:Gem::Version
|
42
|
-
version: '
|
42
|
+
version: '6'
|
43
43
|
type: :runtime
|
44
44
|
prerelease: false
|
45
45
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -49,7 +49,7 @@ dependencies:
|
|
49
49
|
version: '4.1'
|
50
50
|
- - "<"
|
51
51
|
- !ruby/object:Gem::Version
|
52
|
-
version: '
|
52
|
+
version: '6'
|
53
53
|
- !ruby/object:Gem::Dependency
|
54
54
|
name: railties
|
55
55
|
requirement: !ruby/object:Gem::Requirement
|
@@ -59,7 +59,7 @@ dependencies:
|
|
59
59
|
version: '4.1'
|
60
60
|
- - "<"
|
61
61
|
- !ruby/object:Gem::Version
|
62
|
-
version: '
|
62
|
+
version: '6'
|
63
63
|
type: :runtime
|
64
64
|
prerelease: false
|
65
65
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -69,7 +69,7 @@ dependencies:
|
|
69
69
|
version: '4.1'
|
70
70
|
- - "<"
|
71
71
|
- !ruby/object:Gem::Version
|
72
|
-
version: '
|
72
|
+
version: '6'
|
73
73
|
- !ruby/object:Gem::Dependency
|
74
74
|
name: faker
|
75
75
|
requirement: !ruby/object:Gem::Requirement
|
@@ -220,11 +220,13 @@ extra_rdoc_files: []
|
|
220
220
|
files:
|
221
221
|
- ".gitignore"
|
222
222
|
- ".rspec"
|
223
|
+
- ".travis.yml"
|
223
224
|
- Gemfile
|
224
225
|
- Guardfile
|
225
226
|
- README.md
|
226
227
|
- acts_as_scrubbable.gemspec
|
227
228
|
- lib/acts_as_scrubbable.rb
|
229
|
+
- lib/acts_as_scrubbable/parallel_table_scrubber.rb
|
228
230
|
- lib/acts_as_scrubbable/scrub.rb
|
229
231
|
- lib/acts_as_scrubbable/scrubbable.rb
|
230
232
|
- lib/acts_as_scrubbable/tasks.rb
|
@@ -239,7 +241,7 @@ homepage: https://github.com/smasry/acts_as_scrubbable
|
|
239
241
|
licenses:
|
240
242
|
- MIT
|
241
243
|
metadata: {}
|
242
|
-
post_install_message:
|
244
|
+
post_install_message:
|
243
245
|
rdoc_options: []
|
244
246
|
require_paths:
|
245
247
|
- lib
|
@@ -254,9 +256,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
254
256
|
- !ruby/object:Gem::Version
|
255
257
|
version: '0'
|
256
258
|
requirements: []
|
257
|
-
|
258
|
-
|
259
|
-
signing_key:
|
259
|
+
rubygems_version: 3.1.4
|
260
|
+
signing_key:
|
260
261
|
specification_version: 4
|
261
262
|
summary: Scrubbing data made easy
|
262
263
|
test_files:
|