pumice 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +962 -0
- data/lib/pumice/analyzer.rb +67 -0
- data/lib/pumice/configuration.rb +330 -0
- data/lib/pumice/dsl.rb +267 -0
- data/lib/pumice/dump_generator.rb +115 -0
- data/lib/pumice/empty_sanitizer.rb +38 -0
- data/lib/pumice/generators/column_classification.rb +58 -0
- data/lib/pumice/generators/install_generator.rb +33 -0
- data/lib/pumice/generators/sanitizer_generator.rb +107 -0
- data/lib/pumice/generators/templates/initializer.rb.erb +51 -0
- data/lib/pumice/generators/templates/sanitizer.rb.erb +32 -0
- data/lib/pumice/generators/templates/sanitizer_spec.rb.erb +15 -0
- data/lib/pumice/generators/test_generator.rb +20 -0
- data/lib/pumice/helpers.rb +141 -0
- data/lib/pumice/logger.rb +105 -0
- data/lib/pumice/output.rb +81 -0
- data/lib/pumice/progress.rb +42 -0
- data/lib/pumice/pruner.rb +157 -0
- data/lib/pumice/pruning/analyzer.rb +207 -0
- data/lib/pumice/railtie.rb +15 -0
- data/lib/pumice/rspec.rb +101 -0
- data/lib/pumice/runner.rb +66 -0
- data/lib/pumice/safe_scrubber.rb +341 -0
- data/lib/pumice/sanitizer.rb +336 -0
- data/lib/pumice/soft_scrubbing/policy.rb +104 -0
- data/lib/pumice/soft_scrubbing.rb +101 -0
- data/lib/pumice/validator.rb +113 -0
- data/lib/pumice/version.rb +5 -0
- data/lib/pumice.rb +23 -0
- data/lib/tasks/db_scrub.rake +616 -0
- metadata +132 -0
|
@@ -0,0 +1,336 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Pumice
|
|
4
|
+
class UndefinedAttributeError < StandardError; end
|
|
5
|
+
class VerificationError < StandardError; end
|
|
6
|
+
|
|
7
|
+
class Sanitizer
|
|
8
|
+
extend Pumice::DSL
|
|
9
|
+
include Pumice::Helpers
|
|
10
|
+
|
|
11
|
+
class << self
|
|
12
|
+
def inherited(subclass)
|
|
13
|
+
super
|
|
14
|
+
Pumice.register(subclass)
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
# Non-destructive sanitization - returns values without persisting
|
|
18
|
+
# sanitize(record) → returns hash of all sanitized values
|
|
19
|
+
# sanitize(record, :attr) → returns single sanitized value
|
|
20
|
+
def sanitize(record, attr_name = nil, raw_value: nil)
|
|
21
|
+
with_seed_for(record) do
|
|
22
|
+
instance = new(record)
|
|
23
|
+
attr_name ? instance.scrub(attr_name, raw_value) : instance.scrub_all
|
|
24
|
+
end
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
# Destructive scrubbing - persists to database
|
|
28
|
+
# scrub!(record) → persists all scrubbed values
|
|
29
|
+
# scrub!(record, :attr) → persists single scrubbed value
|
|
30
|
+
def scrub!(record, attr_name = nil)
|
|
31
|
+
result = sanitize(record, attr_name)
|
|
32
|
+
persist(record, attr_name, result)
|
|
33
|
+
result
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
# Batch operation - sanitizes every record of this model.
#
# When a bulk operation (truncate!, delete_all, destroy_all) is defined it
# runs instead of record-by-record sanitization. Otherwise, if a prune
# operation is defined, matching records are deleted first and the remaining
# records are scrubbed one by one.
#
# Coverage is validated up front only in strict mode and only when no bulk
# operation is defined. Verification is skipped in dry-run mode.
#
# A NameError is treated as "model not found" (and the sanitizer skipped)
# only when the model class really cannot be resolved. Any other NameError -
# including NoMethodError, which is a NameError subclass - raised from scrub
# blocks, scopes or verification is re-raised, so a genuine failure is never
# reported as a skipped model.
def scrub_all!
  validate_coverage! if Pumice.strict? && !bulk_operation

  logger.initialize_stats
  logger.log_start(name)

  if Pumice.dry_run? && !bulk_operation && scrubbed.any?
    logger.log_progress("Columns: #{scrubbed_columns.join(', ')}")
  end

  count = if bulk_operation
    run_bulk_operation
  else
    pruned = prune_operation ? run_prune : 0
    scrubbed_count = run_record_sanitization
    pruned + scrubbed_count
  end

  run_verification unless Pumice.dry_run?

  logger.log_complete(name, count)
rescue NameError => e
  # Re-resolve the model to tell "model is missing" apart from a NameError
  # that merely happened while a resolvable model was being processed.
  model_missing = begin
    model_class.nil?
  rescue NameError
    true
  end
  raise e unless model_missing

  logger.log_progress("Skipping #{name} (model not found)")
end
|
|
65
|
+
|
|
66
|
+
private
|
|
67
|
+
|
|
68
|
+
def run_bulk_operation
|
|
69
|
+
op = bulk_operation
|
|
70
|
+
|
|
71
|
+
if Pumice.dry_run?
|
|
72
|
+
count = if op[:scope]
|
|
73
|
+
model_class.instance_exec(&op[:scope]).count
|
|
74
|
+
else
|
|
75
|
+
model_class.count
|
|
76
|
+
end
|
|
77
|
+
logger.log_progress("[DRY RUN] Would #{op[:type]} #{count} records")
|
|
78
|
+
return count
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
case op[:type]
|
|
82
|
+
when :truncate
|
|
83
|
+
run_truncate
|
|
84
|
+
when :delete
|
|
85
|
+
run_delete(op[:scope])
|
|
86
|
+
when :destroy
|
|
87
|
+
run_destroy(op[:scope])
|
|
88
|
+
end
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
# Truncates the model's table and returns the number of rows it held
# immediately before the truncate.
#
# The truncate is issued on the model's own connection rather than on
# ActiveRecord::Base's, so the statement goes to the same database the row
# count was read from when the model is bound to a non-default connection.
def run_truncate
  table = model_class.table_name
  count = model_class.count
  model_class.connection.truncate(table)
  logger.log_progress("Truncated #{table}")
  count
end
|
|
98
|
+
|
|
99
|
+
# Runs delete_all against the relation produced by +scope_block+ (evaluated
# in the context of the model class), or against every record when no block
# is given. Logs and returns whatever delete_all reports.
def run_delete(scope_block)
  target =
    if scope_block
      model_class.instance_exec(&scope_block)
    else
      model_class.all
    end

  target.delete_all.tap do |deleted|
    logger.log_progress("Deleted #{deleted} records")
  end
end
|
|
105
|
+
|
|
106
|
+
# Runs destroy_all against the relation produced by +scope_block+ (evaluated
# in the context of the model class), or against every record when no block
# is given. Logs and returns the size of the collection destroy_all returns.
def run_destroy(scope_block)
  target =
    if scope_block
      model_class.instance_exec(&scope_block)
    else
      model_class.all
    end

  target.destroy_all.count.tap do |destroyed|
    logger.log_progress("Destroyed #{destroyed} records")
  end
end
|
|
112
|
+
|
|
113
|
+
# Deletes the records matched by the prune operation's scope and returns how
# many were removed. In dry-run mode nothing is deleted; the matching records
# are only counted and reported.
def run_prune
  doomed = model_class.instance_exec(&prune_operation[:scope])

  unless Pumice.dry_run?
    removed = doomed.delete_all
    logger.log_progress("Pruned #{removed} records")
    return removed
  end

  would_remove = doomed.count
  logger.log_progress("[DRY RUN] Would prune #{would_remove} records")
  would_remove
end
|
|
127
|
+
|
|
128
|
+
def run_record_sanitization
|
|
129
|
+
total = model_class.count
|
|
130
|
+
progress = Pumice::Progress.new(title: model_class.name, total: total)
|
|
131
|
+
count = 0
|
|
132
|
+
model_class.find_each do |record|
|
|
133
|
+
scrub!(record)
|
|
134
|
+
run_record_verification(record) unless Pumice.dry_run?
|
|
135
|
+
count += 1
|
|
136
|
+
progress.increment
|
|
137
|
+
rescue => e
|
|
138
|
+
logger.log_error(name, e)
|
|
139
|
+
raise unless Pumice.config.continue_on_error
|
|
140
|
+
end
|
|
141
|
+
progress.finish
|
|
142
|
+
count
|
|
143
|
+
end
|
|
144
|
+
|
|
145
|
+
def run_record_verification(record)
|
|
146
|
+
return unless record_verification
|
|
147
|
+
|
|
148
|
+
block = record_verification[:block]
|
|
149
|
+
message = record_verification[:message]
|
|
150
|
+
|
|
151
|
+
# Reload record to get persisted values
|
|
152
|
+
record.reload
|
|
153
|
+
|
|
154
|
+
result = block.call(record)
|
|
155
|
+
|
|
156
|
+
unless result
|
|
157
|
+
error_message = message || "Record verification failed for #{name} (ID: #{record.id})"
|
|
158
|
+
logger.log_progress("VERIFICATION FAILED: #{error_message}")
|
|
159
|
+
raise VerificationError, error_message
|
|
160
|
+
end
|
|
161
|
+
end
|
|
162
|
+
|
|
163
|
+
def run_verification
|
|
164
|
+
return unless verification
|
|
165
|
+
|
|
166
|
+
if verification[:block]
|
|
167
|
+
execute_verification(verification[:block], verification[:message])
|
|
168
|
+
elsif verification[:use_default]
|
|
169
|
+
execute_default_verification(verification[:message])
|
|
170
|
+
end
|
|
171
|
+
end
|
|
172
|
+
|
|
173
|
+
# Evaluates a custom verification block in the context of the model class.
# A truthy result is logged as a pass; a falsy result is logged and raised as
# a VerificationError, using +message+ when given and a generic message
# otherwise.
def execute_verification(block, message)
  passed = model_class.instance_exec(&block)
  return logger.log_progress("Verification passed") if passed

  failure = message || "Verification failed for #{name}"
  logger.log_progress("VERIFICATION FAILED: #{failure}")
  raise VerificationError, failure
end
|
|
184
|
+
|
|
185
|
+
def execute_default_verification(message)
|
|
186
|
+
unless bulk_operation
|
|
187
|
+
raise ArgumentError,
|
|
188
|
+
"#{name}: verify_all without a block requires a bulk operation (truncate!, delete_all, destroy_all)"
|
|
189
|
+
end
|
|
190
|
+
|
|
191
|
+
default_block = Pumice.config.default_verification.call(model_class, bulk_operation)
|
|
192
|
+
|
|
193
|
+
# For scoped operations, the default policy returns the scope block.
|
|
194
|
+
# We execute it and check .none? to verify records are gone.
|
|
195
|
+
scope_or_result = model_class.instance_exec(&default_block)
|
|
196
|
+
|
|
197
|
+
# If the result is an ActiveRecord relation, check .none?
|
|
198
|
+
# Otherwise treat it as a boolean result
|
|
199
|
+
result = if scope_or_result.respond_to?(:none?)
|
|
200
|
+
scope_or_result.none?
|
|
201
|
+
else
|
|
202
|
+
scope_or_result
|
|
203
|
+
end
|
|
204
|
+
|
|
205
|
+
unless result
|
|
206
|
+
error_message = message || "Verification failed for #{name}"
|
|
207
|
+
logger.log_progress("VERIFICATION FAILED: #{error_message}")
|
|
208
|
+
raise VerificationError, error_message
|
|
209
|
+
end
|
|
210
|
+
|
|
211
|
+
logger.log_progress("Verification passed")
|
|
212
|
+
end
|
|
213
|
+
|
|
214
|
+
def persist(record, attr_name, result)
|
|
215
|
+
if attr_name
|
|
216
|
+
persist_attribute(record, attr_name, result)
|
|
217
|
+
else
|
|
218
|
+
persist_record(record, result)
|
|
219
|
+
end
|
|
220
|
+
end
|
|
221
|
+
|
|
222
|
+
# Persists a hash of scrubbed values (+data+, attribute => new value) onto
# +record+, or only reports what would change when running in dry-run mode.
#
# Dry run: logs the record ID with the affected attribute names; in verbose
# mode the current and new value of each attribute are included as well.
# Live run: writes the values with update_columns and logs the record ID.
def persist_record(record, data)
  if Pumice.dry_run?
    details = if Pumice.verbose?
      changes = data.map { |attr, new_val|
        "#{attr} (#{record.read_attribute(attr).inspect} → #{new_val.inspect})"
      }.join(', ')
      "ID #{record.id}: #{changes}"
    else
      "ID #{record.id} — #{data.keys.join(', ')}"
    end
    logger.log_record(:would_sanitize, details)
  else
    # A sanitizer without scrubbed columns yields an empty hash; there is
    # nothing to write, so skip the UPDATE instead of issuing one that has no
    # columns to set.
    record.update_columns(data) unless data.empty?
    logger.log_record(:sanitized, "ID #{record.id}")
  end
end
|
|
238
|
+
|
|
239
|
+
# Persists a single scrubbed +value+ for +attr_name+ on +record+ with
# update_column, or - in dry-run mode - only logs the current and the
# would-be value without writing anything.
def persist_attribute(record, attr_name, value)
  unless Pumice.dry_run?
    record.update_column(attr_name, value)
    return logger.log_record(:sanitized, "ID #{record.id}.#{attr_name}")
  end

  before = record.read_attribute(attr_name)
  logger.log_record(:would_sanitize, "ID #{record.id}.#{attr_name} (#{before.inspect} → #{value.inspect})")
end
|
|
248
|
+
|
|
249
|
+
# Seeds Faker per record for the duration of the block so the same record
# always yields the same fake values, then restores the previous generator.
#
# The seed is the record's id, falling back to object_id when the id is nil.
# Integer-like ids are passed to Random.new unchanged. Any other key (for
# example a UUID string) is folded into a stable integer from its bytes,
# because Random.new rejects non-integer seeds.
#
# NOTE(review): the original comment states this is thread-safe because
# Faker 3.x keeps Config.random in Thread.current (gemspec requires >= 3.0);
# confirm against the Faker version in use.
def with_seed_for(record)
  previous = Faker::Config.random
  key = record&.id || record.object_id
  seed =
    if key.respond_to?(:to_int)
      key
    else
      # Deterministic across processes, unlike String#hash.
      key.to_s.each_byte.reduce(0) { |acc, byte| (acc * 31 + byte) % (2**64) }
    end
  Faker::Config.random = Random.new(seed)
  yield
ensure
  Faker::Config.random = previous
end
|
|
258
|
+
|
|
259
|
+
def validate_coverage!
|
|
260
|
+
return if undefined_columns.empty?
|
|
261
|
+
|
|
262
|
+
raise UndefinedAttributeError,
|
|
263
|
+
"#{name} is missing definitions for: #{undefined_columns.join(', ')}. " \
|
|
264
|
+
"Add scrub(:column) { value } for each, or set Pumice.configure { |c| c.strict = false }"
|
|
265
|
+
end
|
|
266
|
+
|
|
267
|
+
def logger
|
|
268
|
+
Pumice::Logger
|
|
269
|
+
end
|
|
270
|
+
end
|
|
271
|
+
|
|
272
|
+
attr_reader :record
|
|
273
|
+
|
|
274
|
+
def initialize(record)
|
|
275
|
+
@record = record
|
|
276
|
+
end
|
|
277
|
+
|
|
278
|
+
def scrub(attr_name, raw_value = nil)
|
|
279
|
+
raw_value ||= record.send(attr_name)
|
|
280
|
+
block = self.class.scrubbed[attr_name.to_sym]
|
|
281
|
+
return raw_value unless block
|
|
282
|
+
|
|
283
|
+
instance_exec(raw_value, &block)
|
|
284
|
+
end
|
|
285
|
+
|
|
286
|
+
# Scrubs every attribute that has a scrub definition on this sanitizer class
# and returns the results as a hash of attribute name => scrubbed value.
def scrub_all
  columns = self.class.scrubbed.keys
  columns.map { |column| [column, scrub(column)] }.to_h
end
|
|
291
|
+
|
|
292
|
+
# Read an original database value, bypassing scrubbing.
|
|
293
|
+
#
|
|
294
|
+
# scrub(:email) { "#{raw(:first_name)}.#{raw(:last_name)}@example.test" }
|
|
295
|
+
def raw(attr_name)
|
|
296
|
+
record.public_send(attr_name)
|
|
297
|
+
end
|
|
298
|
+
|
|
299
|
+
# Provides a clean DSL for referencing attributes within scrub blocks:
|
|
300
|
+
# - Bare attribute names return scrubbed values: `name` → scrub(:name)
|
|
301
|
+
# - raw_* methods return original database values: `raw_name` → raw(:name)
|
|
302
|
+
def method_missing(method_name, *args, &block)
|
|
303
|
+
if raw_attribute_method?(method_name)
|
|
304
|
+
return raw(extract_raw_attribute_name(method_name))
|
|
305
|
+
end
|
|
306
|
+
|
|
307
|
+
if self.class.scrubbed_column?(method_name)
|
|
308
|
+
return scrub(method_name)
|
|
309
|
+
end
|
|
310
|
+
|
|
311
|
+
if record.respond_to?(method_name)
|
|
312
|
+
return record.public_send(method_name, *args, &block)
|
|
313
|
+
end
|
|
314
|
+
|
|
315
|
+
super
|
|
316
|
+
end
|
|
317
|
+
|
|
318
|
+
def respond_to_missing?(method_name, include_private = false)
|
|
319
|
+
raw_attribute_method?(method_name) ||
|
|
320
|
+
self.class.scrubbed_column?(method_name) ||
|
|
321
|
+
record.respond_to?(method_name, include_private) ||
|
|
322
|
+
super
|
|
323
|
+
end
|
|
324
|
+
|
|
325
|
+
private
|
|
326
|
+
|
|
327
|
+
def raw_attribute_method?(method_name)
|
|
328
|
+
method_name.to_s.start_with?('raw_') &&
|
|
329
|
+
record.respond_to?(extract_raw_attribute_name(method_name))
|
|
330
|
+
end
|
|
331
|
+
|
|
332
|
+
def extract_raw_attribute_name(method_name)
|
|
333
|
+
method_name.to_s.delete_prefix('raw_').to_sym
|
|
334
|
+
end
|
|
335
|
+
end
|
|
336
|
+
end
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Pumice
|
|
4
|
+
module SoftScrubbing
|
|
5
|
+
# Policy determines when soft scrubbing applies to a record.
|
|
6
|
+
#
|
|
7
|
+
# TRANSITIONAL: This module currently uses a binary on/off policy check.
|
|
8
|
+
# Future versions will support:
|
|
9
|
+
# - Per-attribute policies (SSN vs email have different rules)
|
|
10
|
+
# - Role-graduated scrubbing (admin/manager/user see different levels)
|
|
11
|
+
# - Viewer context passed to scrub blocks for conditional masking
|
|
12
|
+
#
|
|
13
|
+
# See lib/pumice/README.md for the roadmap.
|
|
14
|
+
module Policy
|
|
15
|
+
extend self
|
|
16
|
+
|
|
17
|
+
THREAD_KEY = :pumice_soft_scrub_context
|
|
18
|
+
CONTEXT_SET_KEY = :pumice_soft_scrub_context_set
|
|
19
|
+
|
|
20
|
+
def context=(context)
|
|
21
|
+
Thread.current[THREAD_KEY] = context
|
|
22
|
+
Thread.current[CONTEXT_SET_KEY] = true
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
def current(record = nil)
|
|
26
|
+
resolve(Thread.current[THREAD_KEY], record)
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
# Returns true if context has been explicitly set for this request/thread.
|
|
30
|
+
# Used to distinguish "no logged-in user" from "not in a request context".
|
|
31
|
+
def context_set?
|
|
32
|
+
Thread.current[CONTEXT_SET_KEY] == true
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
def with_context(context)
|
|
36
|
+
previous = Thread.current[THREAD_KEY]
|
|
37
|
+
previous_set = Thread.current[CONTEXT_SET_KEY]
|
|
38
|
+
self.context = context
|
|
39
|
+
yield
|
|
40
|
+
ensure
|
|
41
|
+
Thread.current[THREAD_KEY] = previous
|
|
42
|
+
Thread.current[CONTEXT_SET_KEY] = previous_set
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
# Temporarily disable soft scrubbing for a block.
|
|
46
|
+
# Used during authentication/session management to skip policy checks.
|
|
47
|
+
def without_context
|
|
48
|
+
previous = Thread.current[THREAD_KEY]
|
|
49
|
+
previous_set = Thread.current[CONTEXT_SET_KEY]
|
|
50
|
+
Thread.current[THREAD_KEY] = nil
|
|
51
|
+
Thread.current[CONTEXT_SET_KEY] = nil
|
|
52
|
+
yield
|
|
53
|
+
ensure
|
|
54
|
+
Thread.current[THREAD_KEY] = previous
|
|
55
|
+
Thread.current[CONTEXT_SET_KEY] = previous_set
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
def enabled_for?(record)
|
|
59
|
+
return false unless Pumice.soft_scrubbing?
|
|
60
|
+
return false unless context_set? # Skip during boot/initialization
|
|
61
|
+
|
|
62
|
+
viewer = current(record)
|
|
63
|
+
Pumice.config.soft_scrubbing[:policy].call(record, viewer)
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
def reset!
|
|
67
|
+
Thread.current[THREAD_KEY] = nil
|
|
68
|
+
Thread.current[CONTEXT_SET_KEY] = nil
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
private
|
|
72
|
+
|
|
73
|
+
def resolve(raw_context, record)
|
|
74
|
+
config_context = Pumice.soft_scrubbing? ? Pumice.config.soft_scrubbing[:context] : nil
|
|
75
|
+
value = raw_context.nil? ? config_context : raw_context
|
|
76
|
+
|
|
77
|
+
case value
|
|
78
|
+
when Proc
|
|
79
|
+
value.arity.zero? ? value.call : value.call(record)
|
|
80
|
+
when Symbol, String
|
|
81
|
+
resolve_symbol(value.to_sym, record)
|
|
82
|
+
when nil
|
|
83
|
+
nil
|
|
84
|
+
else
|
|
85
|
+
value
|
|
86
|
+
end
|
|
87
|
+
end
|
|
88
|
+
|
|
89
|
+
# Resolves a symbolic context name to a value. The name is looked up, in
# order, on the record, on Pumice, on Current (only when that constant is
# defined) and finally among the current thread's local keys. The first
# source that knows the name wins; nil is returned when none does.
def resolve_symbol(method_name, record)
  return record.public_send(method_name) if record&.respond_to?(method_name)
  return Pumice.public_send(method_name) if Pumice.respond_to?(method_name)
  return Current.public_send(method_name) if defined?(Current) && Current.respond_to?(method_name)

  Thread.current[method_name] if Thread.current.key?(method_name)
end
|
|
102
|
+
end
|
|
103
|
+
end
|
|
104
|
+
end
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Pumice
|
|
4
|
+
module SoftScrubbing
|
|
5
|
+
extend ActiveSupport::Concern
|
|
6
|
+
|
|
7
|
+
RECURSION_GUARD_KEY = :pumice_soft_scrub_in_progress
|
|
8
|
+
|
|
9
|
+
# System attributes that should never be scrubbed (needed for Rails internals)
|
|
10
|
+
SYSTEM_ATTRIBUTES = %w[id created_at updated_at].freeze
|
|
11
|
+
|
|
12
|
+
module AttributeInterceptor
|
|
13
|
+
def _read_attribute(attr_name)
|
|
14
|
+
# Prevent infinite recursion - if we're already inside the interceptor, bail out
|
|
15
|
+
return super if Thread.current[Pumice::SoftScrubbing::RECURSION_GUARD_KEY]
|
|
16
|
+
|
|
17
|
+
# Quick check: skip if soft_scrubbing not configured
|
|
18
|
+
return super unless Pumice.soft_scrubbing?
|
|
19
|
+
|
|
20
|
+
# Skip system attributes needed for Rails/Devise internals (session serialization, etc.)
|
|
21
|
+
return super if Pumice::SoftScrubbing::SYSTEM_ATTRIBUTES.include?(attr_name.to_s)
|
|
22
|
+
|
|
23
|
+
begin
|
|
24
|
+
Thread.current[Pumice::SoftScrubbing::RECURSION_GUARD_KEY] = true
|
|
25
|
+
|
|
26
|
+
unless Pumice.soft_scrubbing_enabled_for?(self)
|
|
27
|
+
return super
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
sanitizer = Pumice.sanitizer_for(self.class)
|
|
31
|
+
return super unless sanitizer.scrubbed_column?(attr_name)
|
|
32
|
+
|
|
33
|
+
soft_scrubbed_value(attr_name, sanitizer)
|
|
34
|
+
ensure
|
|
35
|
+
Thread.current[Pumice::SoftScrubbing::RECURSION_GUARD_KEY] = false
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
def reload(*)
|
|
40
|
+
@_soft_scrubbed_cache = nil
|
|
41
|
+
super
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
# Drops the cached soft-scrubbed value for +attr_name+ (if a cache exists)
# before delegating the write, so the next read re-scrubs the new value.
def write_attribute(attr_name, value)
  @_soft_scrubbed_cache&.delete(attr_name.to_s)
  super
end

# Same invalidation for the lower-level writer.
# NOTE(review): in recent Rails versions the generated attribute setters
# (`record.email = ...`) call _write_attribute directly and never reach
# write_attribute, so without this override an assignment would leave a stale
# scrubbed value in the cache - confirm against the supported Rails versions.
def _write_attribute(attr_name, value)
  @_soft_scrubbed_cache&.delete(attr_name.to_s)
  super
end
|
|
48
|
+
|
|
49
|
+
private
|
|
50
|
+
|
|
51
|
+
# Returns the soft-scrubbed value for +attr_name+, computing it at most once
# per record until the cache entry is invalidated.
#
# Cache keys are always strings, matching the string keys that
# write_attribute deletes, so an entry created from a symbol name is
# invalidated as well. Presence is checked with key? so that nil/false
# results are cached too instead of being recomputed on every read.
#
# The raw value is read straight from @attributes and handed to the
# sanitizer as +raw_value+.
def soft_scrubbed_value(attr_name, sanitizer)
  key = attr_name.to_s
  cache = (@_soft_scrubbed_cache ||= {})
  return cache[key] if cache.key?(key)

  raw_value = @attributes.fetch_value(key)
  cache[key] = sanitizer.sanitize(self, attr_name, raw_value: raw_value)
end
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
# Call this once at boot to enable the feature
|
|
61
|
+
def self.init!
|
|
62
|
+
return if @initialized
|
|
63
|
+
|
|
64
|
+
ActiveRecord::Base.prepend(AttributeInterceptor)
|
|
65
|
+
@initialized = true
|
|
66
|
+
|
|
67
|
+
# Eager-load sanitizers using Rails' reloader
|
|
68
|
+
Rails.application.reloader.to_prepare do
|
|
69
|
+
Pumice::SoftScrubbing.eager_load_sanitizers!
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
Rails.logger.info("[Pumice] Soft scrubbing initialized")
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
def self.initialized?
|
|
76
|
+
@initialized == true
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
# For debugging: force re-initialization (use in console only)
|
|
80
|
+
def self.reinit!
|
|
81
|
+
@initialized = false
|
|
82
|
+
init!
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
def self.eager_load_sanitizers!
|
|
86
|
+
sanitizer_paths = Rails.root.join('app/sanitizers')
|
|
87
|
+
return unless sanitizer_paths.exist?
|
|
88
|
+
|
|
89
|
+
Dir[sanitizer_paths.join('**/*.rb')].sort.each do |file|
|
|
90
|
+
relative_path = Pathname.new(file).relative_path_from(sanitizer_paths)
|
|
91
|
+
const_name = relative_path.to_s.delete_suffix('.rb').camelize
|
|
92
|
+
|
|
93
|
+
begin
|
|
94
|
+
const_name.constantize
|
|
95
|
+
rescue NameError => e
|
|
96
|
+
Rails.logger.warn("[Pumice] Could not load sanitizer #{const_name}: #{e.message}")
|
|
97
|
+
end
|
|
98
|
+
end
|
|
99
|
+
end
|
|
100
|
+
end
|
|
101
|
+
end
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Pumice
|
|
4
|
+
class Validator
|
|
5
|
+
Result = Struct.new(:errors, :checks, keyword_init: true) do
|
|
6
|
+
def passed?
|
|
7
|
+
errors.empty?
|
|
8
|
+
end
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
Check = Struct.new(:name, :count, :passed, keyword_init: true)
|
|
12
|
+
|
|
13
|
+
def initialize(email_domains: nil)
|
|
14
|
+
@email_domains = Array(email_domains || Pumice.config.sensitive_email_domains)
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
def run
|
|
18
|
+
errors = []
|
|
19
|
+
checks = []
|
|
20
|
+
|
|
21
|
+
# Check for real email domains
|
|
22
|
+
email_check = check_real_emails
|
|
23
|
+
errors.concat(email_check[:errors])
|
|
24
|
+
checks << email_check[:check]
|
|
25
|
+
|
|
26
|
+
# Check for test emails
|
|
27
|
+
checks << check_test_emails
|
|
28
|
+
|
|
29
|
+
# Check for cleared tokens
|
|
30
|
+
token_checks = check_cleared_tokens
|
|
31
|
+
errors.concat(token_checks[:errors])
|
|
32
|
+
checks.concat(token_checks[:checks])
|
|
33
|
+
|
|
34
|
+
# Check for cleared external IDs
|
|
35
|
+
external_checks = check_external_ids
|
|
36
|
+
errors.concat(external_checks[:errors])
|
|
37
|
+
checks.concat(external_checks[:checks])
|
|
38
|
+
|
|
39
|
+
Result.new(errors: errors, checks: checks)
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
private
|
|
43
|
+
|
|
44
|
+
def email_model
|
|
45
|
+
@email_model ||= Pumice.config.sensitive_email_model.constantize
|
|
46
|
+
rescue NameError
|
|
47
|
+
raise NameError,
|
|
48
|
+
"Pumice validator: model '#{Pumice.config.sensitive_email_model}' not found. " \
|
|
49
|
+
"Set config.sensitive_email_model to your app's user model (e.g. 'Account', 'Member')."
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
def email_column
|
|
53
|
+
@email_column ||= Pumice.config.sensitive_email_column
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
def check_real_emails
|
|
57
|
+
errors = []
|
|
58
|
+
|
|
59
|
+
@email_domains.each do |domain|
|
|
60
|
+
count = email_model.where("#{email_column} LIKE ?", "%@#{domain}").count
|
|
61
|
+
errors << "Found #{count} emails with real domain #{domain}" if count > 0
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
{
|
|
65
|
+
errors: errors,
|
|
66
|
+
check: Check.new(name: 'real_email_domains', count: errors.size, passed: errors.empty?)
|
|
67
|
+
}
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
def check_test_emails
|
|
71
|
+
count = email_model.where("#{email_column} LIKE ?", "%@example.test").count
|
|
72
|
+
Check.new(name: 'test_emails', count: count, passed: count > 0)
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
# For every configured sensitive token column that exists on the email
# model, counts the rows where the column is still non-NULL. Each inspected
# column yields a Check (passing when the count is zero), and any non-zero
# count also yields an error message. Columns missing from the model are
# skipped. Returns { errors: [...], checks: [...] }.
def check_cleared_tokens
  outcome = { errors: [], checks: [] }

  Pumice.config.sensitive_token_columns.each do |column|
    column_name = column.to_s
    next unless email_model.column_names.include?(column_name)

    remaining = email_model.where.not(column => nil).count
    outcome[:errors] << "Found #{remaining} users with #{column}" if remaining > 0
    outcome[:checks] << Check.new(name: column_name, count: remaining, passed: remaining == 0)
  end

  outcome
end
|
|
93
|
+
|
|
94
|
+
# For every configured sensitive external-ID column that exists on the email
# model, counts the rows where the column is still non-NULL. Each inspected
# column yields a Check (passing when the count is zero), and any non-zero
# count also yields an error message. Columns missing from the model are
# skipped. Returns { errors: [...], checks: [...] }.
def check_external_ids
  outcome = { errors: [], checks: [] }

  Pumice.config.sensitive_external_id_columns.each do |column|
    column_name = column.to_s
    next unless email_model.column_names.include?(column_name)

    remaining = email_model.where.not(column => nil).count
    outcome[:errors] << "Found #{remaining} users with #{column} (should be cleared)" if remaining > 0
    outcome[:checks] << Check.new(name: column_name, count: remaining, passed: remaining == 0)
  end

  outcome
end
|
|
112
|
+
end
|
|
113
|
+
end
|
data/lib/pumice.rb
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'pumice/version'
|
|
4
|
+
|
|
5
|
+
module Pumice; end
|
|
6
|
+
|
|
7
|
+
require_relative 'pumice/configuration'
|
|
8
|
+
require_relative 'pumice/output'
|
|
9
|
+
require_relative 'pumice/progress'
|
|
10
|
+
require_relative 'pumice/helpers'
|
|
11
|
+
require_relative 'pumice/logger'
|
|
12
|
+
require_relative 'pumice/dsl'
|
|
13
|
+
require_relative 'pumice/sanitizer'
|
|
14
|
+
require_relative 'pumice/empty_sanitizer'
|
|
15
|
+
require_relative 'pumice/soft_scrubbing'
|
|
16
|
+
require_relative 'pumice/analyzer'
|
|
17
|
+
require_relative 'pumice/validator'
|
|
18
|
+
require_relative 'pumice/runner'
|
|
19
|
+
require_relative 'pumice/dump_generator'
|
|
20
|
+
require_relative 'pumice/pruner'
|
|
21
|
+
require_relative 'pumice/pruning/analyzer'
|
|
22
|
+
require_relative 'pumice/safe_scrubber'
|
|
23
|
+
require_relative 'pumice/railtie' if defined?(Rails::Railtie)
|