column_anonymizer 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +1 -0
- data/.rspec_status +15 -0
- data/CHANGELOG.md +49 -0
- data/CUSTOM_GENERATORS_COMPLETE.md +507 -0
- data/CUSTOM_GENERATORS_GUIDE.md +515 -0
- data/CUSTOM_GENERATORS_IMPLEMENTATION.md +471 -0
- data/CUSTOM_GENERATORS_QUICK_REF.md +95 -0
- data/FEATURE_COMPLETE.md +287 -0
- data/GEMSPEC_FIX.md +90 -0
- data/IMPLEMENTATION_SUMMARY.md +205 -0
- data/QUICK_REFERENCE.md +92 -0
- data/RAKE_TASKS_GUIDE.md +469 -0
- data/RAKE_TASKS_IMPLEMENTATION.md +363 -0
- data/RAKE_TASKS_QUICK_REF.md +164 -0
- data/README.md +389 -0
- data/Rakefile +12 -0
- data/SCAN_GENERATOR_TEST.md +141 -0
- data/WORKFLOW_GUIDE.md +368 -0
- data/YAML_MIGRATION_GUIDE.md +284 -0
- data/lib/column_anonymizer/anonymizer.rb +103 -0
- data/lib/column_anonymizer/encryptable.rb +25 -0
- data/lib/column_anonymizer/railtie.rb +15 -0
- data/lib/column_anonymizer/schema_loader.rb +44 -0
- data/lib/column_anonymizer/version.rb +5 -0
- data/lib/column_anonymizer.rb +9 -0
- data/lib/generators/column_anonymizer/initializer/initializer_generator.rb +25 -0
- data/lib/generators/column_anonymizer/initializer/templates/column_anonymizer.rb +77 -0
- data/lib/generators/column_anonymizer/install/README +46 -0
- data/lib/generators/column_anonymizer/install/install_generator.rb +36 -0
- data/lib/generators/column_anonymizer/install/templates/encrypted_columns.yml +29 -0
- data/lib/generators/column_anonymizer/scan/scan_generator.rb +250 -0
- data/lib/tasks/column_anonymizer.rake +318 -0
- metadata +108 -0
|
@@ -0,0 +1,318 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
namespace :column_anonymizer do
|
|
4
|
+
# Anonymizes every record of every model listed in the schema returned by
# ColumnAnonymizer::SchemaLoader.load_schema.
#
# For each schema entry the model class is resolved, its records are walked
# with find_each, and ColumnAnonymizer::Anonymizer.anonymize_model! is applied
# to each record. A failure on a single record or a single model is printed
# and counted, and processing continues with the next one. The task exits
# with status 1 only when the schema is empty.
#
# NOTE(review): the non-ASCII glyphs at the start of several messages look
# like mis-decoded UTF-8 emoji; they are reproduced as found here — confirm
# against the original file before shipping.
desc "Anonymize all encrypted columns defined in config/encrypted_columns.yml"
task anonymize_all: :environment do
  require 'column_anonymizer'

  # Load the schema
  schema = ColumnAnonymizer::SchemaLoader.load_schema

  if schema.empty?
    puts "ā No models found in config/encrypted_columns.yml"
    exit 1
  end

  puts "š Found #{schema.keys.size} model(s) in configuration"
  puts "=" * 70

  total_anonymized = 0
  total_errors = 0

  schema.each do |model_name, columns|
    # Resolve the class on its own so that only a failed lookup is reported
    # as "not found". NoMethodError is a subclass of NameError, so rescuing
    # NameError around the whole body would mislabel unrelated failures.
    model_class =
      begin
        model_name.constantize
      rescue NameError
        puts " ā Model class '#{model_name}' not found"
        total_errors += 1
        next
      end

    begin
      puts "\nš Processing #{model_name}..."
      puts " Columns: #{columns.keys.join(', ')}"

      record_count = model_class.count
      if record_count.zero?
        puts " ā ļø No records found, skipping"
        next
      end

      puts " Records: #{record_count}"

      anonymized_count = 0
      error_count = 0
      progress_shown = false

      model_class.find_each.with_index do |record, index|
        begin
          ColumnAnonymizer::Anonymizer.anonymize_model!(record)
          anonymized_count += 1

          # Show progress every 100 records
          if ((index + 1) % 100).zero?
            print "\r Progress: #{index + 1}/#{record_count}"
            $stdout.flush
            progress_shown = true
          end
        rescue => e
          error_count += 1
          puts "\n ā Error anonymizing #{model_name} ID #{record.id}: #{e.message}"
        end
      end

      # Wipe the progress line whenever one was actually printed (this also
      # covers the case of exactly 100 records).
      print "\r" + " " * 50 + "\r" if progress_shown
      puts " ā\nAnonymized #{anonymized_count} record(s)"
      puts " ā ļø #{error_count} error(s)" if error_count > 0

      total_anonymized += anonymized_count
      total_errors += error_count
    rescue => e
      puts " ā Error processing #{model_name}: #{e.message}"
      total_errors += 1
    end
  end

  puts "\n" + "=" * 70
  puts "š Anonymization complete!"
  puts " Total records anonymized: #{total_anonymized}"
  puts " Total errors: #{total_errors}" if total_errors > 0
  puts "=" * 70
end
|
|
82
|
+
|
|
83
|
+
# Anonymizes every record of one model.
#
# Task argument:
#   model_name - class name of the model; it must be a key of the schema
#                returned by ColumnAnonymizer::SchemaLoader.load_schema.
#
# Exits with status 1 when the argument is missing, the model is not in the
# schema, the class cannot be resolved, or an error occurs outside the
# per-record loop. Exits with status 0 when the model has no records.
# Per-record failures are printed and counted without stopping the run.
#
# NOTE(review): the non-ASCII glyphs at the start of several messages look
# like mis-decoded UTF-8 emoji; they are reproduced as found here — confirm
# against the original file before shipping.
desc "Anonymize a specific model"
task :anonymize_model, [:model_name] => :environment do |t, args|
  require 'column_anonymizer'

  unless args[:model_name]
    puts "ā Please provide a model name: rake column_anonymizer:anonymize_model[User]"
    exit 1
  end

  model_name = args[:model_name]
  schema = ColumnAnonymizer::SchemaLoader.load_schema

  unless schema.key?(model_name)
    puts "ā Model '#{model_name}' not found in config/encrypted_columns.yml"
    puts "Available models: #{schema.keys.join(', ')}"
    exit 1
  end

  # Resolve the class separately: NoMethodError is a subclass of NameError,
  # so a NameError rescue around the whole run would report unrelated
  # failures as a missing model class.
  model_class =
    begin
      model_name.constantize
    rescue NameError
      puts "ā Model class '#{model_name}' not found"
      exit 1
    end

  begin
    columns = schema[model_name]

    puts "š Anonymizing #{model_name}"
    puts " Columns: #{columns.keys.join(', ')}"

    record_count = model_class.count
    if record_count.zero?
      puts " ā ļø No records found"
      exit 0
    end

    puts " Records: #{record_count}"

    anonymized_count = 0
    error_count = 0
    progress_shown = false

    model_class.find_each.with_index do |record, index|
      begin
        ColumnAnonymizer::Anonymizer.anonymize_model!(record)
        anonymized_count += 1

        # Report progress after every 100th record.
        if ((index + 1) % 100).zero?
          print "\r Progress: #{index + 1}/#{record_count}"
          $stdout.flush
          progress_shown = true
        end
      rescue => e
        error_count += 1
        puts "\n ā Error anonymizing record ID #{record.id}: #{e.message}"
      end
    end

    # Wipe the progress line whenever one was actually printed (this also
    # covers the case of exactly 100 records).
    print "\r" + " " * 50 + "\r" if progress_shown
    puts " ā\nAnonymized #{anonymized_count} record(s)"
    puts " ā ļø #{error_count} error(s)" if error_count > 0
  rescue => e
    # SystemExit raised by the `exit 0` above is not a StandardError and
    # therefore passes through this handler untouched.
    puts "ā Error: #{e.message}"
    exit 1
  end
end
|
|
146
|
+
|
|
147
|
+
# Prints, for every model in the schema, the configured columns, their types,
# the record count and the current values of the first record. Nothing is
# written to the database by this task.
#
# Values are shortened to their first 50 characters, with "..." appended only
# when something was actually cut off.
#
# Exits with status 1 when the schema is empty. A model whose class cannot be
# resolved, or that fails while being inspected, is reported and skipped.
#
# NOTE(review): the non-ASCII glyphs at the start of several messages look
# like mis-decoded UTF-8 emoji; they are reproduced as found here — confirm
# against the original file before shipping.
desc "Preview anonymization without saving"
task preview: :environment do
  require 'column_anonymizer'

  schema = ColumnAnonymizer::SchemaLoader.load_schema

  if schema.empty?
    puts "ā No models found in config/encrypted_columns.yml"
    exit 1
  end

  puts "š Anonymization Preview"
  puts "=" * 70

  schema.each do |model_name, columns|
    # Resolve the class separately: NoMethodError is a subclass of NameError,
    # so a NameError rescue around the whole body would report e.g. a
    # misspelled column name as a missing model class.
    model_class =
      begin
        model_name.constantize
      rescue NameError
        puts "\n#{model_name}:"
        puts " ā Model class not found"
        next
      end

    begin
      record_count = model_class.count

      puts "\n#{model_name}:"
      puts " Columns to anonymize: #{columns.keys.join(', ')}"
      puts " Records to process: #{record_count}"
      puts " Types: #{columns.values.join(', ')}"

      # Show example for first record
      if record_count > 0
        record = model_class.first
        puts "\n Example (first record):"
        columns.keys.each do |column_name|
          # public_send keeps a config-supplied name from reaching private methods.
          text = record.public_send(column_name).to_s
          shown = text.length > 50 ? "#{text[0, 50]}..." : text
          puts " #{column_name}: #{shown}"
        end
      end
    rescue => e
      puts "\n#{model_name}:"
      puts " ā Error: #{e.message}"
    end
  end

  puts "\n" + "=" * 70
  puts "š” Run 'rake column_anonymizer:anonymize_all' to perform anonymization"
  puts "=" * 70
end
|
|
194
|
+
|
|
195
|
+
# Anonymizes the records of one model that match a condition, after an
# interactive confirmation.
#
# Task arguments:
#   model_name - class name of the model; it must be a key of the schema
#                returned by ColumnAnonymizer::SchemaLoader.load_schema.
#   condition  - passed unchanged to model_class.where.
#
# SECURITY NOTE(review): the condition string goes to `where` as-is, i.e. as a
# raw SQL fragment. That is the purpose of this task, but it must only ever be
# fed operator-typed input, never values that originate from end users.
#
# Exits with status 1 on missing arguments, an unknown model, an unresolvable
# class, or an error outside the per-record loop. Exits with status 0 when
# nothing matches or the operator does not answer "y"/"yes" (end-of-input on
# stdin counts as "no"). Per-record failures are printed and counted without
# stopping the run.
#
# NOTE(review): the non-ASCII glyphs at the start of several messages look
# like mis-decoded UTF-8 emoji; they are reproduced as found here — confirm
# against the original file before shipping.
desc "Anonymize records matching a condition"
task :anonymize_where, [:model_name, :condition] => :environment do |t, args|
  require 'column_anonymizer'

  unless args[:model_name] && args[:condition]
    puts "ā Usage: rake column_anonymizer:anonymize_where[User,'created_at < \"2023-01-01\"']"
    exit 1
  end

  model_name = args[:model_name]
  condition = args[:condition]
  schema = ColumnAnonymizer::SchemaLoader.load_schema

  unless schema.key?(model_name)
    puts "ā Model '#{model_name}' not found in config/encrypted_columns.yml"
    exit 1
  end

  # Resolve the class separately: NoMethodError is a subclass of NameError,
  # so a NameError rescue around the whole run would report unrelated
  # failures as a missing model class.
  model_class =
    begin
      model_name.constantize
    rescue NameError
      puts "ā Model class '#{model_name}' not found"
      exit 1
    end

  begin
    columns = schema[model_name]

    puts "š Anonymizing #{model_name} where #{condition}"
    puts " Columns: #{columns.keys.join(', ')}"

    records = model_class.where(condition)
    record_count = records.count

    if record_count.zero?
      puts " ā ļø No records match the condition"
      exit 0
    end

    puts " Matching records: #{record_count}"
    print " ā ļø This will anonymize #{record_count} record(s). Continue? (y/N): "

    # gets returns nil at end-of-input (closed or redirected stdin); to_s
    # turns that into "", which falls through to the "Cancelled" branch.
    response = $stdin.gets.to_s.chomp.downcase
    unless %w[y yes].include?(response)
      puts " ā Cancelled"
      exit 0
    end

    anonymized_count = 0
    error_count = 0
    progress_shown = false

    records.find_each.with_index do |record, index|
      begin
        ColumnAnonymizer::Anonymizer.anonymize_model!(record)
        anonymized_count += 1

        # Report progress after every 100th record.
        if ((index + 1) % 100).zero?
          print "\r Progress: #{index + 1}/#{record_count}"
          $stdout.flush
          progress_shown = true
        end
      rescue => e
        error_count += 1
        puts "\n ā Error anonymizing record ID #{record.id}: #{e.message}"
      end
    end

    # Wipe the progress line whenever one was actually printed (this also
    # covers the case of exactly 100 records).
    print "\r" + " " * 50 + "\r" if progress_shown
    puts " ā\nAnonymized #{anonymized_count} record(s)"
    puts " ā ļø #{error_count} error(s)" if error_count > 0
  rescue => e
    # SystemExit raised by the `exit 0` calls above is not a StandardError
    # and therefore passes through this handler untouched.
    puts "ā Error: #{e.message}"
    # backtrace is nil for an exception that was never raised; Array() guards that.
    puts " Trace: #{Array(e.backtrace).first(3).join("\n ")}"
    exit 1
  end
end
|
|
268
|
+
|
|
269
|
+
# Prints a summary of the schema returned by
# ColumnAnonymizer::SchemaLoader.load_schema: the number of models and
# columns, then for each model its columns, record count and distinct types,
# and finally the record total across all models that could be counted.
#
# Exits with status 1 when the schema is empty. A model whose class cannot be
# resolved, or that fails while being inspected, is reported and left out of
# the record total.
#
# NOTE(review): the non-ASCII glyphs at the start of several messages look
# like mis-decoded UTF-8 emoji; they are reproduced as found here — confirm
# against the original file before shipping.
desc "Show statistics about encrypted columns"
task stats: :environment do
  require 'column_anonymizer'

  schema = ColumnAnonymizer::SchemaLoader.load_schema

  if schema.empty?
    puts "ā No models found in config/encrypted_columns.yml"
    exit 1
  end

  puts "š Column Anonymizer Statistics"
  puts "=" * 70

  total_models = schema.keys.size
  total_columns = schema.values.sum { |model_columns| model_columns.keys.size }
  total_records = 0

  puts "\nModels configured: #{total_models}"
  puts "Total encrypted columns: #{total_columns}"
  puts "\nDetailed breakdown:\n"

  schema.each do |model_name, columns|
    # Resolve the class separately: NoMethodError is a subclass of NameError,
    # so a NameError rescue around the whole body would report unrelated
    # failures as a missing model class.
    model_class =
      begin
        model_name.constantize
      rescue NameError
        puts "#{model_name}:"
        puts " ā Model class not found"
        puts ""
        next
      end

    begin
      record_count = model_class.count
      total_records += record_count

      puts "#{model_name}:"
      puts " Columns: #{columns.keys.size} (#{columns.keys.join(', ')})"
      puts " Records: #{record_count}"
      puts " Types: #{columns.values.uniq.join(', ')}"
      puts ""
    rescue => e
      puts "#{model_name}:"
      puts " ā Error: #{e.message}"
      puts ""
    end
  end

  puts "=" * 70
  puts "Total records across all models: #{total_records}"
  puts "=" * 70
end
|
|
318
|
+
end
|
metadata
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: column_anonymizer
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Hunter-Kendall
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: exe
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2026-02-05 00:00:00.000000000 Z
|
|
12
|
+
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: rails
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - ">="
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: '7.0'
|
|
20
|
+
type: :runtime
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - ">="
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
version: '7.0'
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: faker
|
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - "~>"
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: '3.0'
|
|
34
|
+
type: :runtime
|
|
35
|
+
prerelease: false
|
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - "~>"
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
version: '3.0'
|
|
41
|
+
description: A Rails gem that uses a YAML configuration file to track encrypted column
|
|
42
|
+
types, enabling intelligent anonymization of encrypted data with contextually appropriate
|
|
43
|
+
fake data generators.
|
|
44
|
+
email:
|
|
45
|
+
- hunter.kendall@rolemodelsoftware.com
|
|
46
|
+
executables: []
|
|
47
|
+
extensions: []
|
|
48
|
+
extra_rdoc_files: []
|
|
49
|
+
files:
|
|
50
|
+
- ".rspec"
|
|
51
|
+
- ".rspec_status"
|
|
52
|
+
- CHANGELOG.md
|
|
53
|
+
- CUSTOM_GENERATORS_COMPLETE.md
|
|
54
|
+
- CUSTOM_GENERATORS_GUIDE.md
|
|
55
|
+
- CUSTOM_GENERATORS_IMPLEMENTATION.md
|
|
56
|
+
- CUSTOM_GENERATORS_QUICK_REF.md
|
|
57
|
+
- FEATURE_COMPLETE.md
|
|
58
|
+
- GEMSPEC_FIX.md
|
|
59
|
+
- IMPLEMENTATION_SUMMARY.md
|
|
60
|
+
- QUICK_REFERENCE.md
|
|
61
|
+
- RAKE_TASKS_GUIDE.md
|
|
62
|
+
- RAKE_TASKS_IMPLEMENTATION.md
|
|
63
|
+
- RAKE_TASKS_QUICK_REF.md
|
|
64
|
+
- README.md
|
|
65
|
+
- Rakefile
|
|
66
|
+
- SCAN_GENERATOR_TEST.md
|
|
67
|
+
- WORKFLOW_GUIDE.md
|
|
68
|
+
- YAML_MIGRATION_GUIDE.md
|
|
69
|
+
- lib/column_anonymizer.rb
|
|
70
|
+
- lib/column_anonymizer/anonymizer.rb
|
|
71
|
+
- lib/column_anonymizer/encryptable.rb
|
|
72
|
+
- lib/column_anonymizer/railtie.rb
|
|
73
|
+
- lib/column_anonymizer/schema_loader.rb
|
|
74
|
+
- lib/column_anonymizer/version.rb
|
|
75
|
+
- lib/generators/column_anonymizer/initializer/initializer_generator.rb
|
|
76
|
+
- lib/generators/column_anonymizer/initializer/templates/column_anonymizer.rb
|
|
77
|
+
- lib/generators/column_anonymizer/install/README
|
|
78
|
+
- lib/generators/column_anonymizer/install/install_generator.rb
|
|
79
|
+
- lib/generators/column_anonymizer/install/templates/encrypted_columns.yml
|
|
80
|
+
- lib/generators/column_anonymizer/scan/scan_generator.rb
|
|
81
|
+
- lib/tasks/column_anonymizer.rake
|
|
82
|
+
homepage: https://github.com/hunter-kendall/column_anonymizer
|
|
83
|
+
licenses: []
|
|
84
|
+
metadata:
|
|
85
|
+
homepage_uri: https://github.com/hunter-kendall/column_anonymizer
|
|
86
|
+
source_code_uri: https://github.com/hunter-kendall/column_anonymizer
|
|
87
|
+
changelog_uri: https://github.com/hunter-kendall/column_anonymizer/blob/main/CHANGELOG.md
|
|
88
|
+
post_install_message:
|
|
89
|
+
rdoc_options: []
|
|
90
|
+
require_paths:
|
|
91
|
+
- lib
|
|
92
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
93
|
+
requirements:
|
|
94
|
+
- - ">="
|
|
95
|
+
- !ruby/object:Gem::Version
|
|
96
|
+
version: 3.1.0
|
|
97
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
98
|
+
requirements:
|
|
99
|
+
- - ">="
|
|
100
|
+
- !ruby/object:Gem::Version
|
|
101
|
+
version: '0'
|
|
102
|
+
requirements: []
|
|
103
|
+
rubygems_version: 3.5.22
|
|
104
|
+
signing_key:
|
|
105
|
+
specification_version: 4
|
|
106
|
+
summary: YAML-based encrypted column type tracking for intelligent anonymization in
|
|
107
|
+
Rails
|
|
108
|
+
test_files: []
|