column_anonymizer 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,103 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'faker'
4
+
5
module ColumnAnonymizer
  # Overwrites a model's configured columns with generated fake values.
  #
  # Generators are zero-argument callables looked up by type. The built-in
  # set is backed by Faker; applications may add or override types through
  # {.register}. Custom generators take precedence over built-in ones with
  # the same type name.
  class Anonymizer
    # Built-in generators
    BUILT_IN_GENERATORS = {
      email: -> { Faker::Internet.email },
      phone: -> { Faker::PhoneNumber.phone_number },
      ssn: -> { Faker::IdNumber.ssn_valid },
      name: -> { Faker::Name.name },
      first_name: -> { Faker::Name.first_name },
      last_name: -> { Faker::Name.last_name },
      address: -> { Faker::Address.full_address },
      text: -> { Faker::Lorem.paragraph }
    }.freeze

    # Custom generators registered by the application
    @custom_generators = {}

    class << self
      attr_reader :custom_generators

      # Register a custom anonymization generator
      #
      # When both a callable and a block are given, the block wins.
      #
      # @param type [Symbol, String] The type identifier to use in YAML config
      # @param generator [Proc, #call] A callable that returns the anonymized value
      # @raise [ArgumentError] if neither a block nor an object responding to #call is given
      #
      # @example Register a custom generator with a block
      #   ColumnAnonymizer::Anonymizer.register(:credit_card) do
      #     "XXXX-XXXX-XXXX-#{rand(1000..9999)}"
      #   end
      #
      # @example Register a custom generator with a callable
      #   ColumnAnonymizer::Anonymizer.register(:company_name, -> { Faker::Company.name })
      #
      def register(type, generator = nil, &block)
        generator = block if block_given?

        unless generator.respond_to?(:call)
          raise ArgumentError, "Generator must be a Proc or respond to #call"
        end

        @custom_generators[type.to_sym] = generator

        log_info "[ColumnAnonymizer] Registered custom generator: #{type}"
      end

      # Unregister a custom generator
      #
      # @param type [Symbol, String] The type identifier to remove
      # @return [#call, nil] the removed generator, or nil when none was registered
      #
      def unregister(type)
        @custom_generators.delete(type.to_sym)
      end

      # Get all available generators (built-in + custom)
      #
      # @return [Hash] All available generators; custom entries override built-ins
      #
      def all_generators
        BUILT_IN_GENERATORS.merge(@custom_generators)
      end

      # Check if a generator exists
      #
      # @param type [Symbol, String] The type to check
      # @return [Boolean]
      #
      def generator_exists?(type)
        all_generators.key?(type.to_sym)
      end

      # Reset custom generators (useful for testing)
      #
      def reset_custom_generators!
        @custom_generators = {}
      end

      # Assigns a freshly generated value to every column listed in the
      # model class's `encrypted_columns_metadata`, without saving.
      #
      # Instances whose class does not respond to `encrypted_columns_metadata`
      # are returned untouched. Unknown column types fall back to the :text
      # generator.
      #
      # @param model_instance [Object] the record to anonymize in place
      # @return [Object] the same instance
      #
      def anonymize_model(model_instance)
        klass = model_instance.class
        return model_instance unless klass.respond_to?(:encrypted_columns_metadata)

        generators = all_generators

        klass.encrypted_columns_metadata.each do |column_name, column_type|
          # Generators are keyed by Symbol; normalise so that String types
          # (or nil) do not silently miss their generator.
          generator = generators[column_type.to_s.to_sym] || generators[:text]
          # Column names come from configuration: only call public writers.
          model_instance.public_send("#{column_name}=", generator.call)
        end

        model_instance
      end

      # Anonymizes the instance via {.anonymize_model} and persists it with `save!`.
      #
      # @param model_instance [Object] a record responding to `save!`
      # @return [Object] whatever `save!` returns
      #
      def anonymize_model!(model_instance)
        anonymize_model(model_instance)
        model_instance.save!
      end

      private

      # Writes an info line to the Rails logger when one is actually available.
      # `Rails` being defined does not guarantee a configured logger.
      def log_info(message)
        return unless defined?(Rails) && Rails.respond_to?(:logger)

        Rails.logger&.info(message)
      end
    end
  end
end
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
module ColumnAnonymizer
  # Concern that exposes, at class level, the column => type mapping that
  # SchemaLoader reports for the including class's name.
  module Encryptable
    extend ActiveSupport::Concern

    class_methods do
      # Memoized column/type mapping for this class.
      #
      # @return [Hash{Symbol => Symbol}]
      def encrypted_columns_metadata
        @encrypted_columns_metadata ||= load_encrypted_columns_from_yaml
      end

      # Forces SchemaLoader to re-read its file, then rebuilds the memoized mapping.
      #
      # @return [Hash{Symbol => Symbol}]
      def reload_encrypted_columns_metadata!
        ColumnAnonymizer::SchemaLoader.reload_schema!
        @encrypted_columns_metadata = load_encrypted_columns_from_yaml
      end

      private

      # Fetches this class's schema entry and symbolizes both keys and values.
      def load_encrypted_columns_from_yaml
        raw_entry = ColumnAnonymizer::SchemaLoader.schema_for_model(name)
        symbol_keyed = raw_entry.transform_keys { |column| column.to_sym }
        symbol_keyed.transform_values { |type| type.to_sym }
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
module ColumnAnonymizer
  # Rails integration: mixes Encryptable in when the :active_record load hook
  # runs, and loads the gem's rake file.
  class Railtie < Rails::Railtie
    initializer "column_anonymizer.active_record" do
      ActiveSupport.on_load(:active_record) { include ColumnAnonymizer::Encryptable }
    end

    rake_tasks do
      # Path is resolved relative to this file's directory.
      rake_file = File.expand_path('../tasks/column_anonymizer.rake', __dir__)
      load rake_file
    end
  end
end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'yaml'
4
+
5
module ColumnAnonymizer
  # Loads and memoizes config/encrypted_columns.yml, a mapping of
  # model name => { column name => type }.
  class SchemaLoader
    class << self
      # Returns the parsed schema, reading the file on first use only.
      #
      # A missing file, an empty/comment-only file, or a file whose top level
      # is not a mapping all yield an empty Hash.
      #
      # @return [Hash]
      def load_schema
        return @schema if defined?(@schema)

        @schema = read_schema(schema_file_path)
      end

      # Drops the memoized schema and reads the file again.
      #
      # @return [Hash]
      def reload_schema!
        remove_instance_variable(:@schema) if defined?(@schema)
        load_schema
      end

      # Absolute path of the schema file: under Rails.root when Rails provides
      # one, otherwise under the current working directory.
      #
      # @return [String]
      def schema_file_path
        # `Rails` may be defined without a booted application (nil root).
        rails_root = Rails.root if defined?(Rails) && Rails.respond_to?(:root)
        File.join((rails_root || Dir.pwd).to_s, 'config', 'encrypted_columns.yml')
      end

      # Schema entry for one model.
      #
      # @param model_name [String, Symbol, #to_s]
      # @return [Hash] the model's column => type mapping; empty when the model
      #   is absent or its entry is not a mapping
      def schema_for_model(model_name)
        entry = load_schema[model_name.to_s]
        entry.is_a?(Hash) ? entry : {}
      end

      private

      # Reads and parses the schema file, always returning a Hash.
      def read_schema(file_path)
        unless File.exist?(file_path)
          log_to_rails :warn, "[ColumnAnonymizer] Schema file not found: #{file_path}"
          return {}
        end

        parsed = YAML.load_file(file_path)
        log_to_rails :info, "[ColumnAnonymizer] Loaded schema from #{file_path}"
        return parsed if parsed.is_a?(Hash)

        # Empty documents parse to nil/false; anything else is a malformed schema.
        if parsed
          log_to_rails :warn, "[ColumnAnonymizer] Ignoring schema in #{file_path}: top level is not a mapping"
        end
        {}
      end

      # Logs through Rails.logger only when a logger is actually configured.
      def log_to_rails(level, message)
        return unless defined?(Rails) && Rails.respond_to?(:logger)

        Rails.logger&.public_send(level, message)
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module ColumnAnonymizer
  # Released gem version.
  VERSION = '0.1.0'
end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+ require_relative "column_anonymizer/version"
3
+ require_relative "column_anonymizer/schema_loader"
4
+ require_relative "column_anonymizer/encryptable"
5
+ require_relative "column_anonymizer/anonymizer"
6
+ require_relative "column_anonymizer/railtie" if defined?(Rails::Railtie)
7
module ColumnAnonymizer
  # Base error class for the gem.
  class Error < StandardError
  end
end
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rails/generators'
4
+
5
module ColumnAnonymizer
  module Generators
    # Generator that copies the `column_anonymizer.rb` template into
    # config/initializers and prints a short usage hint.
    class InitializerGenerator < Rails::Generators::Base
      source_root File.expand_path('templates', __dir__)

      desc "Creates an initializer for registering custom anonymization generators"

      # Renders the template to config/initializers/column_anonymizer.rb.
      def create_initializer_file
        template 'column_anonymizer.rb', 'config/initializers/column_anonymizer.rb'
      end

      # Prints where the initializer went and a registration example.
      def show_instructions
        say "\n✅ Initializer created at config/initializers/column_anonymizer.rb", :green
        say "\nYou can now register custom anonymization generators:", :cyan

        example_lines = [
          " ColumnAnonymizer::Anonymizer.register(:custom_type) do",
          " # Return your custom anonymized value",
          " end"
        ]
        example_lines.each { |line| say line, :yellow }
      end
    end
  end
end
@@ -0,0 +1,77 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Column Anonymizer - Custom Generator Configuration
4
+ #
5
+ # Register custom anonymization generators for use in your encrypted_columns.yml
6
+ #
7
+ # Example:
8
+ # User:
9
+ # credit_card: credit_card # Uses the custom generator defined below
10
+ #
11
+
12
+ # Simple generator with a block
13
ColumnAnonymizer::Anonymizer.register(:credit_card) do
  # Masked card number: fixed X groups followed by four random digits (1000-9999)
  last_four = rand(1000..9999)
  "XXXX-XXXX-XXXX-#{last_four}"
end
17
+
18
+ # Generator using Faker (already loaded by column_anonymizer)
19
+ # ColumnAnonymizer::Anonymizer.register(:company_name) do
20
+ # Faker::Company.name
21
+ # end
22
+
23
+ # Generator with custom logic
24
+ # ColumnAnonymizer::Anonymizer.register(:employee_id) do
25
+ # "EMP-#{SecureRandom.hex(4).upcase}"
26
+ # end
27
+
28
+ # Generator with complex formatting
29
+ # ColumnAnonymizer::Anonymizer.register(:license_plate) do
30
+ # letters = ('A'..'Z').to_a.sample(3).join
31
+ # numbers = rand(100..999)
32
+ # "#{letters}-#{numbers}"
33
+ # end
34
+
35
+ # Generator that uses Rails helpers or models
36
+ # ColumnAnonymizer::Anonymizer.register(:department_name) do
37
+ # Department.active.pluck(:name).sample || "General"
38
+ # end
39
+
40
+ # Generator with callable object
41
+ # class CustomGenerator
42
+ # def self.call
43
+ # "CUSTOM-#{Time.now.to_i}"
44
+ # end
45
+ # end
46
+ # ColumnAnonymizer::Anonymizer.register(:timestamp_id, CustomGenerator)
47
+
48
+ # Generator for specific format requirements
49
+ # ColumnAnonymizer::Anonymizer.register(:account_number) do
50
+ # prefix = "ACC"
51
+ # middle = Time.now.year
52
+ # suffix = rand(10000..99999)
53
+ # "#{prefix}#{middle}#{suffix}"
54
+ # end
55
+
56
+ # You can also use lambda syntax
57
+ # ColumnAnonymizer::Anonymizer.register(:uuid, -> { SecureRandom.uuid })
58
+
59
+ # ============================================================================
60
+ # Available Built-in Types (no need to register these):
61
+ # ============================================================================
62
+ # :email - Generates fake email addresses
63
+ # :phone - Generates fake phone numbers
64
+ # :ssn - Generates fake SSN
65
+ # :name - Generates fake full names
66
+ # :first_name - Generates fake first names
67
+ # :last_name - Generates fake last names
68
+ # :address - Generates fake addresses
69
+ # :text - Generates lorem ipsum text
70
+ # ============================================================================
71
+
72
+ # To use your custom types, add them to config/encrypted_columns.yml:
73
+ #
74
+ # User:
75
+ # credit_card_number: credit_card
76
+ # employee_id: employee_id
77
+ # license_number: license_plate
@@ -0,0 +1,46 @@
1
+ ===============================================================================
2
+
3
+ ColumnAnonymizer has been installed!
4
+
5
+ The configuration file has been created at:
6
+
7
+ config/encrypted_columns.yml
8
+
9
+ Next steps:
10
+
11
+ 1. Define your encrypted column types in config/encrypted_columns.yml
12
+
13
+ Example:
14
+ User:
15
+ email: email
16
+ phone_number: phone
17
+ ssn: ssn
18
+
19
+ OR use the automatic scanner to discover encrypted columns:
20
+
21
+ rails generate column_anonymizer:scan
22
+
23
+ This will scan your models for any 'encrypts' calls and automatically
24
+ add them to your config file with intelligent type guessing!
25
+
26
+ 2. Use standard Rails encryption in your models:
27
+
28
+ class User < ApplicationRecord
29
+ encrypts :email
30
+ encrypts :phone_number
31
+ encrypts :ssn
32
+ end
33
+
34
+ 3. Anonymize data when needed:
35
+
36
+ user = User.first
37
+ ColumnAnonymizer::Anonymizer.anonymize_model!(user)
38
+
39
+ Useful commands:
40
+
41
+ rails generate column_anonymizer:scan # Scan models and update config
42
+ rails generate column_anonymizer:install --scan # Install and scan in one step
43
+
44
+ For more information, see: https://github.com/hunter-kendall/column_anonymizer
45
+
46
+ ===============================================================================
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rails/generators'
4
+
5
module ColumnAnonymizer
  module Generators
    # Generator that creates config/encrypted_columns.yml from its template and,
    # with --scan, invokes the column_anonymizer:scan generator afterwards.
    class InstallGenerator < Rails::Generators::Base
      source_root File.expand_path('templates', __dir__)

      desc "Creates a ColumnAnonymizer configuration file at config/encrypted_columns.yml"

      class_option :scan, type: :boolean, default: false,
                          desc: "Automatically scan models for encrypted attributes and populate the config"

      # Renders the YAML template into the application's config directory.
      def create_config_file
        template 'encrypted_columns.yml', 'config/encrypted_columns.yml'
      end

      # Runs the scan generator when --scan was passed; otherwise prints a tip.
      def scan_models_if_requested
        unless options[:scan]
          [
            "\n💡 Tip: Run with --scan to automatically discover encrypted columns:",
            " rails generate column_anonymizer:install --scan",
            "\n Or scan later with:",
            " rails generate column_anonymizer:scan"
          ].each { |tip| say tip, :yellow }
          return
        end

        say "\n🔎 Running model scan...", :cyan
        invoke 'column_anonymizer:scan'
      end

      # Shows the README template, but only when generating (not when revoking).
      def show_readme
        return unless behavior == :invoke

        readme "README"
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
+ # Encrypted Columns Schema
2
+ #
3
+ # Define the types for your encrypted columns here.
4
+ # This allows the anonymizer to generate appropriate fake data.
5
+ #
6
+ # Structure:
7
+ # ModelName:
8
+ # column_name: column_type
9
+ #
10
+ # Available types:
11
+ # - email : Generates a random fake email address (Faker::Internet.email)
12
+ # - phone : Generates a random fake phone number (Faker::PhoneNumber.phone_number)
13
+ # - ssn : Generates 123-45-6789
14
+ # - name : Generates a random fake full name (Faker::Name.name)
15
+ # - first_name : Generates John, Jane, Alex, etc.
16
+ # - last_name : Generates Smith, Johnson, etc.
17
+ # - address : Generates a random fake full address (Faker::Address.full_address)
18
+ # - text : Generates generic anonymized text (default)
19
+ #
20
+ # Example:
21
+ # User:
22
+ # email: email
23
+ # phone_number: phone
24
+ # ssn: ssn
25
+ # full_name: name
26
+ #
27
+ # Account:
28
+ # bank_account_number: text
29
+ # routing_number: text
@@ -0,0 +1,250 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rails/generators'
4
+ require 'yaml'
5
+
6
+ module ColumnAnonymizer
7
+ module Generators
8
+ class ScanGenerator < Rails::Generators::Base
9
+ desc "Scans Rails models for encrypted attributes and updates config/encrypted_columns.yml"
10
+
11
# Generator step: scans the models, works out which discovered columns are
# missing from config/encrypted_columns.yml, appends them and reports.
# Stops early when nothing is discovered or nothing is new.
def scan_and_update_config
  config_path = Rails.root.join('config', 'encrypted_columns.yml')

  # Start from the current config when the file exists, else from nothing
  existing_config = {}
  existing_config = YAML.load_file(config_path) || {} if File.exist?(config_path)

  say "🔍 Scanning models for encrypted attributes...", :cyan

  discovered_encryptions = scan_models
  if discovered_encryptions.empty?
    say "⚠️ No encrypted attributes found in models", :yellow
    return
  end

  # Keep only what is not configured yet
  additions = calculate_additions(existing_config, discovered_encryptions)
  if additions.empty?
    say "ℹ️ All discovered columns are already configured", :yellow
    return
  end

  append_to_config(config_path, additions)

  model_count = discovered_encryptions.keys.size
  say "✅ Scanned #{model_count} model(s) with encrypted attributes", :green
  say "📝 Appended #{count_additions(additions)} new column(s) to config/encrypted_columns.yml", :green

  # Summary of everything that was discovered
  discovered_encryptions.each do |model_name, columns|
    say " #{model_name}: #{columns.keys.join(', ')}", :blue
  end
end
50
+
51
+ private
52
+
53
# Reads every Ruby file under app/models and collects, per model name, the
# encrypted columns found in it together with a guessed type.
#
# @return [Hash{String => Hash{String => String}}] model => { column => type };
#   files without encrypted columns are omitted
def scan_models
  # Ensure models are loaded
  Rails.application.eager_load! if defined?(Rails)

  Dir[Rails.root.join('app/models/**/*.rb')].each_with_object({}) do |path, found|
    source = File.read(path)

    model_name = extract_model_name(path, source)
    next unless model_name

    columns = extract_encrypted_columns(source)
    next if columns.empty?

    found[model_name] = columns.map { |column| [column, guess_type(column)] }.to_h
  end
end
81
+
82
# Determines the model name for a source file.
#
# Prefers the first `class Name < Parent` declaration that is not commented
# out, keeping any namespace written in it (`Admin::User`), because the
# config file is keyed by the full class name. A class nested inside
# `module` blocks is still reported without the enclosing module names.
# Falls back to the camelized file basename when no declaration is found.
#
# @param file [String] path of the model file
# @param content [String] source code of the file
# @return [String] the model name
def extract_model_name(file, content)
  # `[^#\n]*` keeps the match on one line and out of comments.
  declared = content[/^[^#\n]*\bclass\s+([\w:]+)\s*</, 1]
  return declared.sub(/\A::/, '') if declared

  # Fallback to filename
  File.basename(file, '.rb').camelize
end
91
+
92
# Lists the attribute names passed as leading symbols to `encrypts` calls.
#
# Handles `encrypts :a`, `encrypts :a, :b, option: value` and the
# parenthesised form `encrypts(:a)`. Commented-out calls are ignored.
# Names are returned in source order without duplicates.
#
# @param content [String] source code of a model file
# @return [Array<String>] attribute names
def extract_encrypted_columns(content)
  # Drop everything after a `#` on each line so commented-out calls are
  # not reported. This also truncates lines containing `#` inside a
  # string, which does not matter for locating `encrypts` calls.
  code = content.gsub(/#.*$/, '')

  # One pattern covers single and multiple attributes; trailing keyword
  # options (`key: value`) do not start with `:` and end the list.
  code.scan(/\bencrypts(?:\s+|\s*\(\s*)(:\w+(?:\s*,\s*:\w+)*)/)
      .flat_map { |(attribute_list)| attribute_list.scan(/:(\w+)/).flatten }
      .uniq
end
109
+
110
# Filters the discovered columns down to those without a truthy entry in
# the existing config, announcing each decision via `say`.
#
# @param existing [Hash] parsed current config (model => { column => type })
# @param discovered [Hash] scan result in the same shape
# @return [Hash] model => { column => type } for the columns to add
def calculate_additions(existing, discovered)
  discovered.each_with_object({}) do |(model, columns), additions|
    columns.each do |column, type|
      configured_type = existing.dig(model, column)

      if configured_type
        # Already present in the config: leave it alone
        say " ℹ️ Skipping #{model}.#{column} (already configured as '#{configured_type}')", :yellow
        next
      end

      # Not configured yet: queue it
      (additions[model] ||= {})[column] = type
      say " ➕ Adding #{model}.#{column} as '#{type}'", :green
    end
  end
end
129
+
130
# Adds new model/column entries to the YAML config while keeping the
# existing text (including comments) intact.
#
# * Missing, empty or `---`-only file: the additions are written as a
#   fresh YAML document.
# * Columns for a model already in the file are inserted directly after
#   that model's existing column lines, using their indentation.
# * Models not yet in the file are appended at the end, each preceded by a
#   blank line, with two-space indented columns.
#
# If a model is present in the parsed config but its header line cannot be
# located in the text (for example a flow-style mapping), its columns are
# not written and a warning is printed instead of dropping them silently.
#
# @param config_path [String, Pathname] path of config/encrypted_columns.yml
# @param additions [Hash] model => { column => type } to add
# @return [void]
def append_to_config(config_path, additions)
  existing_content = File.exist?(config_path) ? File.read(config_path) : ''

  # Nothing worth preserving: emit a plain YAML dump
  if ['', '---'].include?(existing_content.strip)
    File.write(config_path, YAML.dump(additions))
    return
  end

  # Comment-only files parse to nil/false; treat any non-mapping as empty
  existing_config = YAML.load_file(config_path)
  existing_config = {} unless existing_config.is_a?(Hash)

  columns_for_existing, new_models =
    additions.partition { |model, _columns| existing_config.key?(model) }.map(&:to_h)

  # Top-level `Model:` line; allows namespaces, quotes and a trailing comment
  header_pattern = /\A(["']?)([\w:]+)\1:\s*(?:#.*)?\z/
  # Indented `column:` line belonging to the model above it
  column_pattern = /\A([ \t]+)\w+:/

  # Guarantee every line ends with a newline so that text inserted after the
  # final line of the file is never glued onto it.
  lines = existing_content.lines.map { |line| line.end_with?("\n") ? line : "#{line}\n" }
  output = []
  index = 0

  while index < lines.length
    line = lines[index]
    output << line
    index += 1

    model = line.chomp[header_pattern, 2]
    # `delete` hands out each model's columns once and leaves the models
    # whose header was never found for the warning below.
    columns = model && columns_for_existing.delete(model)
    next unless columns

    # Copy the model's existing columns, remembering how they are indented
    indent = '  '
    while index < lines.length && (found = column_pattern.match(lines[index]))
      indent = found[1]
      output << lines[index]
      index += 1
    end

    columns.each { |column, type| output << "#{indent}#{column}: #{type}\n" }
  end

  columns_for_existing.each_key do |model|
    say "  ⚠️  Could not locate '#{model}:' in #{config_path}; add its new columns manually", :red
  end

  # Append completely new models at the end
  new_models.each do |model, columns|
    output << "\n" << "#{model}:\n"
    columns.each { |column, type| output << "  #{column}: #{type}\n" }
  end

  File.write(config_path, output.join)
end
222
+
223
# Total number of columns across all models in an additions hash.
#
# @param additions [Hash] model => { column => type }
# @return [Integer]
def count_additions(additions)
  additions.each_value.sum(&:size)
end
226
+
227
# Guesses a generator type from a column name.
#
# The lower-cased name is tested against the patterns below in order; the
# first match decides. Anything unmatched is 'text'.
#
# @param column_name [String, Symbol]
# @return [String] one of 'email', 'phone', 'ssn', 'first_name', 'last_name',
#   'name', 'address' or 'text'
def guess_type(column_name)
  normalized = column_name.to_s.downcase

  rules = [
    [/email/, 'email'],
    [/phone|mobile|cell|telephone/, 'phone'],
    [/ssn|social_security/, 'ssn'],
    [/first_name|fname/, 'first_name'],
    [/last_name|lname|surname/, 'last_name'],
    [/^name$|full_name|fullname/, 'name'],
    [/address|street|addr/, 'address']
  ]

  _pattern, type = rules.find { |pattern, _type| pattern.match?(normalized) }
  type || 'text'
end
248
+ end
249
+ end
250
+ end