db_obfuscation 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +6 -0
- data/Gemfile +13 -0
- data/Gemfile.lock +83 -0
- data/LICENSE +21 -0
- data/README.md +121 -0
- data/TODO +15 -0
- data/bin/console +25 -0
- data/bin/db_obfuscation +121 -0
- data/bin/obfuscation_test +54 -0
- data/cli/db_dump.rb +29 -0
- data/cli/migrator.rb +26 -0
- data/cli/seeder.rb +52 -0
- data/db_obfuscation.gemspec +25 -0
- data/features/bin/dump.feature +21 -0
- data/features/bin/obfuscation.feature +12 -0
- data/features/bin/test_database_tasks.feature +16 -0
- data/features/support.rb +1 -0
- data/lib/db_obfuscation.rb +50 -0
- data/lib/db_obfuscation/batch_formulator.rb +26 -0
- data/lib/db_obfuscation/config.rb +43 -0
- data/lib/db_obfuscation/database.rb +8 -0
- data/lib/db_obfuscation/environment.rb +14 -0
- data/lib/db_obfuscation/filtering.rb +56 -0
- data/lib/db_obfuscation/filtering/column.rb +40 -0
- data/lib/db_obfuscation/filtering/truncation.rb +18 -0
- data/lib/db_obfuscation/obfuscation_strategy.rb +22 -0
- data/lib/db_obfuscation/obfuscator.rb +65 -0
- data/lib/db_obfuscation/query_builder.rb +62 -0
- data/lib/db_obfuscation/truncation.rb +39 -0
- data/lib/db_obfuscation/util/trigger.rb +83 -0
- data/lib/db_obfuscation/version.rb +4 -0
- data/spec/cli/db_dump_spec.rb +33 -0
- data/spec/cli/migrator_spec.rb +59 -0
- data/spec/cli/seeder_spec.rb +33 -0
- data/spec/config/database.yml +5 -0
- data/spec/config/table_strategies/table_1.yml +3 -0
- data/spec/config/table_strategies/table_2.yml +4 -0
- data/spec/config/table_strategies/truncation_table_1.yml +3 -0
- data/spec/config/table_strategies/whitelisted_table_1.yml +3 -0
- data/spec/config/truncation_patterns.yml +2 -0
- data/spec/config/whitelisted_tables.yml +1 -0
- data/spec/db_obfuscation/batch_formulator_spec.rb +36 -0
- data/spec/db_obfuscation/config_spec.rb +60 -0
- data/spec/db_obfuscation/database_spec.rb +10 -0
- data/spec/db_obfuscation/filtering/column_spec.rb +82 -0
- data/spec/db_obfuscation/filtering/truncation_spec.rb +41 -0
- data/spec/db_obfuscation/filtering_spec.rb +39 -0
- data/spec/db_obfuscation/obfuscation_strategy_spec.rb +43 -0
- data/spec/db_obfuscation/obfuscator_spec.rb +150 -0
- data/spec/db_obfuscation/query_builder_spec.rb +259 -0
- data/spec/db_obfuscation/truncation_spec.rb +31 -0
- data/spec/db_obfuscation/util/trigger_spec.rb +126 -0
- data/spec/integration/obfuscation_spec.rb +69 -0
- data/spec/spec_helper.rb +3 -0
- data/spec/test_db_setup/migrations/1_add_table_1.rb +18 -0
- data/spec/test_db_setup/migrations/2_add_table_2.rb +19 -0
- data/spec/test_db_setup/migrations/3_add_truncation_table_1.rb +14 -0
- data/spec/test_db_setup/migrations/4_add_whitelisted_table_1.rb +14 -0
- data/spec/test_db_setup/migrations/5_add_table_without_any_user_defined_obfuscation_strategies.rb +18 -0
- data/spec/test_db_setup/migrations/6_add_table_without_any_obfuscatable_columns.rb +15 -0
- data/spec/test_db_setup/migrations/7_add_audit_truncation_table.rb +13 -0
- data/spec/test_db_setup/seeds/audit_truncation_table.yml +7 -0
- data/spec/test_db_setup/seeds/table_1.yml +13 -0
- data/spec/test_db_setup/seeds/table_2.yml +15 -0
- data/spec/test_db_setup/seeds/table_without_any_obfuscatable_columns.yml +7 -0
- data/spec/test_db_setup/seeds/table_without_any_user_defined_obfuscation_strategies.yml +13 -0
- data/spec/test_db_setup/seeds/truncation_table_1.yml +9 -0
- data/spec/test_db_setup/seeds/whitelisted_table_1.yml +9 -0
- metadata +159 -0
data/cli/db_dump.rb
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
3
|
+
require 'shellwords' # used by DbDump.shell_arg to quote values for the shell

module DbObfuscation
  module Cli
    # Builds and runs a pg_dump command line from a YAML database config.
    module DbDump
      # Loads the database settings and runs the generated pg_dump command.
      #
      # @param config_file [String] path to a YAML file; the 'password',
      #   'host', 'username' and 'database' keys are read from it
      # @param dump_name [String] value passed to pg_dump's -f option
      # @return [true, false, nil] the result of Kernel.system
      def self.dump(config_file, dump_name)
        Kernel.system cmd(db_config(config_file), dump_name)
      end

      # @param file [String] path to the YAML database config
      # @return [Object] the parsed YAML document
      def self.db_config(file)
        YAML.load_file(file)
      end
      private_class_method :db_config

      # Assembles the pg_dump command as a single shell string.
      #
      # Every interpolated value is shell-escaped so that spaces or shell
      # metacharacters in a config value cannot split or extend the command.
      # Empty values are dropped, which keeps the output identical to the
      # previous whitespace-collapsed heredoc for plain values.
      #
      # @param config [Hash] parsed database config (string keys)
      # @param dump_name [String] output file name
      # @return [String] command line with tokens separated by one space
      def self.cmd(config, dump_name)
        [
          "PGPASSWORD=#{shell_arg(config['password'])}",
          'pg_dump',
          '-h', shell_arg(config['host']),
          '-U', shell_arg(config['username']), '-w',
          '-Fc', '-f', shell_arg(dump_name),
          shell_arg(config['database'])
        ].reject(&:empty?).join(' ')
      end
      private_class_method :cmd

      # Shell-escapes a value; nil and empty values become an empty string.
      #
      # @param value [Object] value to place on the command line
      # @return [String]
      def self.shell_arg(value)
        text = value.to_s
        text.empty? ? text : Shellwords.escape(text)
      end
      private_class_method :shell_arg
    end
  end
end
|
data/cli/migrator.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
3
|
+
require 'shellwords' # used to quote arguments handed to the sequel command

module DbObfuscation
  module Cli
    # Runs Sequel migrations by shelling out to the `sequel` executable.
    class Migrator
      # Runs `sequel -m` with the given migrations directory.
      #
      # @param config_file [String] path to a YAML file; 'host' and
      #   'database' are read from it
      # @param path_to_migrations [String] directory passed to `sequel -m`
      # @return [true, false, nil] the result of Kernel#system
      def self.migrate(config_file, path_to_migrations)
        run_sequel(config_file, path_to_migrations)
      end

      # Runs `sequel -m ... -M 0` with the given migrations directory.
      #
      # @param config_file [String] path to a YAML file; 'host' and
      #   'database' are read from it
      # @param path_to_migrations [String] directory passed to `sequel -m`
      # @return [true, false, nil] the result of Kernel#system
      def self.down_migrate(config_file, path_to_migrations)
        run_sequel(config_file, path_to_migrations, '-M 0')
      end

      # @param config_file [String] path to the YAML database config
      # @return [Object] the parsed YAML document
      def self.db_config(config_file)
        YAML.load_file(config_file)
      end

      # Builds the connection URL from the 'host' and 'database' settings.
      #
      # @param config [Hash] parsed database config (string keys)
      # @return [String]
      def self.db_url(config)
        "postgres://#{config['host']}/#{config['database']}"
      end

      # Builds and executes the sequel command line. The migrations path and
      # the URL are shell-escaped so values containing spaces or shell
      # metacharacters stay single arguments; plain values are unchanged.
      #
      # @param config_file [String] path to the YAML database config
      # @param path_to_migrations [String] migrations directory
      # @param extra [String, nil] literal options placed before the URL
      # @return [true, false, nil] the result of Kernel#system
      def self.run_sequel(config_file, path_to_migrations, extra = nil)
        url = db_url(db_config(config_file))
        parts = ['sequel', '-m', Shellwords.escape(path_to_migrations.to_s),
                 extra, Shellwords.escape(url)]
        system(parts.compact.join(' '))
      end

      private_class_method :db_config, :db_url, :run_sequel
    end
  end
end
|
data/cli/seeder.rb
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
require 'pathname'
|
3
|
+
require 'sequel'
|
4
|
+
|
5
|
+
module DbObfuscation
  module Cli
    # Inserts rows described by YAML seed files into database tables.
    # Each seed file is named after its table (basename without '.yml').
    class Seeder
      # @param config_file [String] path to a YAML file whose contents are
      #   passed to Sequel.connect
      # @param path_to_seeds [String] directory containing the seed files
      def initialize(config_file, path_to_seeds)
        @db_connection = db_connection(config_file)
        @seed_data = seed_data(seed_files(path_to_seeds))
      end

      # Convenience wrapper: builds a seeder and runs it.
      def self.seed(*args)
        new(*args).seed
      end

      # Inserts every loaded row into its table, one insert per row.
      #
      # @return [Hash] the seed data that was iterated
      def seed
        @seed_data.each do |table, rows|
          rows.each { |row| @db_connection[table].insert(row) }
        end
      end

      private

      def db_connection(config_file)
        Sequel.connect(YAML.load_file(config_file))
      end

      # Lists the entries of the seeds directory that are regular files.
      # Sub-directories are skipped because they cannot be parsed as YAML,
      # and the list is sorted so the seeding order does not depend on the
      # platform's directory ordering.
      #
      # @param path [String] seeds directory
      # @return [Array<String>]
      def seed_files(path)
        pattern = Pathname.new(path).join('*').to_s
        Dir[pattern].select { |entry| File.file?(entry) }.sort
      end

      def table_name(file)
        File.basename(file, '.yml').to_sym
      end

      # Maps each table to the values of its seed document; the keys of the
      # document (the row labels) are discarded. An empty seed file yields
      # no rows instead of raising.
      #
      # @param files [Array<String>] seed file paths
      # @return [Hash{Symbol => Array}]
      def seed_data(files)
        files.each_with_object({}) do |file, data|
          document = YAML.load_file(file) || {}
          data[table_name(file)] = document.map { |_label, row| row }
        end
      end
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'db_obfuscation/version'
|
5
|
+
|
6
|
+
# Gem metadata for db_obfuscation. The file list is taken from git, so the
# gem must be built from inside a git checkout.
Gem::Specification.new do |spec|
  spec.name        = 'db_obfuscation'
  spec.version     = DbObfuscation::VERSION
  spec.authors     = ['Case Commons, LLC', 'Rajat Agrawal']
  spec.email       = ['casebook-dev@googlegroups.com', 'agrawal.rajat.89@gmail.com']
  spec.homepage    = 'https://github.com/CaseCommonsDevOps/db_obfuscation'
  spec.date        = '2015-07-21'
  spec.summary     = "A gem to obfuscate a production database with fake values for testing with a production size database"
  # Runs of whitespace (including the heredoc's line breaks and indentation)
  # are collapsed to single spaces; strip removes the leading space and
  # trailing newline that the collapse leaves behind.
  spec.description = <<-description.gsub(/\s{2,}/, ' ').strip
    db_obfuscation is a gem that helps to prepare a production size obfuscated database. This obfuscated database can be used for internal testing purposes like user acceptance testing, QA/Regression testing.

    db_obfuscation takes a production database and updates data in every row in each table with fake data. db_obfuscation ensures that associations between different tables are still maintained.
  description
  spec.license     = 'MIT'

  spec.files         = `git ls-files -z`.split("\x0")
  # Only files under bin/ whose name starts with db_obfuscation are installed as executables.
  spec.executables   = spec.files.grep(%r{^bin/db_obfuscation}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
Feature: Dump
|
2
|
+
This is a command to compress an obfuscated database
|
3
|
+
A compressed db is easier to transport
|
4
|
+
It takes a config path for the database connection and
|
5
|
+
Name of the compressed database dump
|
6
|
+
|
7
|
+
Scenario: Successful database dump
|
8
|
+
|
9
|
+
When I run `../../bin/db_obfuscation dump -c ../../spec/config/database.yml -n test_compressed_db`
|
10
|
+
Then the output should contain "Start process"
|
11
|
+
Then the output should contain "Start dumping"
|
12
|
+
Then the output should contain "Finish dumping"
|
13
|
+
Then the output should contain "Process finished"
|
14
|
+
Then the output should not contain "Caught an exception"
|
15
|
+
|
16
|
+
Scenario: Unsuccessful database dump
|
17
|
+
|
18
|
+
When I run `../../bin/db_obfuscation dump -c incorrect_database.yml -n test_compressed_db`
|
19
|
+
Then the output should contain "Start process"
|
20
|
+
Then the output should contain "Process finished"
|
21
|
+
Then the output should contain "Caught an exception"
|
@@ -0,0 +1,12 @@
|
|
1
|
+
Feature: Db_Obfuscation
|
2
|
+
This is a command-line Thor task that runs obfuscation for a user
|
3
|
+
It takes a folder path for obfuscation configuration
|
4
|
+
And obfuscates the database
|
5
|
+
|
6
|
+
Scenario: Successfully Obfuscating a Database
|
7
|
+
When I run `../../bin/db_obfuscation obfuscate -c ../../spec/config/ -s 200`
|
8
|
+
Then the exit status should be 0
|
9
|
+
|
10
|
+
Scenario: Unsuccessfully obfuscating a Database
|
11
|
+
When I run `../../bin/db_obfuscation obfuscate -c incorrect_config_folder -s 200`
|
12
|
+
Then the exit status should not be 0
|
@@ -0,0 +1,16 @@
|
|
1
|
+
Feature: Test Database tasks
|
2
|
+
Scenario: Preparing a test database
|
3
|
+
Given I run `createdb obfuscation_test`
|
4
|
+
|
5
|
+
When I run `../../bin/obfuscation_test initialize_database -c ../../spec/config/database.yml -d ../../spec/test_db_setup`
|
6
|
+
Then the output should contain "Dropping database obfuscation_test"
|
7
|
+
Then the output should contain "DROP DATABASE obfuscation_test;"
|
8
|
+
Then the output should contain "Successfully dropped the database"
|
9
|
+
|
10
|
+
Then the output should contain "Creating database"
|
11
|
+
Then the output should contain "CREATE DATABASE obfuscation_test OWNER"
|
12
|
+
Then the output should contain "Finished creating database"
|
13
|
+
Then the output should contain "Starting up migration"
|
14
|
+
Then the output should contain "Finished migration"
|
15
|
+
Then the output should contain "Starting seeding"
|
16
|
+
Then the output should contain "Finished seeding"
|
data/features/support.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'aruba/cucumber'
|
@@ -0,0 +1,50 @@
|
|
1
|
+
require 'db_obfuscation/environment'
|
2
|
+
require 'db_obfuscation/database'
|
3
|
+
require 'db_obfuscation/filtering'
|
4
|
+
require 'db_obfuscation/query_builder'
|
5
|
+
require 'db_obfuscation/batch_formulator'
|
6
|
+
|
7
|
+
# Entry point of the obfuscation run: walks every table returned by
# Filtering.obfuscation_config and rewrites its rows in batches.
module DbObfuscation
  extend self

  # Obfuscates all configured tables.
  #
  # @param step [Integer] number of row ids handled per UPDATE batch
  # @return [Hash] the per-table configuration that was processed
  def obfuscate(step)
    Filtering.obfuscation_config([:string]).each do |table, cfg|
      DbObfuscation.logging.info "Obfuscating #{table}"
      update(table, cfg, step)
    end
  end

  private

  # Updates one table batch by batch. A failing batch is logged and the
  # remaining batches are still attempted.
  def update(table, config, step)
    # The date columns depend only on the table config, so they are
    # computed once instead of once per batch.
    date_cols = date_columns(config)

    ids(table).each_slice(step) do |id_batch|
      begin
        batch = BatchFormulator.batch_for(config, id_batch)
        multi_update(table, batch, date_cols) unless batch.empty?
      rescue => e
        DbObfuscation.logging.error 'Encountered Exception'
        DbObfuscation.logging.error "#{table} encountered #{e.message}"
        # Logged as text, one frame per line, rather than as an Array.
        DbObfuscation.logging.error Array(e.backtrace).join("\n")
      end
    end
  end

  # @return [Array] names of the columns whose strategy is :date_strategy
  def date_columns(config)
    config.select { |_column, strategy| strategy == :date_strategy }.keys
  end

  # @return [Array] every value of the table's id column
  def ids(table)
    DB[table].map(:id)
  end

  def multi_update(table, batch, date_columns)
    DB.run QueryBuilder.multi_update_sql(table, batch, date_columns)
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'db_obfuscation/database'
|
2
|
+
require 'db_obfuscation/obfuscator'
|
3
|
+
|
4
|
+
module DbObfuscation
  # Produces the obfuscated replacement values for a batch of row ids.
  class BatchFormulator
    # Class-level shortcut for +new.batch_for+.
    def self.batch_for(*args)
      new.batch_for(*args)
    end

    # Builds one hash of replacement values per id.
    #
    # @param config [Hash] column name => obfuscation strategy
    # @param ids [Array] row ids of the batch
    # @return [Array<Hash>] one hash per id, each carrying the literal
    #   values for the configured columns plus the row's :id
    def batch_for(config, ids)
      ids.map { |id| obfuscate_row(config, id) }
    end

    private

    # Generates a fresh value for every configured column and converts it
    # with DB.literal; the row id is stored last under :id, so it wins over
    # a configured column of the same name.
    def obfuscate_row(config, id)
      row = config.each_with_object({}) do |(column, strategy), values|
        values[column] = DB.literal(Obfuscator.obfuscate(strategy))
      end
      row[:id] = id
      row
    end
  end
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
require 'pathname'
|
3
|
+
|
4
|
+
module DbObfuscation
  # Reads the YAML configuration files found under the configuration
  # directory: database.yml, whitelisted_tables.yml, truncation_patterns.yml
  # and table_strategies/*.yml.
  class Config
    class << self
      # @return [Pathname, Object] the explicitly assigned path, or
      #   DbObfuscation.config_dir when none has been assigned
      def config_path
        @config || DbObfuscation.config_dir
      end

      # Points the configuration at a new directory and drops the memoized
      # strategies and patterns so they are re-read from that directory.
      #
      # @param config [String, Pathname] configuration directory
      def config_path=(config)
        @table_strategies = nil
        @truncation_tables = nil
        @config = Pathname.new(config)
      end

      # @return [Object] parsed contents of database.yml
      def db_config
        YAML.load_file(config_path.join('database.yml'))
      end

      # @return [Object] parsed contents of whitelisted_tables.yml, or an
      #   empty list when the file contains no document
      def whitelisted_tables
        YAML.load_file(config_path.join('whitelisted_tables.yml')) || []
      end

      # @return [Hash] merged contents of every table_strategies/*.yml file
      #   (memoized)
      def table_strategies
        @table_strategies ||= load_table_strategies
      end

      # @return [Array<Symbol>] entries of truncation_patterns.yml as
      #   symbols (memoized)
      def truncation_patterns
        @truncation_tables ||=
          (YAML.load_file(config_path.join('truncation_patterns.yml')) || []).map(&:to_sym)
      end

      private

      # Merges all strategy files into one hash; on duplicate top-level
      # keys the file merged later wins. Files without a document are
      # skipped instead of raising.
      def load_table_strategies
        Dir["#{config_path}/table_strategies/*.yml"].each_with_object({}) do |file, strategies|
          next unless File.file?(file)
          strategies.merge!(YAML.load_file(file) || {})
        end
      end
    end
  end
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'pathname'
|
2
|
+
require 'logger'
|
3
|
+
|
4
|
+
# Project-wide settings shared by the library and the command-line tools.
module DbObfuscation
  # Directory three levels above this file.
  ROOT = Pathname.new(File.expand_path('../../../', __FILE__)).freeze

  class << self
    # config_dir: fallback directory used by Config.config_path.
    # logging:    logger object that receives progress and error messages.
    attr_accessor :config_dir, :logging
  end
end

# Make the lib/ and cli/ directories requirable. The paths are converted to
# Strings first: $LOAD_PATH entries are Strings, so a Pathname would never
# be found by include? and would be added again on every load.
%w[lib cli].each do |dir|
  load_dir = DbObfuscation::ROOT.join(dir).to_s
  $LOAD_PATH.unshift(load_dir) unless $LOAD_PATH.include?(load_dir)
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'db_obfuscation/database'
|
2
|
+
require 'db_obfuscation/filtering/column'
|
3
|
+
require 'db_obfuscation/truncation'
|
4
|
+
require 'db_obfuscation/obfuscation_strategy'
|
5
|
+
require 'active_support/core_ext/object/blank'
|
6
|
+
|
7
|
+
module DbObfuscation
  # Works out which columns of which tables get obfuscated, and with which
  # strategy, by combining the database schema with the user's YAML files.
  module Filtering
    extend self

    # Builds the table => { column => strategy } mapping.
    #
    # @param types [Array<Symbol>] column types that receive a default strategy
    # @return [Hash] only tables with at least one remaining column appear
    def obfuscation_config(types)
      result = {}
      filter_tables.each do |table|
        per_table = config_per_table(table, types)
        result[table] = per_table if per_table.present?
      end
      result
    end

    private

    # Default strategies for the table's eligible columns, overridden by the
    # user's strategies, minus every column marked :whitelisted.
    def config_per_table(table, types)
      defaults = {}
      Column.columns_type(table).each do |column_name, column_type|
        kept = Column.filter(column_name, column_type, types)
        defaults[column_name] = default_obfuscation_strategy(kept) if kept
      end

      reject_whitelisted_columns(defaults.merge(user_config(table)))
    end

    def default_obfuscation_strategy(column)
      ObfuscationStrategy.strategy(column)
    end

    # All database tables except the excluded ones.
    def filter_tables
      DbObfuscation::DB.tables - exclude_tables
    end

    # The user's strategies for the table with column names as symbols;
    # an empty hash when the table has no entry.
    def user_config(table)
      strategies = DbObfuscation::Config.table_strategies[table.to_s] || {}
      symbolized = {}
      strategies.each { |column, strategy| symbolized[column.to_sym] = strategy }
      symbolized
    end

    def reject_whitelisted_columns(config)
      config.reject { |_column, strategy| strategy == :whitelisted }
    end

    # Truncated tables, whitelisted tables and :schema_info are never obfuscated.
    def exclude_tables
      excluded = DbObfuscation::Truncation.tables + whitelisted_tables
      excluded + [:schema_info]
    end

    def whitelisted_tables
      DbObfuscation::Config.whitelisted_tables.map { |name| name.to_sym }
    end
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
require 'db_obfuscation/database'
|
2
|
+
|
3
|
+
module DbObfuscation
  module Filtering
    # Schema lookups and name/type checks used to decide whether a column
    # is eligible for default obfuscation.
    module Column
      class << self
        # @param table_name [Symbol, String] table to inspect
        # @return [Hash] column name => value of the column's :type entry
        def columns_type(table_name)
          columns(table_name).each_with_object({}) do |(column_name, column_details), types|
            types[column_name] = column_details[:type]
          end
        end

        # @return [Object] whatever DbObfuscation::DB.schema returns for the
        #   table; consumed above as [name, details] pairs
        def columns(table_name)
          DbObfuscation::DB.schema(table_name)
        end

        # @return [Boolean] whether column_type is one of the expected types
        def type?(expected_column_types, column_type)
          expected_column_types.include?(column_type)
        end

        # Name-based check: true for any name whose text ends in "type".
        # NOTE(review): this also matches names such as "prototype" - confirm
        # that is intended.
        def polymorphic?(column_name)
          column_name.to_s.end_with?('type')
        end

        # Name-based check: true for any name whose text ends in "id".
        # NOTE(review): this also matches names such as "paid" - confirm
        # that is intended.
        def ending_in_id?(column_name)
          column_name.to_s.end_with?('id')
        end

        # @param column_name [Symbol, String]
        # @param column_type [Symbol]
        # @param expected_types [Array<Symbol>]
        # @return [Symbol, String, nil] column_name when it passes every
        #   check, otherwise nil
        def filter(column_name, column_type, expected_types)
          return nil if polymorphic?(column_name) || ending_in_id?(column_name)
          return nil unless type?(expected_types, column_type)

          column_name
        end
      end
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module DbObfuscation
  module Filtering
    # Selects the tables whose names match the configured truncation patterns.
    class Truncation
      # Class-level entry point; forwards to the private instance method.
      def self.matches_patterns(*args)
        new.send(:matches_patterns, *args)
      end

      private

      # A table matches a pattern when its whole name is the pattern itself,
      # optionally followed by an underscore and any further text (for
      # example "audit" matches "audit" and "audit_logs" but not "auditor").
      # The pattern is interpolated into the regular expression as written.
      #
      # @param tables [Array<Symbol, String>] candidate table names
      # @param patterns [Array<Symbol, String>] name prefixes
      # @return [Array] matching tables without duplicates, grouped in
      #   pattern order
      def matches_patterns(tables, patterns)
        patterns.flat_map { |pattern| tables.grep(/\A#{pattern}(_.*)?\z/) }.uniq
      end
    end
  end
end
|