db_obfuscation 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +6 -0
- data/Gemfile +13 -0
- data/Gemfile.lock +83 -0
- data/LICENSE +21 -0
- data/README.md +121 -0
- data/TODO +15 -0
- data/bin/console +25 -0
- data/bin/db_obfuscation +121 -0
- data/bin/obfuscation_test +54 -0
- data/cli/db_dump.rb +29 -0
- data/cli/migrator.rb +26 -0
- data/cli/seeder.rb +52 -0
- data/db_obfuscation.gemspec +25 -0
- data/features/bin/dump.feature +21 -0
- data/features/bin/obfuscation.feature +12 -0
- data/features/bin/test_database_tasks.feature +16 -0
- data/features/support.rb +1 -0
- data/lib/db_obfuscation.rb +50 -0
- data/lib/db_obfuscation/batch_formulator.rb +26 -0
- data/lib/db_obfuscation/config.rb +43 -0
- data/lib/db_obfuscation/database.rb +8 -0
- data/lib/db_obfuscation/environment.rb +14 -0
- data/lib/db_obfuscation/filtering.rb +56 -0
- data/lib/db_obfuscation/filtering/column.rb +40 -0
- data/lib/db_obfuscation/filtering/truncation.rb +18 -0
- data/lib/db_obfuscation/obfuscation_strategy.rb +22 -0
- data/lib/db_obfuscation/obfuscator.rb +65 -0
- data/lib/db_obfuscation/query_builder.rb +62 -0
- data/lib/db_obfuscation/truncation.rb +39 -0
- data/lib/db_obfuscation/util/trigger.rb +83 -0
- data/lib/db_obfuscation/version.rb +4 -0
- data/spec/cli/db_dump_spec.rb +33 -0
- data/spec/cli/migrator_spec.rb +59 -0
- data/spec/cli/seeder_spec.rb +33 -0
- data/spec/config/database.yml +5 -0
- data/spec/config/table_strategies/table_1.yml +3 -0
- data/spec/config/table_strategies/table_2.yml +4 -0
- data/spec/config/table_strategies/truncation_table_1.yml +3 -0
- data/spec/config/table_strategies/whitelisted_table_1.yml +3 -0
- data/spec/config/truncation_patterns.yml +2 -0
- data/spec/config/whitelisted_tables.yml +1 -0
- data/spec/db_obfuscation/batch_formulator_spec.rb +36 -0
- data/spec/db_obfuscation/config_spec.rb +60 -0
- data/spec/db_obfuscation/database_spec.rb +10 -0
- data/spec/db_obfuscation/filtering/column_spec.rb +82 -0
- data/spec/db_obfuscation/filtering/truncation_spec.rb +41 -0
- data/spec/db_obfuscation/filtering_spec.rb +39 -0
- data/spec/db_obfuscation/obfuscation_strategy_spec.rb +43 -0
- data/spec/db_obfuscation/obfuscator_spec.rb +150 -0
- data/spec/db_obfuscation/query_builder_spec.rb +259 -0
- data/spec/db_obfuscation/truncation_spec.rb +31 -0
- data/spec/db_obfuscation/util/trigger_spec.rb +126 -0
- data/spec/integration/obfuscation_spec.rb +69 -0
- data/spec/spec_helper.rb +3 -0
- data/spec/test_db_setup/migrations/1_add_table_1.rb +18 -0
- data/spec/test_db_setup/migrations/2_add_table_2.rb +19 -0
- data/spec/test_db_setup/migrations/3_add_truncation_table_1.rb +14 -0
- data/spec/test_db_setup/migrations/4_add_whitelisted_table_1.rb +14 -0
- data/spec/test_db_setup/migrations/5_add_table_without_any_user_defined_obfuscation_strategies.rb +18 -0
- data/spec/test_db_setup/migrations/6_add_table_without_any_obfuscatable_columns.rb +15 -0
- data/spec/test_db_setup/migrations/7_add_audit_truncation_table.rb +13 -0
- data/spec/test_db_setup/seeds/audit_truncation_table.yml +7 -0
- data/spec/test_db_setup/seeds/table_1.yml +13 -0
- data/spec/test_db_setup/seeds/table_2.yml +15 -0
- data/spec/test_db_setup/seeds/table_without_any_obfuscatable_columns.yml +7 -0
- data/spec/test_db_setup/seeds/table_without_any_user_defined_obfuscation_strategies.yml +13 -0
- data/spec/test_db_setup/seeds/truncation_table_1.yml +9 -0
- data/spec/test_db_setup/seeds/whitelisted_table_1.yml +9 -0
- metadata +159 -0
data/cli/db_dump.rb
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
3
|
+
require 'shellwords'

module DbObfuscation
  module Cli
    # Builds and runs a `pg_dump` command line from a YAML database config.
    module DbDump
      # Loads the YAML config and hands the generated pg_dump command to
      # Kernel.system.
      #
      # @param config_file [String] path to a YAML file; the keys 'password',
      #   'host', 'username' and 'database' are read from it
      # @param dump_name [String] value passed to pg_dump's -f option
      # @return [Boolean, nil] whatever Kernel.system returns
      def self.dump(config_file, dump_name)
        config = db_config(config_file)
        Kernel.system cmd(config, dump_name)
      end

      # Parses the YAML config file.
      def self.db_config(file)
        YAML.load_file(file)
      end
      private_class_method :db_config

      # Assembles the single-line shell command.
      #
      # Every interpolated value is shell-escaped so that spaces or shell
      # metacharacters in a password, host, user, database or dump name cannot
      # split or extend the command. Values made only of shell-safe characters
      # are emitted unchanged.
      def self.cmd(config, dump_name)
        [
          "PGPASSWORD=#{shell_value(config['password'])}",
          'pg_dump',
          "-h #{shell_value(config['host'])}",
          "-U #{shell_value(config['username'])} -w",
          "-Fc -f #{shell_value(dump_name)}",
          shell_value(config['database'])
        ].join(' ').gsub(/\s{2,}/, ' ').strip # collapse gaps left by blank values
      end
      private_class_method :cmd

      # Escapes one value for the shell. nil/blank values stay an empty string
      # (rather than becoming '') so a missing setting renders as it did before.
      def self.shell_value(value)
        text = value.to_s
        text.empty? ? text : Shellwords.escape(text)
      end
      private_class_method :shell_value
    end
  end
end
|
data/cli/migrator.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
3
|
+
require 'shellwords'

module DbObfuscation
  module Cli
    # Runs Sequel migrations by shelling out to the `sequel` executable.
    class Migrator
      # Runs `sequel -m <path> <url>` for the database named in the config.
      #
      # @param config_file [String] path to a YAML file; 'host' and 'database' are read
      # @param path_to_migrations [String] directory passed to `sequel -m`
      # @return [Boolean, nil] whatever `system` returns
      def self.migrate(config_file, path_to_migrations)
        system(sequel_command(config_file, path_to_migrations))
      end

      # Same as {migrate} but adds `-M 0` to the command line.
      #
      # @param config_file [String] path to a YAML file; 'host' and 'database' are read
      # @param path_to_migrations [String] directory passed to `sequel -m`
      # @return [Boolean, nil] whatever `system` returns
      def self.down_migrate(config_file, path_to_migrations)
        system(sequel_command(config_file, path_to_migrations, '-M 0'))
      end

      # Parses the YAML config file.
      def self.db_config(config_file)
        YAML.load_file(config_file)
      end
      private_class_method :db_config

      # Builds the command string shared by both directions. The migrations
      # path and the database URL are shell-escaped so spaces or shell
      # metacharacters in them cannot split or extend the command; values made
      # of shell-safe characters are emitted unchanged.
      def self.sequel_command(config_file, path_to_migrations, extra_flags = nil)
        config = db_config(config_file)
        db_name = "postgres://#{config['host']}/#{config['database']}"

        [
          'sequel',
          '-m',
          Shellwords.escape(path_to_migrations.to_s),
          extra_flags,
          Shellwords.escape(db_name)
        ].compact.join(' ')
      end
      private_class_method :sequel_command
    end
  end
end
|
data/cli/seeder.rb
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
require 'pathname'
|
3
|
+
require 'sequel'
|
4
|
+
|
5
|
+
module DbObfuscation
  module Cli
    # Loads YAML seed files from a directory and inserts their rows through a
    # Sequel connection. The table name is the seed file's basename with a
    # trailing '.yml' removed.
    class Seeder
      # @param config_file [String] path to a YAML file handed to Sequel.connect
      # @param path_to_seeds [String] directory whose files are read as seeds
      def initialize(config_file, path_to_seeds)
        @db_connection = db_connection(config_file)
        @seed_data = seed_data(seed_files(path_to_seeds))
      end

      # Convenience wrapper: builds a seeder with the given arguments and runs it.
      def self.seed(*args)
        new(*args).seed
      end

      # Inserts every loaded row into its table, one insert per row.
      def seed
        @seed_data.each do |table, rows|
          rows.each { |row| @db_connection[table].insert(row) }
        end
      end

      private

      def db_connection(config_file)
        Sequel.connect(YAML.load_file(config_file))
      end

      # Lists the seed files directly under +path+.
      #
      # Directories matched by the glob are skipped because YAML.load_file
      # cannot read them, and the result is sorted so the insert order does not
      # depend on how the filesystem enumerates entries.
      def seed_files(path)
        pattern = Pathname.new(path).join('*').to_s
        Dir[pattern].select { |entry| File.file?(entry) }.sort
      end

      def table_name(file)
        File.basename(file, '.yml').to_sym
      end

      # Maps each table name to the list of rows found in its seed file; when
      # the file parses to a Hash, the keys are discarded and only the values
      # (the rows) are kept.
      def seed_data(files)
        files.each_with_object({}) do |file, data|
          data[table_name(file)] = YAML.load_file(file).map { |_, row| row }
        end
      end
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'db_obfuscation/version'
|
5
|
+
|
6
|
+
# Gem metadata for db_obfuscation. The file list comes from `git ls-files`,
# so the gem has to be built from inside a git checkout.
Gem::Specification.new do |gem|
  gem.name     = 'db_obfuscation'
  gem.version  = DbObfuscation::VERSION
  gem.authors  = ['Case Commons, LLC', 'Rajat Agrawal']
  gem.email    = ['casebook-dev@googlegroups.com', 'agrawal.rajat.89@gmail.com']
  gem.homepage = 'https://github.com/CaseCommonsDevOps/db_obfuscation'
  gem.date     = '2015-07-21'
  gem.license  = 'MIT'

  gem.summary = "A gem to obfuscate a production database with fake values for testing with a production size database"

  # Runs of whitespace (indentation, line breaks, the blank line between the
  # two paragraphs) are squeezed to single spaces.
  long_text = <<-DESCRIPTION
    db_obfuscation is a gem that helps to prepare a production size obfuscated database. This obfuscated database can be used for internal testing purposes like user acceptance testing, QA/Regression testing.

    db_obfuscation takes a production database and updates data in every row in each table with fake data. db_obfuscation ensures that associations between different tables are still maintained.
  DESCRIPTION
  gem.description = long_text.gsub(/\s{2,}/, ' ')

  tracked = `git ls-files -z`.split("\x0")
  gem.files         = tracked
  gem.executables   = gem.files.grep(%r{^bin/db_obfuscation}).map { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = %w[lib]
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
Feature: Dump
|
2
|
+
This is a command to compress an obfuscated database
|
3
|
+
A compressed db is easier to transport
|
4
|
+
It takes a config path for the database connection and
|
5
|
+
the name of the compressed database dump
|
6
|
+
|
7
|
+
Scenario: Successful database dump
|
8
|
+
|
9
|
+
When I run `../../bin/db_obfuscation dump -c ../../spec/config/database.yml -n test_compressed_db`
|
10
|
+
Then the output should contain "Start process"
|
11
|
+
Then the output should contain "Start dumping"
|
12
|
+
Then the output should contain "Finish dumping"
|
13
|
+
Then the output should contain "Process finished"
|
14
|
+
Then the output should not contain "Caught an exception"
|
15
|
+
|
16
|
+
Scenario: Unsuccessful database dump
|
17
|
+
|
18
|
+
When I run `../../bin/db_obfuscation dump -c incorrect_database.yml -n test_compressed_db`
|
19
|
+
Then the output should contain "Start process"
|
20
|
+
Then the output should contain "Process finished"
|
21
|
+
Then the output should contain "Caught an exception"
|
@@ -0,0 +1,12 @@
|
|
1
|
+
Feature: Db_Obfuscation
|
2
|
+
This is a command-line Thor task to run obfuscation for a user
|
3
|
+
It takes a folder path for obfuscation configuration
|
4
|
+
And obfuscates the database
|
5
|
+
|
6
|
+
Scenario: Successfully Obfuscating a Database
|
7
|
+
When I run `../../bin/db_obfuscation obfuscate -c ../../spec/config/ -s 200`
|
8
|
+
Then the exit status should be 0
|
9
|
+
|
10
|
+
Scenario: Unsuccessfully obfuscating a Database
|
11
|
+
When I run `../../bin/db_obfuscation obfuscate -c incorrect_config_folder -s 200`
|
12
|
+
Then the exit status should not be 0
|
@@ -0,0 +1,16 @@
|
|
1
|
+
Feature: Test Database tasks
|
2
|
+
Scenario: Preparing a test database
|
3
|
+
Given I run `createdb obfuscation_test`
|
4
|
+
|
5
|
+
When I run `../../bin/obfuscation_test initialize_database -c ../../spec/config/database.yml -d ../../spec/test_db_setup`
|
6
|
+
Then the output should contain "Dropping database obfuscation_test"
|
7
|
+
Then the output should contain "DROP DATABASE obfuscation_test;"
|
8
|
+
Then the output should contain "Successfully dropped the database"
|
9
|
+
|
10
|
+
Then the output should contain "Creating database"
|
11
|
+
Then the output should contain "CREATE DATABASE obfuscation_test OWNER"
|
12
|
+
Then the output should contain "Finished creating database"
|
13
|
+
Then the output should contain "Starting up migration"
|
14
|
+
Then the output should contain "Finished migration"
|
15
|
+
Then the output should contain "Starting seeding"
|
16
|
+
Then the output should contain "Finished seeding"
|
data/features/support.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'aruba/cucumber'
|
@@ -0,0 +1,50 @@
|
|
1
|
+
require 'db_obfuscation/environment'
|
2
|
+
require 'db_obfuscation/database'
|
3
|
+
require 'db_obfuscation/filtering'
|
4
|
+
require 'db_obfuscation/query_builder'
|
5
|
+
require 'db_obfuscation/batch_formulator'
|
6
|
+
|
7
|
+
# Entry point for obfuscation: walks every table returned by
# Filtering.obfuscation_config and rewrites its rows in slices of ids.
module DbObfuscation
  extend self

  # Obfuscates every table in the filtered configuration.
  #
  # @param step [Integer] number of ids handled per UPDATE batch
  #   (passed straight to each_slice)
  # @return [Hash] the table => column-strategy configuration that was iterated
  def obfuscate(step)
    config = Filtering.obfuscation_config([:string])
    config.each do |table, cfg|
      DbObfuscation.logging.info "Obfuscating #{table}"
      update(table, cfg, step)
    end
  end

  private

  # Updates one table slice by slice. A failing slice is logged and skipped so
  # the remaining slices (and tables) are still processed.
  def update(table, config, step)
    # Depends only on the table's config, so compute it once per table
    # instead of once per slice.
    date_cols = date_columns(config)

    ids(table).each_slice(step) do |id_slice|
      begin
        batch = BatchFormulator.batch_for(config, id_slice)
        multi_update(table, batch, date_cols) unless batch.empty?
      rescue => e
        log_failure(table, e)
      end
    end
  end

  # Logs a failed slice. The backtrace is written one frame per line rather
  # than as a single inspected Array.
  def log_failure(table, error)
    DbObfuscation.logging.error 'Encountered Exception'
    DbObfuscation.logging.error "#{table} encountered #{error.message}"
    DbObfuscation.logging.error Array(error.backtrace).join("\n")
  end

  # Names of the columns whose strategy is :date_strategy.
  def date_columns(config)
    config.select { |_column, strategy| strategy == :date_strategy }.keys
  end

  # All values of the :id column for +table+.
  # NOTE(review): presumably this loads full rows just to read :id; a
  # column-only query may be cheaper on large tables - confirm against the
  # Sequel dataset API and tables lacking an id column before changing.
  def ids(table)
    DB[table].map(:id)
  end

  def multi_update(table, batch, date_columns)
    DB.run QueryBuilder.multi_update_sql(table, batch, date_columns)
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'db_obfuscation/database'
|
2
|
+
require 'db_obfuscation/obfuscator'
|
3
|
+
|
4
|
+
module DbObfuscation
  # Turns a column => strategy configuration plus a list of ids into one hash
  # of obfuscated, SQL-literal column values per id.
  class BatchFormulator
    # Class-level shortcut for BatchFormulator.new.batch_for(config, ids).
    def self.batch_for(*args)
      new.batch_for(*args)
    end

    # @param config [Hash] column => obfuscation strategy
    # @param ids [Enumerable] ids to build rows for
    # @return [Array<Hash>] one hash per id, each carrying an :id entry
    def batch_for(config, ids)
      ids.map { |row_id| obfuscate_row(config, row_id) }
    end

    private

    # Builds a single row: every configured column gets a freshly obfuscated
    # value passed through DB.literal, and :id is set last.
    def obfuscate_row(config, id)
      row = {}
      config.each do |column, strategy|
        fake_value = Obfuscator.obfuscate(strategy)
        row[column] = DB.literal(fake_value)
      end
      row.merge(id: id)
    end
  end
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
require 'pathname'
|
3
|
+
|
4
|
+
module DbObfuscation
  # Reads the YAML files in the obfuscation configuration directory:
  # database.yml, whitelisted_tables.yml, truncation_patterns.yml and
  # table_strategies/*.yml.
  class Config
    class << self
      # Directory the configuration is read from: the value assigned through
      # config_path=, otherwise DbObfuscation.config_dir.
      #
      # @return [Pathname, nil] nil when neither has been set
      def config_path
        path = @config || DbObfuscation.config_dir
        # config_dir is a plain accessor and may hold a String; wrap it so
        # callers can always use Pathname#join.
        path && Pathname.new(path)
      end

      # @param config [String, Pathname] configuration directory
      def config_path=(config)
        @config = Pathname.new(config)
      end

      # @return [Object] parsed contents of database.yml
      def db_config
        YAML.load_file(config_path.join('database.yml'))
      end

      # @return [Array] parsed whitelisted_tables.yml; [] when the file is empty
      def whitelisted_tables
        YAML.load_file(config_path.join('whitelisted_tables.yml')) || []
      end

      # Merged contents of every table_strategies/*.yml file, cached per
      # configuration directory.
      #
      # @return [Hash]
      def table_strategies
        memoized(:table_strategies) { load_table_strategies }
      end

      # Entries of truncation_patterns.yml as symbols, cached per
      # configuration directory.
      #
      # @return [Array<Symbol>]
      def truncation_patterns
        memoized(:truncation_patterns) do
          patterns = YAML.load_file(config_path.join('truncation_patterns.yml')) || []
          patterns.map(&:to_sym)
        end
      end

      private

      # Caches the block's result under the current config path, so pointing
      # Config at a different directory never returns values read from the
      # previous one.
      def memoized(name)
        @memo ||= {}
        key = [config_path.to_s, name]
        @memo[key] = yield unless @memo.key?(key)
        @memo[key]
      end

      # Files are merged in sorted order so that, when two files define the
      # same table, the winner does not depend on directory enumeration order.
      def load_table_strategies
        pattern = "#{config_path}/table_strategies/*.yml"
        Dir[pattern].sort.each_with_object({}) do |file, strategies|
          next unless File.file?(file)

          loaded = YAML.load_file(file)
          # An empty YAML file parses to a non-Hash value; skip it instead of
          # letting merge! raise.
          strategies.merge!(loaded) if loaded.is_a?(Hash)
        end
      end
    end
  end
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'pathname'
|
2
|
+
require 'logger'
|
3
|
+
|
4
|
+
# Process-wide settings for the gem plus load-path setup for its lib/ and
# cli/ directories.
module DbObfuscation
  # Directory three levels above this file.
  ROOT = Pathname.new(File.expand_path('../../../', __FILE__)).freeze

  class << self
    # config_dir: directory holding the obfuscation configuration.
    # logging:    logger object used for progress and error messages.
    attr_accessor :config_dir, :logging
  end
end

# $LOAD_PATH holds Strings, so compare and insert Strings: a Pathname never
# equals a String entry, which would make the `include?` guard a no-op.
lib_path = DbObfuscation::ROOT.join('lib').to_s
cli_path = DbObfuscation::ROOT.join('cli').to_s
$LOAD_PATH.unshift(lib_path) unless $LOAD_PATH.include?(lib_path)
$LOAD_PATH.unshift(cli_path) unless $LOAD_PATH.include?(cli_path)
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'db_obfuscation/database'
|
2
|
+
require 'db_obfuscation/filtering/column'
|
3
|
+
require 'db_obfuscation/truncation'
|
4
|
+
require 'db_obfuscation/obfuscation_strategy'
|
5
|
+
require 'active_support/core_ext/object/blank'
|
6
|
+
|
7
|
+
module DbObfuscation
  # Works out which columns of which tables are obfuscated, and with which
  # strategy, by combining database schema information with the user's
  # table strategy configuration.
  module Filtering
    extend self

    # Builds the table => { column => strategy } map for all tables that are
    # neither truncated, whitelisted nor :schema_info. Tables whose column map
    # ends up blank are left out.
    #
    # @param types [Array<Symbol>] column types eligible for default obfuscation
    # @return [Hash]
    def obfuscation_config(types)
      result = {}
      filter_tables.each do |table|
        per_table = config_per_table(table, types)
        result[table] = per_table if per_table.present?
      end
      result
    end

    private

    # Default strategies for the columns that pass Column.filter, overridden
    # by the user's configuration, minus anything marked :whitelisted.
    def config_per_table(table, types)
      defaults = {}
      Column.columns_type(table).each do |column_name, column_type|
        kept = Column.filter(column_name, column_type, types)
        defaults[column_name] = default_obfuscation_strategy(kept) if kept
      end

      combined = defaults.merge(user_config(table))
      reject_whitelisted_columns(combined)
    end

    def default_obfuscation_strategy(column)
      ObfuscationStrategy.strategy(column)
    end

    def filter_tables
      all_tables = DbObfuscation::DB.tables
      all_tables - exclude_tables
    end

    # The user's strategies for +table+ with column names converted to symbols;
    # an empty hash when the table has no entry.
    def user_config(table)
      raw = DbObfuscation::Config.table_strategies[table.to_s] || {}
      symbolized = {}
      raw.each { |column, strategy| symbolized[column.to_sym] = strategy }
      symbolized
    end

    def reject_whitelisted_columns(config)
      config.reject { |_column, strategy| strategy == :whitelisted }
    end

    def exclude_tables
      truncated = DbObfuscation::Truncation.tables
      truncated + whitelisted_tables + [:schema_info]
    end

    def whitelisted_tables
      DbObfuscation::Config.whitelisted_tables.map { |name| name.to_sym }
    end
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
require 'db_obfuscation/database'
|
2
|
+
|
3
|
+
module DbObfuscation
  module Filtering
    # Column-level helpers: schema lookup and the checks that decide whether a
    # column is eligible for default obfuscation.
    module Column
      class << self
        # @param table_name [Symbol] table to inspect
        # @return [Hash] column name => the :type entry of its schema details
        def columns_type(table_name)
          columns(table_name).each_with_object({}) do |(name, details), types|
            types[name] = details[:type]
          end
        end

        # Schema information for the table as returned by DB.schema.
        def columns(table_name)
          DbObfuscation::DB.schema(table_name)
        end

        # True when +column_type+ is one of +expected_column_types+.
        def type?(expected_column_types, column_type)
          expected_column_types.include?(column_type)
        end

        # True when the column name ends in "type".
        def polymorphic?(column_name)
          !column_name.match(/type$/).nil?
        end

        # True when the column name ends in "id".
        def ending_in_id?(column_name)
          !column_name.match(/id$/).nil?
        end

        # Returns +column_name+ when it is eligible, nil otherwise. A column is
        # rejected if its name ends in "type" or "id", or if its type is not
        # among +expected_types+.
        def filter(column_name, column_type, expected_types)
          rejected = polymorphic?(column_name) ||
                     ending_in_id?(column_name) ||
                     !type?(expected_types, column_type)
          rejected ? nil : column_name
        end
      end
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module DbObfuscation
  module Filtering
    # Selects the tables whose names match the configured truncation patterns.
    class Truncation
      # Class-level entry point that forwards to the private instance method
      # of the same name.
      def self.matches_patterns(*args)
        new.send(:matches_patterns, *args)
      end

      private

      # A table matches a pattern when its name is exactly the pattern, or the
      # pattern followed by "_" and anything else. Each pattern is interpolated
      # into the regular expression as-is.
      #
      # @param tables [Array] table names
      # @param patterns [Array] name prefixes to look for
      # @return [Array] matching tables without duplicates
      def matches_patterns(tables, patterns)
        matched = patterns.flat_map do |prefix|
          tables.grep(/^#{prefix}(_.*)*$/)
        end
        matched.uniq
      end
    end
  end
end
|