db_obfuscation 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (70) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +6 -0
  3. data/Gemfile +13 -0
  4. data/Gemfile.lock +83 -0
  5. data/LICENSE +21 -0
  6. data/README.md +121 -0
  7. data/TODO +15 -0
  8. data/bin/console +25 -0
  9. data/bin/db_obfuscation +121 -0
  10. data/bin/obfuscation_test +54 -0
  11. data/cli/db_dump.rb +29 -0
  12. data/cli/migrator.rb +26 -0
  13. data/cli/seeder.rb +52 -0
  14. data/db_obfuscation.gemspec +25 -0
  15. data/features/bin/dump.feature +21 -0
  16. data/features/bin/obfuscation.feature +12 -0
  17. data/features/bin/test_database_tasks.feature +16 -0
  18. data/features/support.rb +1 -0
  19. data/lib/db_obfuscation.rb +50 -0
  20. data/lib/db_obfuscation/batch_formulator.rb +26 -0
  21. data/lib/db_obfuscation/config.rb +43 -0
  22. data/lib/db_obfuscation/database.rb +8 -0
  23. data/lib/db_obfuscation/environment.rb +14 -0
  24. data/lib/db_obfuscation/filtering.rb +56 -0
  25. data/lib/db_obfuscation/filtering/column.rb +40 -0
  26. data/lib/db_obfuscation/filtering/truncation.rb +18 -0
  27. data/lib/db_obfuscation/obfuscation_strategy.rb +22 -0
  28. data/lib/db_obfuscation/obfuscator.rb +65 -0
  29. data/lib/db_obfuscation/query_builder.rb +62 -0
  30. data/lib/db_obfuscation/truncation.rb +39 -0
  31. data/lib/db_obfuscation/util/trigger.rb +83 -0
  32. data/lib/db_obfuscation/version.rb +4 -0
  33. data/spec/cli/db_dump_spec.rb +33 -0
  34. data/spec/cli/migrator_spec.rb +59 -0
  35. data/spec/cli/seeder_spec.rb +33 -0
  36. data/spec/config/database.yml +5 -0
  37. data/spec/config/table_strategies/table_1.yml +3 -0
  38. data/spec/config/table_strategies/table_2.yml +4 -0
  39. data/spec/config/table_strategies/truncation_table_1.yml +3 -0
  40. data/spec/config/table_strategies/whitelisted_table_1.yml +3 -0
  41. data/spec/config/truncation_patterns.yml +2 -0
  42. data/spec/config/whitelisted_tables.yml +1 -0
  43. data/spec/db_obfuscation/batch_formulator_spec.rb +36 -0
  44. data/spec/db_obfuscation/config_spec.rb +60 -0
  45. data/spec/db_obfuscation/database_spec.rb +10 -0
  46. data/spec/db_obfuscation/filtering/column_spec.rb +82 -0
  47. data/spec/db_obfuscation/filtering/truncation_spec.rb +41 -0
  48. data/spec/db_obfuscation/filtering_spec.rb +39 -0
  49. data/spec/db_obfuscation/obfuscation_strategy_spec.rb +43 -0
  50. data/spec/db_obfuscation/obfuscator_spec.rb +150 -0
  51. data/spec/db_obfuscation/query_builder_spec.rb +259 -0
  52. data/spec/db_obfuscation/truncation_spec.rb +31 -0
  53. data/spec/db_obfuscation/util/trigger_spec.rb +126 -0
  54. data/spec/integration/obfuscation_spec.rb +69 -0
  55. data/spec/spec_helper.rb +3 -0
  56. data/spec/test_db_setup/migrations/1_add_table_1.rb +18 -0
  57. data/spec/test_db_setup/migrations/2_add_table_2.rb +19 -0
  58. data/spec/test_db_setup/migrations/3_add_truncation_table_1.rb +14 -0
  59. data/spec/test_db_setup/migrations/4_add_whitelisted_table_1.rb +14 -0
  60. data/spec/test_db_setup/migrations/5_add_table_without_any_user_defined_obfuscation_strategies.rb +18 -0
  61. data/spec/test_db_setup/migrations/6_add_table_without_any_obfuscatable_columns.rb +15 -0
  62. data/spec/test_db_setup/migrations/7_add_audit_truncation_table.rb +13 -0
  63. data/spec/test_db_setup/seeds/audit_truncation_table.yml +7 -0
  64. data/spec/test_db_setup/seeds/table_1.yml +13 -0
  65. data/spec/test_db_setup/seeds/table_2.yml +15 -0
  66. data/spec/test_db_setup/seeds/table_without_any_obfuscatable_columns.yml +7 -0
  67. data/spec/test_db_setup/seeds/table_without_any_user_defined_obfuscation_strategies.yml +13 -0
  68. data/spec/test_db_setup/seeds/truncation_table_1.yml +9 -0
  69. data/spec/test_db_setup/seeds/whitelisted_table_1.yml +9 -0
  70. metadata +159 -0
@@ -0,0 +1,29 @@
1
+ require 'yaml'
2
+
3
require 'shellwords'

module DbObfuscation
  module Cli
    # Builds and runs a `pg_dump` command for the database described in a
    # YAML connection file.
    module DbDump
      # Dumps the configured database into +dump_name+.
      #
      # @param config_file [String] path to a YAML file; the 'password',
      #   'host', 'username' and 'database' keys are read from it
      # @param dump_name [String] file name handed to `pg_dump -f`
      # @return [Boolean, nil] whatever Kernel.system returns for the command
      def self.dump(config_file, dump_name)
        config = db_config(config_file)
        Kernel.system cmd(config, dump_name)
      end

      # Loads the connection settings from the YAML file.
      def self.db_config(file)
        YAML.load_file(file)
      end
      private_class_method :db_config

      # Assembles the single-line shell command. Every value taken from the
      # config or from the caller is shell-escaped, so passwords or file names
      # containing spaces or shell metacharacters cannot break (or inject
      # into) the command. Plain alphanumeric values are emitted unchanged.
      def self.cmd(config, dump_name)
        [
          "PGPASSWORD=#{shell_arg(config['password'])}",
          'pg_dump',
          '-h', shell_arg(config['host']),
          '-U', shell_arg(config['username']), '-w',
          '-Fc', '-f', shell_arg(dump_name),
          shell_arg(config['database'])
        ].join(' ')
      end
      private_class_method :cmd

      # Escapes one value for use as a single shell word; nil becomes ''.
      def self.shell_arg(value)
        Shellwords.escape(value.to_s)
      end
      private_class_method :shell_arg
    end
  end
end
@@ -0,0 +1,26 @@
1
+ require 'yaml'
2
+
3
require 'shellwords'

module DbObfuscation
  module Cli
    # Runs Sequel migrations through the `sequel` command-line tool against
    # the database described in a YAML connection file.
    class Migrator
      # Applies the migrations found in +path_to_migrations+.
      #
      # @param config_file [String] path to a YAML file; 'host' and
      #   'database' are read from it
      # @param path_to_migrations [String] directory passed to `sequel -m`
      # @return [Boolean, nil] the result of Kernel#system
      def self.migrate(config_file, path_to_migrations)
        system(command(config_file, path_to_migrations))
      end

      # Migrates down to version 0 (`sequel -m <path> -M 0`).
      #
      # @param config_file [String] see {.migrate}
      # @param path_to_migrations [String] see {.migrate}
      # @return [Boolean, nil] the result of Kernel#system
      def self.down_migrate(config_file, path_to_migrations)
        system(command(config_file, path_to_migrations, '-M 0'))
      end

      # Loads the connection settings from the YAML file.
      def self.db_config(config_file)
        YAML.load_file(config_file)
      end
      private_class_method :db_config

      # Builds the shell command shared by both directions. The migrations
      # path and the database URL are shell-escaped so unusual characters
      # cannot split or extend the command; +options+ are fixed literals
      # supplied by this class and are inserted verbatim.
      def self.command(config_file, path_to_migrations, *options)
        config = db_config(config_file)
        db_name = "postgres://#{config['host']}/#{config['database']}"

        [
          'sequel', '-m', Shellwords.escape(path_to_migrations.to_s),
          *options,
          Shellwords.escape(db_name)
        ].join(' ')
      end
      private_class_method :command
    end
  end
end
@@ -0,0 +1,52 @@
1
+ require 'yaml'
2
+ require 'pathname'
3
+ require 'sequel'
4
+
5
module DbObfuscation
  module Cli
    # Inserts seed rows from a directory of YAML files into a database.
    # Each file is named after its table (<table>.yml) and is expected to
    # hold a mapping whose values are the rows to insert.
    class Seeder

      # @param config_file [String] path to a YAML file whose contents are
      #   passed to Sequel.connect
      # @param path_to_seeds [String] directory containing the seed files
      def initialize(config_file, path_to_seeds)
        @db_connection = db_connection(config_file)
        @seed_data = seed_data(seed_files(path_to_seeds))
      end

      # Convenience wrapper: builds a seeder with +args+ and runs it.
      def self.seed(*args)
        new(*args).seed
      end

      # Inserts every loaded row into its table, one insert per row.
      def seed
        @seed_data.each do |table, rows|
          rows.each { |row| @db_connection[table].insert(row) }
        end
      end

      private

      def db_connection(config_file)
        Sequel.connect(YAML.load_file(config_file))
      end

      # Only regular *.yml files are seeds; other entries in the directory
      # (sub-directories, READMEs, editor backups) used to be fed to the YAML
      # loader and either crashed or produced bogus table names.
      def seed_files(path)
        pattern = Pathname.new(path).join('*.yml').to_s
        Dir[pattern].select { |file| File.file?(file) }
      end

      def table_name(file)
        File.basename(file, '.yml').to_sym
      end

      # Maps table name => array of rows. An empty seed file loads as a falsy
      # value, which is treated as "no rows" instead of raising.
      def seed_data(files)
        files.each_with_object({}) do |file, data|
          contents = YAML.load_file(file) || {}
          data[table_name(file)] = contents.map { |_, row| row }
        end
      end
    end
  end
end
@@ -0,0 +1,25 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'db_obfuscation/version'
5
+
6
# Gem metadata for db_obfuscation. The file list comes from git, so the gem
# must be built from inside a git checkout.
Gem::Specification.new do |spec|
  # Identity
  spec.name     = 'db_obfuscation'
  spec.version  = DbObfuscation::VERSION
  spec.date     = '2015-07-21'
  spec.license  = 'MIT'
  spec.homepage = 'https://github.com/CaseCommonsDevOps/db_obfuscation'

  # Ownership
  spec.authors = ['Case Commons, LLC', 'Rajat Agrawal']
  spec.email   = ['casebook-dev@googlegroups.com', 'agrawal.rajat.89@gmail.com']

  # Descriptions; runs of whitespace in the heredoc are collapsed to one space.
  spec.summary     = 'A gem to obfuscate a production database with fake values for testing with a production size database'
  spec.description = <<-DESCRIPTION.gsub(/\s{2,}/, ' ')
    db_obfuscation is a gem that helps to prepare a production size obfuscated database. This obfuscated database can be used for internal testing purposes like user acceptance testing, QA/Regression testing.

    db_obfuscation takes a production database and updates data in every row in each table with fake data. db_obfuscation ensures that associations between different tables are still maintained.
  DESCRIPTION

  # Packaged files: everything tracked by git (NUL-separated listing).
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/db_obfuscation}) { |path| File.basename(path) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']
end
@@ -0,0 +1,21 @@
1
+ Feature: Dump
2
+ This is a command to compress an obfuscated database
3
+ A compressed db is easier to transport
4
+ It takes a config path for the database connection and
5
+ Name of the compressed database dump
6
+
7
+ Scenario: Successful database dump
8
+
9
+ When I run `../../bin/db_obfuscation dump -c ../../spec/config/database.yml -n test_compressed_db`
10
+ Then the output should contain "Start process"
11
+ Then the output should contain "Start dumping"
12
+ Then the output should contain "Finish dumping"
13
+ Then the output should contain "Process finished"
14
+ Then the output should not contain "Caught an exception"
15
+
16
+ Scenario: Unsuccessful database dump
17
+
18
+ When I run `../../bin/db_obfuscation dump -c incorrect_database.yml -n test_compressed_db`
19
+ Then the output should contain "Start process"
20
+ Then the output should contain "Process finished"
21
+ Then the output should contain "Caught an exception"
@@ -0,0 +1,12 @@
1
+ Feature: Db_Obfuscation
2
+ This is a command line thor task to run obfuscation for a user
3
+ It takes a folder path for obfuscation configuration
4
+ And obfuscates the database
5
+
6
+ Scenario: Successfully Obfuscating a Database
7
+ When I run `../../bin/db_obfuscation obfuscate -c ../../spec/config/ -s 200`
8
+ Then the exit status should be 0
9
+
10
+ Scenario: Unsuccessfully obfuscating a Database
11
+ When I run `../../bin/db_obfuscation obfuscate -c incorrect_config_folder -s 200`
12
+ Then the exit status should not be 0
@@ -0,0 +1,16 @@
1
+ Feature: Test Database tasks
2
+ Scenario: Preparing a test database
3
+ Given I run `createdb obfuscation_test`
4
+
5
+ When I run `../../bin/obfuscation_test initialize_database -c ../../spec/config/database.yml -d ../../spec/test_db_setup`
6
+ Then the output should contain "Dropping database obfuscation_test"
7
+ Then the output should contain "DROP DATABASE obfuscation_test;"
8
+ Then the output should contain "Successfully dropped the database"
9
+
10
+ Then the output should contain "Creating database"
11
+ Then the output should contain "CREATE DATABASE obfuscation_test OWNER"
12
+ Then the output should contain "Finished creating database"
13
+ Then the output should contain "Starting up migration"
14
+ Then the output should contain "Finished migration"
15
+ Then the output should contain "Starting seeding"
16
+ Then the output should contain "Finished seeding"
@@ -0,0 +1 @@
1
+ require 'aruba/cucumber'
@@ -0,0 +1,50 @@
1
+ require 'db_obfuscation/environment'
2
+ require 'db_obfuscation/database'
3
+ require 'db_obfuscation/filtering'
4
+ require 'db_obfuscation/query_builder'
5
+ require 'db_obfuscation/batch_formulator'
6
+
7
# Entry point of the obfuscation run: walks every table selected by
# Filtering and rewrites its rows in batches.
module DbObfuscation
  extend self

  # Obfuscates every table returned by Filtering.obfuscation_config.
  #
  # @param step [Integer] number of row ids handled per UPDATE batch
  # @return [Hash] the table => column-strategy configuration that was used
  def obfuscate(step)
    config = Filtering.obfuscation_config([:string])
    config.each do |table, cfg|
      DbObfuscation.logging.info "Obfuscating #{table}"
      update(table, cfg, step)
    end
  end

  private

  # Updates one table slice by slice. A failing slice is logged and skipped
  # so the remaining slices (and tables) are still processed.
  def update(table, config, step)
    # Depends only on the table config, so compute it once rather than once
    # per slice.
    date_cols = date_columns(config)

    ids(table).each_slice(step) do |id_slice|
      begin
        batch = BatchFormulator.batch_for(config, id_slice)
        multi_update(table, batch, date_cols) unless batch.empty?
      rescue => e
        DbObfuscation.logging.error 'Encountered Exception'
        DbObfuscation.logging.error "#{table} encountered #{e.message}"
        DbObfuscation.logging.error e.backtrace
      end
    end
  end

  # Names of the columns whose strategy is :date_strategy.
  def date_columns(config)
    config.select { |_column, strategy| strategy == :date_strategy }.keys
  end

  # All ids of the table. select_map restricts the query to the id column
  # instead of fetching complete rows only to discard everything but the id.
  def ids(table)
    DB[table].select_map(:id)
  end

  # Builds the batched UPDATE statement and executes it.
  def multi_update(table, batch, date_columns)
    sql_query = QueryBuilder.multi_update_sql(table,
                                              batch,
                                              date_columns)
    DB.run sql_query
  end
end
@@ -0,0 +1,26 @@
1
+ require 'db_obfuscation/database'
2
+ require 'db_obfuscation/obfuscator'
3
+
4
module DbObfuscation
  # Produces the replacement values for a batch of rows: one hash per id,
  # holding an SQL literal for every configured column plus the id itself.
  class BatchFormulator

    # Class-level shortcut for #batch_for on a fresh instance.
    def self.batch_for(*args)
      new.batch_for(*args)
    end

    # @param config [Hash] column => obfuscation strategy
    # @param ids [Array] ids of the rows in this batch
    # @return [Array<Hash>] one { column => literal, ..., id: id } per id
    def batch_for(config, ids)
      ids.map { |id| obfuscate_row(config, id) }
    end

    private

    # Generates a new value per column and quotes it with DB.literal. The id
    # is merged in last, so it overrides any :id entry coming from +config+.
    def obfuscate_row(config, id)
      row = config.each_with_object({}) do |(column, strategy), values|
        values[column] = DB.literal(Obfuscator.obfuscate(strategy))
      end
      row.merge(id: id)
    end
  end
end
@@ -0,0 +1,43 @@
1
+ require 'yaml'
2
+ require 'pathname'
3
+
4
module DbObfuscation
  # Reads the YAML configuration files of an obfuscation run from the
  # configuration directory.
  class Config
    class << self
      # Directory holding the configuration files: the explicitly assigned
      # path, otherwise DbObfuscation.config_dir. The value is always wrapped
      # in a Pathname so that a plain String directory works with the #join
      # calls below; nil is returned when neither is set.
      #
      # @return [Pathname, nil]
      def config_path
        path = @config || DbObfuscation.config_dir
        path && Pathname.new(path)
      end

      # @param config [String, Pathname] configuration directory
      def config_path=(config)
        @config = Pathname.new(config)
      end

      # @return [Object] parsed contents of database.yml
      def db_config
        YAML.load_file(config_path.join('database.yml'))
      end

      # @return [Object] parsed contents of whitelisted_tables.yml
      def whitelisted_tables
        YAML.load_file(config_path.join('whitelisted_tables.yml'))
      end

      # Merged contents of every table_strategies/*.yml file. Memoized: the
      # files are read once per process.
      #
      # @return [Hash]
      def table_strategies
        @@table_strategies ||= load_table_strategies
      end

      # Entries of truncation_patterns.yml converted to symbols. Memoized.
      #
      # @return [Array<Symbol>]
      def truncation_patterns
        @truncation_tables ||= YAML.load_file(config_path.join('truncation_patterns.yml')).map(&:to_sym)
      end

      private

      # Merges all strategy files into one hash; on duplicate keys the file
      # merged later wins. An empty file loads as a falsy value and
      # contributes nothing instead of raising inside merge!.
      def load_table_strategies
        pattern = "#{config_path}/table_strategies/*.yml"
        Dir[pattern].each_with_object({}) do |file, strategies|
          next unless File.file?(file)

          strategies.merge!(YAML.load_file(file) || {})
        end
      end
    end
  end
end
@@ -0,0 +1,8 @@
1
+ require 'sequel'
2
+ require 'db_obfuscation/config'
3
+
4
module DbObfuscation
  # Shared Sequel connection, opened when this file is loaded, using the
  # settings returned by Config.db_config.
  DB = Sequel.connect(Config.db_config)

  # Global Sequel settings: UTC as the default timezone, and the migration
  # extension loaded.
  Sequel.default_timezone = :utc
  Sequel.extension(:migration)
end
@@ -0,0 +1,14 @@
1
+ require 'pathname'
2
+ require 'logger'
3
+
4
module DbObfuscation
  # Directory three levels above this file.
  ROOT = Pathname.new(File.expand_path('../../../', __FILE__)).freeze

  class << self
    # config_dir: directory holding the obfuscation configuration files.
    # logging:    logger-like object the library reports progress to.
    attr_accessor :config_dir, :logging
  end
end

# Put <ROOT>/lib and <ROOT>/cli on the load path, lib first so that cli ends
# up in front of it. The entries are compared and stored as Strings:
# $LOAD_PATH holds Strings, so checking it for a Pathname never matched and
# the guard against duplicate entries had no effect.
%w[lib cli].each do |dir|
  load_dir = DbObfuscation::ROOT.join(dir).to_s
  $LOAD_PATH.unshift(load_dir) unless $LOAD_PATH.include?(load_dir)
end
@@ -0,0 +1,56 @@
1
+ require 'db_obfuscation/database'
2
+ require 'db_obfuscation/filtering/column'
3
+ require 'db_obfuscation/truncation'
4
+ require 'db_obfuscation/obfuscation_strategy'
5
+ require 'active_support/core_ext/object/blank'
6
+
7
module DbObfuscation
  # Decides which tables and columns take part in obfuscation and which
  # strategy each column uses.
  module Filtering
    extend self

    # Builds the table => { column => strategy } mapping for every table that
    # is not excluded. Tables whose column mapping is blank are left out.
    #
    # @param types [Array<Symbol>] column types eligible for default strategies
    # @return [Hash]
    def obfuscation_config(types)
      filter_tables.each_with_object({}) do |table, result|
        per_table = config_per_table(table, types)
        next unless per_table.present?

        result[table] = per_table
      end
    end

    private

    # Default strategies for the columns accepted by Column.filter, overlaid
    # with the user's per-table settings; columns marked :whitelisted are
    # dropped from the result.
    def config_per_table(table, types)
      defaults = {}
      Column.columns_type(table).each do |column_name, column_type|
        accepted = Column.filter(column_name, column_type, types)
        defaults[column_name] = default_obfuscation_strategy(accepted) if accepted
      end

      reject_whitelisted_columns(defaults.merge(user_config(table)))
    end

    def default_obfuscation_strategy(column)
      ObfuscationStrategy.strategy(column)
    end

    # All database tables minus the excluded ones.
    def filter_tables
      DbObfuscation::DB.tables - exclude_tables
    end

    # User-defined strategies for the table (looked up by its string name),
    # with the column names converted to symbols.
    def user_config(table)
      configured = DbObfuscation::Config.table_strategies[table.to_s] || {}
      symbolized = {}
      configured.each { |column, strategy| symbolized[column.to_sym] = strategy }
      symbolized
    end

    def reject_whitelisted_columns(config)
      config.reject { |_column, strategy| strategy == :whitelisted }
    end

    # Tables never obfuscated: truncated tables, whitelisted tables and
    # the :schema_info table.
    def exclude_tables
      truncated = DbObfuscation::Truncation.tables
      truncated + whitelisted_tables + [:schema_info]
    end

    def whitelisted_tables
      DbObfuscation::Config.whitelisted_tables.map(&:to_sym)
    end
  end
end
@@ -0,0 +1,40 @@
1
+ require 'db_obfuscation/database'
2
+
3
module DbObfuscation
  module Filtering
    # Column-level helpers: schema lookup and the rules deciding whether a
    # column is eligible for a default obfuscation strategy.
    module Column
      class << self
        # @param table_name [Symbol] table to inspect
        # @return [Hash] column name => value of the :type entry in its schema
        def columns_type(table_name)
          columns(table_name).each_with_object({}) do |(column_name, column_details), types|
            types[column_name] = column_details[:type]
          end
        end

        # Schema of the table as returned by DB.schema; iterated above as
        # (column name, details hash) pairs.
        def columns(table_name)
          DbObfuscation::DB.schema(table_name)
        end

        # @return [Boolean] whether +column_type+ is one of the expected types
        def type?(expected_column_types, column_type)
          expected_column_types.include? column_type
        end

        # True when the name ends in "type". Works for Symbol and String names.
        def polymorphic?(column_name)
          column_name.to_s.end_with?('type')
        end

        # True when the name ends in "id". Works for Symbol and String names.
        def ending_in_id?(column_name)
          column_name.to_s.end_with?('id')
        end

        # Returns +column_name+ when the column may be obfuscated by default:
        # it must not end in "type" or "id" and its type must be expected.
        # Returns nil otherwise.
        def filter(column_name, column_type, expected_types)
          return nil if polymorphic?(column_name) || ending_in_id?(column_name)

          column_name if type?(expected_types, column_type)
        end
      end
    end
  end
end
@@ -0,0 +1,18 @@
1
module DbObfuscation
  module Filtering
    # Selects the tables whose names match the configured truncation patterns.
    class Truncation

      # Class-level entry point delegating to the private instance method.
      #
      # @param tables [Array<Symbol, String>] candidate table names
      # @param patterns [Array<Symbol, String>] name prefixes to match
      # @return [Array] matching tables, without duplicates, ordered by
      #   pattern and then by their position in +tables+
      def self.matches_patterns(*args)
        new.send(:matches_patterns, *args)
      end

      private

      # A table matches a pattern when its name is exactly the pattern or the
      # pattern followed by "_<anything>". The pattern is interpolated into
      # the regexp as-is, so regexp metacharacters in it keep their meaning.
      def matches_patterns(tables, patterns)
        patterns.flat_map do |pattern|
          tables.grep(/\A#{pattern}(?:_.*)?\z/)
        end.uniq
      end
    end
  end
end