db_obfuscation 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +6 -0
  3. data/Gemfile +13 -0
  4. data/Gemfile.lock +83 -0
  5. data/LICENSE +21 -0
  6. data/README.md +121 -0
  7. data/TODO +15 -0
  8. data/bin/console +25 -0
  9. data/bin/db_obfuscation +121 -0
  10. data/bin/obfuscation_test +54 -0
  11. data/cli/db_dump.rb +29 -0
  12. data/cli/migrator.rb +26 -0
  13. data/cli/seeder.rb +52 -0
  14. data/db_obfuscation.gemspec +25 -0
  15. data/features/bin/dump.feature +21 -0
  16. data/features/bin/obfuscation.feature +12 -0
  17. data/features/bin/test_database_tasks.feature +16 -0
  18. data/features/support.rb +1 -0
  19. data/lib/db_obfuscation.rb +50 -0
  20. data/lib/db_obfuscation/batch_formulator.rb +26 -0
  21. data/lib/db_obfuscation/config.rb +43 -0
  22. data/lib/db_obfuscation/database.rb +8 -0
  23. data/lib/db_obfuscation/environment.rb +14 -0
  24. data/lib/db_obfuscation/filtering.rb +56 -0
  25. data/lib/db_obfuscation/filtering/column.rb +40 -0
  26. data/lib/db_obfuscation/filtering/truncation.rb +18 -0
  27. data/lib/db_obfuscation/obfuscation_strategy.rb +22 -0
  28. data/lib/db_obfuscation/obfuscator.rb +65 -0
  29. data/lib/db_obfuscation/query_builder.rb +62 -0
  30. data/lib/db_obfuscation/truncation.rb +39 -0
  31. data/lib/db_obfuscation/util/trigger.rb +83 -0
  32. data/lib/db_obfuscation/version.rb +4 -0
  33. data/spec/cli/db_dump_spec.rb +33 -0
  34. data/spec/cli/migrator_spec.rb +59 -0
  35. data/spec/cli/seeder_spec.rb +33 -0
  36. data/spec/config/database.yml +5 -0
  37. data/spec/config/table_strategies/table_1.yml +3 -0
  38. data/spec/config/table_strategies/table_2.yml +4 -0
  39. data/spec/config/table_strategies/truncation_table_1.yml +3 -0
  40. data/spec/config/table_strategies/whitelisted_table_1.yml +3 -0
  41. data/spec/config/truncation_patterns.yml +2 -0
  42. data/spec/config/whitelisted_tables.yml +1 -0
  43. data/spec/db_obfuscation/batch_formulator_spec.rb +36 -0
  44. data/spec/db_obfuscation/config_spec.rb +60 -0
  45. data/spec/db_obfuscation/database_spec.rb +10 -0
  46. data/spec/db_obfuscation/filtering/column_spec.rb +82 -0
  47. data/spec/db_obfuscation/filtering/truncation_spec.rb +41 -0
  48. data/spec/db_obfuscation/filtering_spec.rb +39 -0
  49. data/spec/db_obfuscation/obfuscation_strategy_spec.rb +43 -0
  50. data/spec/db_obfuscation/obfuscator_spec.rb +150 -0
  51. data/spec/db_obfuscation/query_builder_spec.rb +259 -0
  52. data/spec/db_obfuscation/truncation_spec.rb +31 -0
  53. data/spec/db_obfuscation/util/trigger_spec.rb +126 -0
  54. data/spec/integration/obfuscation_spec.rb +69 -0
  55. data/spec/spec_helper.rb +3 -0
  56. data/spec/test_db_setup/migrations/1_add_table_1.rb +18 -0
  57. data/spec/test_db_setup/migrations/2_add_table_2.rb +19 -0
  58. data/spec/test_db_setup/migrations/3_add_truncation_table_1.rb +14 -0
  59. data/spec/test_db_setup/migrations/4_add_whitelisted_table_1.rb +14 -0
  60. data/spec/test_db_setup/migrations/5_add_table_without_any_user_defined_obfuscation_strategies.rb +18 -0
  61. data/spec/test_db_setup/migrations/6_add_table_without_any_obfuscatable_columns.rb +15 -0
  62. data/spec/test_db_setup/migrations/7_add_audit_truncation_table.rb +13 -0
  63. data/spec/test_db_setup/seeds/audit_truncation_table.yml +7 -0
  64. data/spec/test_db_setup/seeds/table_1.yml +13 -0
  65. data/spec/test_db_setup/seeds/table_2.yml +15 -0
  66. data/spec/test_db_setup/seeds/table_without_any_obfuscatable_columns.yml +7 -0
  67. data/spec/test_db_setup/seeds/table_without_any_user_defined_obfuscation_strategies.yml +13 -0
  68. data/spec/test_db_setup/seeds/truncation_table_1.yml +9 -0
  69. data/spec/test_db_setup/seeds/whitelisted_table_1.yml +9 -0
  70. metadata +159 -0
@@ -0,0 +1,29 @@
1
+ require 'yaml'
2
+
3
require 'shellwords'

module DbObfuscation
  module Cli
    # Produces a custom-format (`-Fc`) PostgreSQL dump by shelling out to
    # `pg_dump`, using connection settings read from a YAML file.
    module DbDump
      # Dumps the configured database into +dump_name+.
      #
      # @param config_file [String] path to a YAML file providing the
      #   'host', 'username', 'password' and 'database' keys
      # @param dump_name [String] file name handed to `pg_dump -f`
      # @return [Boolean, nil] whatever Kernel.system returns for the command
      def self.dump(config_file, dump_name)
        Kernel.system cmd(db_config(config_file), dump_name)
      end

      # Loads the connection settings from the YAML file at +file+.
      def self.db_config(file)
        YAML.load_file(file)
      end
      private_class_method :db_config

      # Builds the single-line shell command executed by .dump.
      #
      # Every value taken from the configuration, as well as the dump name, is
      # shell-escaped so that spaces or shell metacharacters (for example in a
      # password) can neither break the command nor inject extra commands.
      # Values made only of ordinary characters are left untouched, so the
      # generated command is the same as before for typical configurations.
      #
      # @param config [Hash] connection settings keyed by String
      # @param dump_name [String] output file for the dump
      # @return [String] the command line
      def self.cmd(config, dump_name)
        [
          "PGPASSWORD=#{shell_escape(config['password'])}",
          'pg_dump',
          '-h', shell_escape(config['host']),
          '-U', shell_escape(config['username']), '-w',
          '-Fc', '-f', shell_escape(dump_name),
          shell_escape(config['database'])
        ].join(' ')
      end
      private_class_method :cmd

      # Escapes +value+ for use as one shell word; nil becomes an empty word.
      def self.shell_escape(value)
        Shellwords.escape(value.to_s)
      end
      private_class_method :shell_escape
    end
  end
end
@@ -0,0 +1,26 @@
1
+ require 'yaml'
2
+
3
require 'shellwords'

module DbObfuscation
  module Cli
    # Runs Sequel migrations by shelling out to the `sequel` command line tool
    # against the database described in a YAML connection file.
    class Migrator
      # Applies the migrations found in +path_to_migrations+.
      #
      # @param config_file [String] YAML file providing 'host' and 'database'
      # @param path_to_migrations [String] directory passed to `sequel -m`
      # @return [Boolean, nil] result of the `system` call
      def self.migrate(config_file, path_to_migrations)
        system(sequel_command(config_file, path_to_migrations))
      end

      # Same as .migrate but additionally passes `-M 0` to the sequel tool.
      #
      # @param config_file [String] YAML file providing 'host' and 'database'
      # @param path_to_migrations [String] directory passed to `sequel -m`
      # @return [Boolean, nil] result of the `system` call
      def self.down_migrate(config_file, path_to_migrations)
        system(sequel_command(config_file, path_to_migrations, '-M 0'))
      end

      # Loads the connection settings from the YAML file at +config_file+.
      def self.db_config(config_file)
        YAML.load_file(config_file)
      end
      private_class_method :db_config

      # Builds the `sequel` command line shared by both migration directions.
      #
      # The migrations path and the database URL are shell-escaped so that
      # spaces or shell metacharacters cannot split or extend the command;
      # ordinary paths and URLs come out unchanged.
      #
      # @param config_file [String] YAML connection file
      # @param path_to_migrations [String] migrations directory
      # @param extra_options [String, nil] literal options inserted before the
      #   database URL
      # @return [String] the command line
      def self.sequel_command(config_file, path_to_migrations, extra_options = nil)
        config = db_config(config_file)
        db_name = "postgres://#{config['host']}/#{config['database']}"

        [
          'sequel',
          '-m', Shellwords.escape(path_to_migrations.to_s),
          extra_options,
          Shellwords.escape(db_name)
        ].compact.join(' ')
      end
      private_class_method :sequel_command
    end
  end
end
@@ -0,0 +1,52 @@
1
+ require 'yaml'
2
+ require 'pathname'
3
+ require 'sequel'
4
+
5
module DbObfuscation
  module Cli
    # Inserts seed rows, read from YAML files, into a database reached through
    # Sequel. Each seed file is named after the table it populates.
    class Seeder

      # @param config_file [String] YAML file whose content is handed to
      #   Sequel.connect
      # @param path_to_seeds [String] directory containing `<table>.yml` files
      def initialize(config_file, path_to_seeds)
        @db_connection = db_connection(config_file)
        @seed_data = seed_data(seed_files(path_to_seeds))
      end

      # Convenience wrapper: builds a seeder from +args+ and runs it.
      def self.seed(*args)
        new(*args).seed
      end

      # Inserts every loaded row into its table, one insert per row.
      #
      # @return [Hash] the seed data that was inserted, keyed by table name
      def seed
        @seed_data.each do |table, rows|
          rows.each { |row| @db_connection[table].insert(row) }
        end
      end

      private

      # Opens the Sequel connection described by the YAML file.
      def db_connection(config_file)
        Sequel.connect(YAML.load_file(config_file))
      end

      # Lists the seed files of +path+.
      #
      # Only `*.yml` files are returned because #table_name derives the table
      # from the name with `.yml` removed; any other file would produce a bogus
      # table name. The list is sorted so the seeding order does not depend on
      # the order in which the file system happens to return entries.
      def seed_files(path)
        Dir[Pathname.new(path).join('*.yml').to_s].sort
      end

      # Table name (Symbol) for a seed file: its base name without `.yml`.
      def table_name(file)
        File.basename(file, '.yml').to_sym
      end

      # Builds a Hash mapping each table name to the rows found in its file.
      # The YAML document is iterated as key/value pairs and only the values
      # (the rows) are kept.
      def seed_data(files)
        files.each_with_object({}) do |file, data|
          data[table_name(file)] = YAML.load_file(file).map { |_label, row| row }
        end
      end
    end
  end
end
@@ -0,0 +1,25 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'db_obfuscation/version'
5
+
6
Gem::Specification.new do |gem|
  # Identity and release metadata.
  gem.name     = 'db_obfuscation'
  gem.version  = DbObfuscation::VERSION
  gem.authors  = ['Case Commons, LLC', 'Rajat Agrawal']
  gem.email    = ['casebook-dev@googlegroups.com', 'agrawal.rajat.89@gmail.com']
  gem.homepage = 'https://github.com/CaseCommonsDevOps/db_obfuscation'
  gem.date     = '2015-07-21'

  # Short and long descriptions. Runs of two or more whitespace characters in
  # the heredoc (indentation, blank lines) are squeezed into a single space.
  gem.summary     = "A gem to obfuscate a production database with fake values for testing with a production size database"
  gem.description = <<-description.gsub(/\s{2,}/, ' ')
    db_obfuscation is a gem that helps to prepare a production size obfuscated database. This obfuscated database can be used for internal testing purposes like user acceptance testing, QA/Regression testing.

    db_obfuscation takes a production database and updates data in every row in each table with fake data. db_obfuscation ensures that associations between different tables are still maintained.
  description
  gem.license = 'MIT'

  # Packaged files come from git; executables and test files are picked out of
  # that list by path prefix.
  gem.files         = `git ls-files -z`.split("\x0")
  gem.executables   = gem.files.grep(%r{^bin/db_obfuscation}) { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]
end
@@ -0,0 +1,21 @@
1
+ Feature: Dump
2
+ This is a command to compress an obfuscated database
3
+ A compressed db is easier to transport
4
+ It takes a config path for the database connection and
5
+ Name of the compressed database dump
6
+
7
+ Scenario: Successful database dump
8
+
9
+ When I run `../../bin/db_obfuscation dump -c ../../spec/config/database.yml -n test_compressed_db`
10
+ Then the output should contain "Start process"
11
+ Then the output should contain "Start dumping"
12
+ Then the output should contain "Finish dumping"
13
+ Then the output should contain "Process finished"
14
+ Then the output should not contain "Caught an exception"
15
+
16
+ Scenario: Unsuccessful database dump
17
+
18
+ When I run `../../bin/db_obfuscation dump -c incorrect_database.yml -n test_compressed_db`
19
+ Then the output should contain "Start process"
20
+ Then the output should contain "Process finished"
21
+ Then the output should contain "Caught an exception"
@@ -0,0 +1,12 @@
1
+ Feature: Db_Obfuscation
2
+ This is a command line thor task to run obfuscation for a user
3
+ It takes a folder path for obfuscation configuration
4
+ And obfuscates the database
5
+
6
+ Scenario: Successfully Obfuscating a Database
7
+ When I run `../../bin/db_obfuscation obfuscate -c ../../spec/config/ -s 200`
8
+ Then the exit status should be 0
9
+
10
+ Scenario: Unsuccessfully obfuscating a Database
11
+ When I run `../../bin/db_obfuscation obfuscate -c incorrect_config_folder -s 200`
12
+ Then the exit status should not be 0
@@ -0,0 +1,16 @@
1
+ Feature: Test Database tasks
2
+ Scenario: Preparing a test database
3
+ Given I run `createdb obfuscation_test`
4
+
5
+ When I run `../../bin/obfuscation_test initialize_database -c ../../spec/config/database.yml -d ../../spec/test_db_setup`
6
+ Then the output should contain "Dropping database obfuscation_test"
7
+ Then the output should contain "DROP DATABASE obfuscation_test;"
8
+ Then the output should contain "Successfully dropped the database"
9
+
10
+ Then the output should contain "Creating database"
11
+ Then the output should contain "CREATE DATABASE obfuscation_test OWNER"
12
+ Then the output should contain "Finished creating database"
13
+ Then the output should contain "Starting up migration"
14
+ Then the output should contain "Finished migration"
15
+ Then the output should contain "Starting seeding"
16
+ Then the output should contain "Finished seeding"
@@ -0,0 +1 @@
1
+ require 'aruba/cucumber'
@@ -0,0 +1,50 @@
1
+ require 'db_obfuscation/environment'
2
+ require 'db_obfuscation/database'
3
+ require 'db_obfuscation/filtering'
4
+ require 'db_obfuscation/query_builder'
5
+ require 'db_obfuscation/batch_formulator'
6
+
7
module DbObfuscation
  extend self

  # Obfuscates every table returned by Filtering.obfuscation_config for
  # :string columns, logging each table name before it is processed.
  #
  # @param step [Integer] number of row ids handled per batch (the slice size
  #   given to each_slice)
  # @return [Hash] the obfuscation configuration that was iterated
  def obfuscate(step)
    Filtering.obfuscation_config([:string]).each do |table, table_config|
      DbObfuscation.logging.info "Obfuscating #{table}"
      update(table, table_config, step)
    end
  end

  private

  # Updates +table+ in slices of +step+ ids. A failure while building or
  # running one slice is logged and the remaining slices are still processed.
  def update(table, config, step)
    ids(table).each_slice(step) do |id_slice|
      begin
        rows = BatchFormulator.batch_for(config, id_slice)
        next if rows.empty?

        multi_update(table, rows, date_columns(config))
      rescue => error
        DbObfuscation.logging.error 'Encountered Exception'
        DbObfuscation.logging.error "#{table} encountered #{error.message}"
        DbObfuscation.logging.error error.backtrace
      end
    end
  end

  # Names of the columns whose configured strategy is :date_strategy.
  def date_columns(config)
    config.each_with_object([]) do |(column, strategy), columns|
      columns << column if strategy == :date_strategy
    end
  end

  # All values of the `id` column of +table+.
  def ids(table)
    DB[table].map(:id)
  end

  # Builds one multi-row UPDATE statement through QueryBuilder and runs it.
  def multi_update(table, batch, date_columns)
    DB.run(QueryBuilder.multi_update_sql(table, batch, date_columns))
  end
end
@@ -0,0 +1,26 @@
1
+ require 'db_obfuscation/database'
2
+ require 'db_obfuscation/obfuscator'
3
+
4
module DbObfuscation
  # Builds the per-row replacement values used for one batch of ids.
  class BatchFormulator

    # Class-level shortcut for #batch_for on a fresh instance.
    def self.batch_for(*args)
      new.batch_for(*args)
    end

    # @param config [Hash] column name => obfuscation strategy
    # @param ids [Enumerable] row ids to produce values for
    # @return [Array<Hash>] one Hash per id holding the SQL literal for every
    #   configured column plus the row's :id
    def batch_for(config, ids)
      ids.map { |id| obfuscate_row(config, id) }
    end

    private

    # Generates a value for each configured column through Obfuscator, turns it
    # into a SQL literal with DB.literal, and finally records the row id.
    def obfuscate_row(config, id)
      row = {}
      config.each do |column, strategy|
        row[column] = DB.literal(Obfuscator.obfuscate(strategy))
      end
      row[:id] = id
      row
    end
  end
end
@@ -0,0 +1,43 @@
1
+ require 'yaml'
2
+ require 'pathname'
3
+
4
module DbObfuscation
  # Reads the YAML configuration files that live under the configuration
  # directory: database.yml, whitelisted_tables.yml, truncation_patterns.yml
  # and table_strategies/*.yml.
  class Config
    class << self
      # Directory the configuration is read from: the path assigned through
      # config_path=, or DbObfuscation.config_dir when none was assigned.
      def config_path
        @config || DbObfuscation.config_dir
      end

      # Points the configuration at another directory and drops the memoized
      # values, so they are re-read from the new location instead of serving
      # content of the previous directory.
      #
      # @param config [String, Pathname] configuration directory
      def config_path=(config)
        @config = Pathname.new(config)
        reset_cache
      end

      # @return [Object] parsed content of database.yml (read on every call)
      def db_config
        YAML.load_file(config_path.join('database.yml'))
      end

      # @return [Object] parsed content of whitelisted_tables.yml (read on
      #   every call)
      def whitelisted_tables
        YAML.load_file(config_path.join('whitelisted_tables.yml'))
      end

      # @return [Hash] merged content of table_strategies/*.yml, memoized
      def table_strategies
        @table_strategies ||= load_table_strategies
      end

      # @return [Array<Symbol>] entries of truncation_patterns.yml, memoized
      def truncation_patterns
        @truncation_patterns ||= YAML.load_file(config_path.join('truncation_patterns.yml')).map(&:to_sym)
      end

      private

      # Forgets the memoized strategies and truncation patterns.
      def reset_cache
        @table_strategies = nil
        @truncation_patterns = nil
      end

      # Merges every strategy file into a single Hash. Files are visited in
      # sorted order so the result is the same on every file system, and an
      # empty file (which parses to a falsy value) is skipped instead of
      # making merge! raise.
      def load_table_strategies
        Dir["#{config_path}/table_strategies/*.yml"].sort.each_with_object({}) do |file, strategies|
          next unless File.file?(file)

          strategies.merge!(YAML.load_file(file) || {})
        end
      end
    end
  end
end
@@ -0,0 +1,8 @@
1
+ require 'sequel'
2
+ require 'db_obfuscation/config'
3
+
4
module DbObfuscation
  # Shared Sequel connection, opened when this file is loaded, using the
  # settings returned by Config.db_config.
  DB = Sequel.connect(Config.db_config)

  # Global Sequel settings applied right after connecting: UTC as the default
  # timezone and the :migration extension.
  Sequel.default_timezone = :utc
  Sequel.extension(:migration)
end
@@ -0,0 +1,14 @@
1
+ require 'pathname'
2
+ require 'logger'
3
+
4
module DbObfuscation
  # Directory three levels above this file, as a frozen Pathname.
  ROOT = Pathname.new(File.expand_path('../../../', __FILE__)).freeze

  class << self
    # config_dir: fallback configuration directory read by Config.config_path.
    # logging:    logger object the library sends info/error messages to.
    attr_accessor :config_dir, :logging
  end
end

# Make the gem's lib/ and cli/ directories requirable. The paths are converted
# to Strings first: $LOAD_PATH holds Strings, and a Pathname never compares
# equal to a String, so with Pathname values the include? guard could not
# detect an existing entry and Pathname objects ended up in $LOAD_PATH.
lib_path = DbObfuscation::ROOT.join('lib').to_s
cli_path = DbObfuscation::ROOT.join('cli').to_s
$LOAD_PATH.unshift(lib_path) unless $LOAD_PATH.include?(lib_path)
$LOAD_PATH.unshift(cli_path) unless $LOAD_PATH.include?(cli_path)
@@ -0,0 +1,56 @@
1
+ require 'db_obfuscation/database'
2
+ require 'db_obfuscation/filtering/column'
3
+ require 'db_obfuscation/truncation'
4
+ require 'db_obfuscation/obfuscation_strategy'
5
+ require 'active_support/core_ext/object/blank'
6
+
7
module DbObfuscation
  # Works out which tables and columns get obfuscated, and with which
  # strategy, by combining the database schema with the user configuration.
  module Filtering
    extend self

    # @param types [Array<Symbol>] column types eligible for a default strategy
    # @return [Hash] table name => { column name => strategy }; tables whose
    #   resulting column configuration is blank are left out
    def obfuscation_config(types)
      result = {}
      filter_tables.each do |table|
        columns = config_per_table(table, types)
        result[table] = columns if columns.present?
      end
      result
    end

    private

    # Default strategies for the columns accepted by Column.filter, overridden
    # by the user's per-table configuration, minus whitelisted columns.
    def config_per_table(table, types)
      defaults = {}
      Column.columns_type(table).each do |column_name, column_type|
        accepted = Column.filter(column_name, column_type, types)
        defaults[column_name] = default_obfuscation_strategy(accepted) if accepted
      end

      reject_whitelisted_columns(defaults.merge(user_config(table)))
    end

    # Strategy chosen by ObfuscationStrategy for +column+.
    def default_obfuscation_strategy(column)
      ObfuscationStrategy.strategy(column)
    end

    # Tables of the database that are not excluded from obfuscation.
    def filter_tables
      DbObfuscation::DB.tables - exclude_tables
    end

    # User-defined strategies for +table+ with column names as Symbols; an
    # empty Hash when the table has no entry in Config.table_strategies.
    def user_config(table)
      strategies = DbObfuscation::Config.table_strategies[table.to_s] || {}
      Hash[strategies.map { |column, strategy| [column.to_sym, strategy] }]
    end

    # Drops the columns whose strategy is :whitelisted.
    def reject_whitelisted_columns(config)
      config.reject { |_column, strategy| strategy == :whitelisted }
    end

    # Tables never obfuscated: truncated tables, whitelisted tables and the
    # :schema_info table.
    def exclude_tables
      DbObfuscation::Truncation.tables +
        whitelisted_tables +
        [:schema_info]
    end

    # Whitelisted table names from the configuration, as Symbols.
    def whitelisted_tables
      DbObfuscation::Config.whitelisted_tables.map(&:to_sym)
    end
  end
end
@@ -0,0 +1,40 @@
1
+ require 'db_obfuscation/database'
2
+
3
module DbObfuscation
  module Filtering
    # Column-level helpers: schema lookup and the rules deciding whether a
    # column is a candidate for obfuscation.
    module Column
      class << self
        # @param table_name [Symbol] table to inspect
        # @return [Hash] column name => value of the :type entry in its schema
        #   details
        def columns_type(table_name)
          columns(table_name).each_with_object({}) do |(name, details), types|
            types[name] = details[:type]
          end
        end

        # Schema of +table_name+ as returned by DB.schema.
        def columns(table_name)
          DbObfuscation::DB.schema(table_name)
        end

        # True when +column_type+ is one of +expected_column_types+.
        def type?(expected_column_types, column_type)
          expected_column_types.include?(column_type)
        end

        # True when the column name ends in "type".
        def polymorphic?(column_name)
          !column_name.match(/type$/).nil?
        end

        # True when the column name ends in "id".
        def ending_in_id?(column_name)
          !column_name.match(/id$/).nil?
        end

        # Returns +column_name+ when the column qualifies for obfuscation and
        # nil otherwise. A column is rejected when its name ends in "type" or
        # "id", or when its type is not among +expected_types+.
        def filter(column_name, column_type, expected_types)
          rejected = polymorphic?(column_name) ||
                     ending_in_id?(column_name) ||
                     !type?(expected_types, column_type)
          rejected ? nil : column_name
        end
      end
    end
  end
end
@@ -0,0 +1,18 @@
1
module DbObfuscation
  module Filtering
    # Selects the tables whose names match the configured truncation patterns.
    class Truncation

      # Class-level entry point; forwards to the private instance method of
      # the same name on a fresh instance.
      def self.matches_patterns(*args)
        new.send(:matches_patterns, *args)
      end

      private

      # A table matches a pattern when its name is the pattern itself or the
      # pattern followed by "_" and anything else.
      #
      # @param tables [Array] table names to search
      # @param patterns [Array] patterns interpolated into the regular
      #   expression
      # @return [Array] matching tables without duplicates, grouped in
      #   pattern order
      def matches_patterns(tables, patterns)
        patterns.flat_map { |pattern| tables.grep(/^#{pattern}(_.*)*$/) }.uniq
      end
    end
  end
end