db_obfuscation 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (70) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +6 -0
  3. data/Gemfile +13 -0
  4. data/Gemfile.lock +83 -0
  5. data/LICENSE +21 -0
  6. data/README.md +121 -0
  7. data/TODO +15 -0
  8. data/bin/console +25 -0
  9. data/bin/db_obfuscation +121 -0
  10. data/bin/obfuscation_test +54 -0
  11. data/cli/db_dump.rb +29 -0
  12. data/cli/migrator.rb +26 -0
  13. data/cli/seeder.rb +52 -0
  14. data/db_obfuscation.gemspec +25 -0
  15. data/features/bin/dump.feature +21 -0
  16. data/features/bin/obfuscation.feature +12 -0
  17. data/features/bin/test_database_tasks.feature +16 -0
  18. data/features/support.rb +1 -0
  19. data/lib/db_obfuscation.rb +50 -0
  20. data/lib/db_obfuscation/batch_formulator.rb +26 -0
  21. data/lib/db_obfuscation/config.rb +43 -0
  22. data/lib/db_obfuscation/database.rb +8 -0
  23. data/lib/db_obfuscation/environment.rb +14 -0
  24. data/lib/db_obfuscation/filtering.rb +56 -0
  25. data/lib/db_obfuscation/filtering/column.rb +40 -0
  26. data/lib/db_obfuscation/filtering/truncation.rb +18 -0
  27. data/lib/db_obfuscation/obfuscation_strategy.rb +22 -0
  28. data/lib/db_obfuscation/obfuscator.rb +65 -0
  29. data/lib/db_obfuscation/query_builder.rb +62 -0
  30. data/lib/db_obfuscation/truncation.rb +39 -0
  31. data/lib/db_obfuscation/util/trigger.rb +83 -0
  32. data/lib/db_obfuscation/version.rb +4 -0
  33. data/spec/cli/db_dump_spec.rb +33 -0
  34. data/spec/cli/migrator_spec.rb +59 -0
  35. data/spec/cli/seeder_spec.rb +33 -0
  36. data/spec/config/database.yml +5 -0
  37. data/spec/config/table_strategies/table_1.yml +3 -0
  38. data/spec/config/table_strategies/table_2.yml +4 -0
  39. data/spec/config/table_strategies/truncation_table_1.yml +3 -0
  40. data/spec/config/table_strategies/whitelisted_table_1.yml +3 -0
  41. data/spec/config/truncation_patterns.yml +2 -0
  42. data/spec/config/whitelisted_tables.yml +1 -0
  43. data/spec/db_obfuscation/batch_formulator_spec.rb +36 -0
  44. data/spec/db_obfuscation/config_spec.rb +60 -0
  45. data/spec/db_obfuscation/database_spec.rb +10 -0
  46. data/spec/db_obfuscation/filtering/column_spec.rb +82 -0
  47. data/spec/db_obfuscation/filtering/truncation_spec.rb +41 -0
  48. data/spec/db_obfuscation/filtering_spec.rb +39 -0
  49. data/spec/db_obfuscation/obfuscation_strategy_spec.rb +43 -0
  50. data/spec/db_obfuscation/obfuscator_spec.rb +150 -0
  51. data/spec/db_obfuscation/query_builder_spec.rb +259 -0
  52. data/spec/db_obfuscation/truncation_spec.rb +31 -0
  53. data/spec/db_obfuscation/util/trigger_spec.rb +126 -0
  54. data/spec/integration/obfuscation_spec.rb +69 -0
  55. data/spec/spec_helper.rb +3 -0
  56. data/spec/test_db_setup/migrations/1_add_table_1.rb +18 -0
  57. data/spec/test_db_setup/migrations/2_add_table_2.rb +19 -0
  58. data/spec/test_db_setup/migrations/3_add_truncation_table_1.rb +14 -0
  59. data/spec/test_db_setup/migrations/4_add_whitelisted_table_1.rb +14 -0
  60. data/spec/test_db_setup/migrations/5_add_table_without_any_user_defined_obfuscation_strategies.rb +18 -0
  61. data/spec/test_db_setup/migrations/6_add_table_without_any_obfuscatable_columns.rb +15 -0
  62. data/spec/test_db_setup/migrations/7_add_audit_truncation_table.rb +13 -0
  63. data/spec/test_db_setup/seeds/audit_truncation_table.yml +7 -0
  64. data/spec/test_db_setup/seeds/table_1.yml +13 -0
  65. data/spec/test_db_setup/seeds/table_2.yml +15 -0
  66. data/spec/test_db_setup/seeds/table_without_any_obfuscatable_columns.yml +7 -0
  67. data/spec/test_db_setup/seeds/table_without_any_user_defined_obfuscation_strategies.yml +13 -0
  68. data/spec/test_db_setup/seeds/truncation_table_1.yml +9 -0
  69. data/spec/test_db_setup/seeds/whitelisted_table_1.yml +9 -0
  70. metadata +159 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 6336fb052f506c8e195330c28ba4c3f7ae082924
4
+ data.tar.gz: 9fd2a9f813622d45839461f45a4fb2d225d4bf6a
5
+ SHA512:
6
+ metadata.gz: 1e93ff8b988dcbe559f17926fc71d361b25400b4937c75cc7948f9e2d1e50c89be7145d7ad9ff7b02a01eaa7eaf860dc31e7524cb453c5d17fe852e07e7a2dd0
7
+ data.tar.gz: e61875265eb209c2af9b0a8465c6f2c2ff3c394e2ee76ebf484b50cc99350134bdcc2b7cc8eef3b2d9bdd4ab1b73e7729d8bcbce698219fcba8a2ffb541e96f7
@@ -0,0 +1,6 @@
1
+ logs
2
+ log
3
+ .vimlog
4
+ db_schema.rb
5
+ logfile
6
+ test_logs
data/Gemfile ADDED
@@ -0,0 +1,13 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gem 'sequel'
4
+ gem 'sequel_pg'
5
+ gem 'thor'
6
+ gem 'ffaker'
7
+ gem 'pry-byebug'
8
+ gem 'activesupport'
9
+
10
+ group :test do
11
+ gem 'rspec'
12
+ gem 'aruba'
13
+ end
@@ -0,0 +1,83 @@
1
+ GEM
2
+ remote: https://rubygems.org/
3
+ specs:
4
+ activesupport (4.2.0)
5
+ i18n (~> 0.7)
6
+ json (~> 1.7, >= 1.7.7)
7
+ minitest (~> 5.1)
8
+ thread_safe (~> 0.3, >= 0.3.4)
9
+ tzinfo (~> 1.1)
10
+ aruba (0.6.2)
11
+ childprocess (>= 0.3.6)
12
+ cucumber (>= 1.1.1)
13
+ rspec-expectations (>= 2.7.0)
14
+ builder (3.2.2)
15
+ byebug (4.0.5)
16
+ columnize (= 0.9.0)
17
+ childprocess (0.5.6)
18
+ ffi (~> 1.0, >= 1.0.11)
19
+ coderay (1.1.0)
20
+ columnize (0.9.0)
21
+ cucumber (2.0.0)
22
+ builder (>= 2.1.2)
23
+ cucumber-core (~> 1.1.3)
24
+ diff-lcs (>= 1.1.3)
25
+ gherkin (~> 2.12)
26
+ multi_json (>= 1.7.5, < 2.0)
27
+ multi_test (>= 0.1.2)
28
+ cucumber-core (1.1.3)
29
+ gherkin (~> 2.12.0)
30
+ diff-lcs (1.2.5)
31
+ ffaker (2.0.0)
32
+ ffi (1.9.10)
33
+ gherkin (2.12.2)
34
+ multi_json (~> 1.3)
35
+ i18n (0.7.0)
36
+ json (1.8.2)
37
+ method_source (0.8.2)
38
+ minitest (5.5.1)
39
+ multi_json (1.11.1)
40
+ multi_test (0.1.2)
41
+ pg (0.18.2)
42
+ pry (0.10.1)
43
+ coderay (~> 1.1.0)
44
+ method_source (~> 0.8.1)
45
+ slop (~> 3.4)
46
+ pry-byebug (3.1.0)
47
+ byebug (~> 4.0)
48
+ pry (~> 0.10)
49
+ rspec (3.3.0)
50
+ rspec-core (~> 3.3.0)
51
+ rspec-expectations (~> 3.3.0)
52
+ rspec-mocks (~> 3.3.0)
53
+ rspec-core (3.3.1)
54
+ rspec-support (~> 3.3.0)
55
+ rspec-expectations (3.3.0)
56
+ diff-lcs (>= 1.2.0, < 2.0)
57
+ rspec-support (~> 3.3.0)
58
+ rspec-mocks (3.3.1)
59
+ diff-lcs (>= 1.2.0, < 2.0)
60
+ rspec-support (~> 3.3.0)
61
+ rspec-support (3.3.0)
62
+ sequel (4.24.0)
63
+ sequel_pg (1.6.13)
64
+ pg (>= 0.8.0)
65
+ sequel (>= 3.39.0)
66
+ slop (3.6.0)
67
+ thor (0.19.1)
68
+ thread_safe (0.3.4)
69
+ tzinfo (1.2.2)
70
+ thread_safe (~> 0.1)
71
+
72
+ PLATFORMS
73
+ ruby
74
+
75
+ DEPENDENCIES
76
+ activesupport
77
+ aruba
78
+ ffaker
79
+ pry-byebug
80
+ rspec
81
+ sequel
82
+ sequel_pg
83
+ thor
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2015 Case Commons
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,121 @@
1
+ # [db_obfuscation](https://github.com/CasecommonsDevops/db_obfuscation)
2
+
3
+
4
+ `db_obfuscation` is a gem that helps to prepare a production size obfuscated database. This obfuscated database can be used for internal testing purposes like user acceptance testing, QA/Regression testing.
5
+
6
+ `db_obfuscation` takes a production database and updates data in every row in each table with fake data. `db_obfuscation` ensures that associations between different tables are still maintained.
7
+
8
+ **The gem supports only postgres databases at the moment.**
9
+
10
+ ## Installation
11
+
12
+ `gem install db_obfuscation`
13
+
14
+
15
+ ## Usage
16
+
17
+ ```sh
18
+ db_obfuscation obfuscate -c <path of obfuscation_configuration>
19
+ -s <Number of rows to be obfuscated in each db transaction> #default 100
20
+ -l <name_of_log_file>
21
+ ```
22
+
23
+ `step_size` should be tuned for each use case. The best value depends on the processing power of the computer, the size of the tables, etc.
24
+
25
+ In our experience, 100 row updates per database transaction has been the optimal configuration for a database. However, this number may need to be changed to optimize performance for your database.
26
+
27
+
28
+ ## Configuration
29
+
30
+ A sample configuration folder for the gem is included with the gem. The sample folder is at `spec/config`.
31
+
32
+ A generic configuration folder consists of the following files and folders:
33
+
34
+ 1. **Database Configuration file**
35
+
36
+ `<path_to_config_folder>/database.yml`
37
+
38
+ This file contains credentials to connect to the database. This file needs adapter name, host, encoding, username, password, and name of the database.
39
+
40
+ Sample `database.yml` file:
41
+
42
+ ```yaml
43
+ adapter: postgres
44
+ host: localhost
45
+ encoding: unicode
46
+ username: database_user
47
+ database: obfuscation_test
48
+ password: database_password
49
+ ```
50
+
51
+ 2. **Table Strategies**
52
+
53
+ `<path_to_config_folder>/table_strategies`
54
+
55
+ This folder contains a YAML file for every table for which a user wants to override the default obfuscation configuration.
56
+
57
+ Each table file contains a mapping between columns and the obfuscation strategy for each column. The filename is the same as the name of the table whose configuration it specifies.
58
+
59
+ A sample table strategy file is like,
60
+
61
+ `<spec/config/table_strategies/table_2.yml>`
62
+
63
+ ```yaml
64
+ table_2:
65
+ field_1: :default_strategy
66
+ field_2: :whitelisted
67
+ date_field: :date_strategy
68
+ field_3: :first_name_strategy
69
+ ```
70
+
71
+ `db_obfuscation`, by default, obfuscates every **string** column in a table.
72
+
73
+ It uses a random word to obfuscate every string column. This default behaviour can be overridden on column and table basis by specifying different strategies respectively.
74
+
75
+ Different strategies supported are,
76
+
77
+ - `:whitelisted` to skip obfuscating a particular string column in a table
78
+ - `:date_strategy` to include a date column that needs to be obfuscated.
79
+
80
+ Date columns in a table are not obfuscated by default. Including `:date_strategy` adds a random number of days between 31 and 240 to the current value of date.
81
+ - Complete list of different strategies is [here](https://github.com/CaseCommonsDevOps/db_obfuscation/blob/master/lib/db_obfuscation/obfuscator.rb).
82
+
83
+ 3. **Truncation Tables**
84
+
85
+ `<path_to_config_folder>/truncation_patterns.yml`
86
+
87
+ This file contains string patterns for table names that need to be truncated instead of being obfuscated.
88
+
89
+ Any table name that is the same as the pattern or begins with that pattern, followed by an underscore will be truncated during the obfuscation process.
90
+
91
+ A sample `truncation_patterns.yml` file is like,
92
+
93
+ ```yaml
94
+ - truncation_table_1
95
+ - audit
96
+ ```
97
+
98
+ Any table that begins with the word `audit_` will be selected for truncation.
99
+
100
+ 4. **Whitelisted Tables**
101
+
102
+ `<path_to_config_folder>/whitelisted_tables.yml`
103
+
104
+ This file contains names of tables that don't need to be obfuscated and should not be touched.
105
+
106
+ A sample `whitelisted_tables.yml` looks like this,
107
+
108
+ ```yaml
109
+ - whitelisted_table_1
110
+ - whitelisted_table_2
111
+ ```
112
+
113
+ ## Requirements
114
+
115
+ - Ruby 2.x
116
+
117
+ ## License
118
+
119
+ Copyright © 2015 Case Commons & Rajat Agrawal.
120
+
121
+ Licensed under the MIT license, available in the “LICENSE” file.
data/TODO ADDED
@@ -0,0 +1,15 @@
1
+ 1. Show the table number being obfuscated
2
+ 2. Don't obfuscate a table if the first update shows an exception
3
+ 3. Generalize triggers patterns
4
+ 4. Verify the configuration that has been specified by the user for obfuscation against the database. Like the column exists or not in the table strategy file
5
+ 5. Choosing verbosity of logs on the command line
6
+ 6. support both symbol and string use in user configuration files
7
+ 7. replace aruba cucumber with aruba rspec
8
+ 8. Don't fail if any of the user configuration is missing
9
+ 9. Write a task to generate a sample configuration
10
+ 10. Adding a dummy task
11
+
12
+
13
+ Nice to have
14
+
15
+ 1. Ability to obfuscate a single table from the command line
@@ -0,0 +1,25 @@
1
#!/usr/bin/env ruby

# Interactive console for db_obfuscation.
#
# Usage: console <path to obfuscation config folder>
#
# Points DbObfuscation.config_dir at the given folder, loads the library and
# its trigger utilities, then starts a Pry session.

require 'pathname'

config_path = ARGV[0]
# A missing argument is a usage error: report it on stderr and exit with a
# non-zero status so that calling scripts can detect the failure.
abort 'Please enter the obfuscation config folder path' if config_path.nil?

require 'bundler'
Bundler.setup(:default)

lib_path = File.expand_path('../../lib', __FILE__)
$LOAD_PATH.unshift(lib_path) unless $LOAD_PATH.include?(lib_path)

# The environment file is loaded first so that config_dir can be assigned
# before the rest of the library is required.
require 'db_obfuscation/environment'
DbObfuscation.config_dir = Pathname.new(config_path)

require 'db_obfuscation'
require 'db_obfuscation/util/trigger'
require 'pry'

puts 'Console is starting'
Pry.start
@@ -0,0 +1,121 @@
1
#!/usr/bin/env ruby

require 'logger'
require 'pathname'
require 'thor'

# Command line interface for db_obfuscation.
#
# Commands:
#   dump      - dumps a database through DbObfuscation::Cli::DbDump
#   obfuscate - disables triggers, truncates tables, obfuscates the data and
#               re-enables the triggers
#
# Both commands log through DbObfuscation.logging and exit with status 1
# when the work fails.
class DbObfuscationCli < Thor
  desc 'dump', 'Take a dump of the obfuscated db'
  option(:db_config,
         aliases: :c,
         required: true,
         type: :string,
         banner: 'db connection file for the db to be dumped')
  # `desc` (not `description`) is the key Thor reads for an option's help text.
  option(:dump_name,
         aliases: :n,
         required: true,
         type: :string,
         banner: 'Name of the db dump',
         desc: <<-DESCRIPTION.gsub(/\s{2,}/, ' ').strip)
           A database dump helps to send the obfuscated dump
           to another computer. Use this command after obfuscating
           a database.
         DESCRIPTION
  # No default here: STDOUT is an IO, not a string, so it is not a valid
  # default for a :string option. initialize_logger maps "not given" to STDOUT.
  option(:log_file,
         aliases: :l,
         type: :string,
         banner: 'Name of log file for logging',
         desc: 'Logs to STDOUT when omitted')
  # Dumps the database described by --db_config under the name --dump_name.
  #
  # Exits with status 1 if the dump raises.
  def dump
    # Done outside the rescue/ensure below: both of them log through
    # DbObfuscation.logging, which is only available once this has succeeded.
    require_relative '../lib/db_obfuscation/environment'
    initialize_logger(options[:log_file])

    begin
      require 'db_dump'

      DbObfuscation.logging.info 'Start process'
      DbObfuscation.logging.info 'Start dumping'
      DbObfuscation::Cli::DbDump.dump(options[:db_config], options[:dump_name])
      DbObfuscation.logging.info 'Finish dumping'
    rescue StandardError, LoadError => e
      # Signals and SystemExit are deliberately not rescued.
      log_failure(e)
      exit(1)
    ensure
      DbObfuscation.logging.info 'Process finished'
    end
  end

  desc 'obfuscate', 'Obfuscates a database'
  option(:config_path,
         aliases: :c,
         required: true,
         type: :string,
         banner: 'Folder path for obfuscation strategies')
  option(:step_size,
         aliases: :s,
         default: 100,
         type: :numeric,
         banner: 'Number of rows to be obfuscated in every transaction')
  option(:log_file,
         aliases: :l,
         type: :string,
         banner: 'Name of log file for logging',
         desc: 'Logs to STDOUT when omitted')
  # Obfuscates the database configured under --config_path.
  #
  # Steps: disable all triggers, truncate the configured tables, obfuscate
  # using --step_size, and finally re-enable the triggers.
  # Exits with status 1 if any step raises.
  def obfuscate
    # See #dump: the logger must exist before rescue/ensure can use it.
    require_relative '../lib/db_obfuscation/environment'
    DbObfuscation.config_dir = Pathname.new(options[:config_path])
    initialize_logger(options[:log_file])

    triggers_disabled = false
    begin
      DbObfuscation.logging.info 'Starting process'

      require DbObfuscation::ROOT.join('lib/db_obfuscation')
      require DbObfuscation::ROOT.join('lib/db_obfuscation/truncation')
      require DbObfuscation::ROOT.join('lib/db_obfuscation/util/trigger')

      DbObfuscation.logging.info 'Disabling triggers'
      # Set before the call so a partially completed disable is still undone.
      triggers_disabled = true
      DbObfuscation::Util::Trigger.disable(:all)
      DbObfuscation.logging.info 'Finished disabling triggers'

      DbObfuscation.logging.info 'Truncating Tables'
      DbObfuscation::Truncation.truncate
      DbObfuscation.logging.info 'Finished truncating tables'

      DbObfuscation.logging.info 'Starting DbObfuscation'
      DbObfuscation.obfuscate(options[:step_size])
      DbObfuscation.logging.info 'Finished DbObfuscation'
    rescue StandardError, LoadError => e
      log_failure(e)
      exit(1)
    ensure
      DbObfuscation.logging.info 'Finishing process'
      # Only touch triggers if we got far enough to disable them; otherwise
      # the Trigger constant may not even be loaded and would raise here,
      # hiding the original error.
      if triggers_disabled
        DbObfuscation.logging.info 'Enabling triggers'
        DbObfuscation::Util::Trigger.enable(:all)
        DbObfuscation.logging.info 'Finished enabling triggers'
      end

      DbObfuscation.logging.info 'Process finished'
    end
  end

  no_tasks do
    # Installs the logger used by all commands.
    #
    # filename - path of the log file, or nil / STDOUT to log to standard
    #            output. An existing log file is emptied first.
    def initialize_logger(filename)
      target = filename || STDOUT
      # Logger appends to an existing file; start each run with an empty one.
      File.write(target, '') unless target == STDOUT
      DbObfuscation.logging = Logger.new(target)
    end

    # Logs a rescued error (message and backtrace) at fatal level.
    def log_failure(error)
      DbObfuscation.logging.fatal 'Caught an exception'
      DbObfuscation.logging.fatal error.message
      DbObfuscation.logging.fatal Array(error.backtrace).join("\n")
    end
  end
end

DbObfuscationCli.start
@@ -0,0 +1,54 @@
1
#!/usr/bin/env ruby

require 'pathname'
require 'thor'
require 'yaml'
require_relative '../lib/db_obfuscation/environment'

# Command line helper that (re)creates the test database used by the specs.
class ObfuscationTestCli < Thor
  desc 'initialize_database', 'Initializes a test environment database'
  # Defaults are converted to strings so they match the declared :string type
  # and have the same type as a path passed on the command line.
  option(:config,
         aliases: :c,
         type: :string,
         default: DbObfuscation::ROOT.join('spec/config/database.yml').to_s,
         banner: 'Database yml containing the credentials to create/connect to the database')
  option(:data_path,
         aliases: :d,
         type: :string,
         default: DbObfuscation::ROOT.join('spec/test_db_setup').to_s,
         banner: 'Root folder path containing migrations and seed files')
  # Drops and recreates the database named in the config file, then runs the
  # migrations and seeds found under --data_path.
  #
  # Aborts with a non-zero status when the config has no database name or
  # when the database cannot be created; a failed drop is only reported,
  # because the database may simply not exist yet.
  def initialize_database
    config = YAML.load_file(options[:config])
    db_name = config['database'].to_s
    owner = config['username']
    abort "No database name found in #{options[:config]}" if db_name.empty?

    data_path = Pathname.new(options[:data_path])
    migrations_path = data_path.join('migrations')
    seeds_path = data_path.join('seeds')

    # The multi-argument form of `system` bypasses the shell, so values read
    # from the config file are never interpreted as shell syntax.
    puts "Dropping database #{db_name}"
    if system('dropdb', '-e', db_name)
      puts 'Successfully dropped the database'
    else
      puts 'Could not drop the database (it may not exist yet)'
    end

    puts 'Creating database'
    createdb_args = ['-e']
    createdb_args.push('-O', owner.to_s) if owner
    # Stop here on failure: migrating and seeding would otherwise run against
    # a missing or stale database.
    abort "Failed to create database #{db_name}" unless system('createdb', *createdb_args, db_name)
    puts 'Finished creating database'

    puts 'Starting up migration'
    require_relative '../cli/migrator'
    DbObfuscation::Cli::Migrator.migrate(options[:config], migrations_path)
    puts 'Finished migration'

    puts 'Starting seeding'
    require_relative '../cli/seeder'
    DbObfuscation::Cli::Seeder.seed(options[:config], seeds_path)
    puts 'Finished seeding'
  end
end

ObfuscationTestCli.start