db_obfuscation 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +6 -0
  3. data/Gemfile +13 -0
  4. data/Gemfile.lock +83 -0
  5. data/LICENSE +21 -0
  6. data/README.md +121 -0
  7. data/TODO +15 -0
  8. data/bin/console +25 -0
  9. data/bin/db_obfuscation +121 -0
  10. data/bin/obfuscation_test +54 -0
  11. data/cli/db_dump.rb +29 -0
  12. data/cli/migrator.rb +26 -0
  13. data/cli/seeder.rb +52 -0
  14. data/db_obfuscation.gemspec +25 -0
  15. data/features/bin/dump.feature +21 -0
  16. data/features/bin/obfuscation.feature +12 -0
  17. data/features/bin/test_database_tasks.feature +16 -0
  18. data/features/support.rb +1 -0
  19. data/lib/db_obfuscation.rb +50 -0
  20. data/lib/db_obfuscation/batch_formulator.rb +26 -0
  21. data/lib/db_obfuscation/config.rb +43 -0
  22. data/lib/db_obfuscation/database.rb +8 -0
  23. data/lib/db_obfuscation/environment.rb +14 -0
  24. data/lib/db_obfuscation/filtering.rb +56 -0
  25. data/lib/db_obfuscation/filtering/column.rb +40 -0
  26. data/lib/db_obfuscation/filtering/truncation.rb +18 -0
  27. data/lib/db_obfuscation/obfuscation_strategy.rb +22 -0
  28. data/lib/db_obfuscation/obfuscator.rb +65 -0
  29. data/lib/db_obfuscation/query_builder.rb +62 -0
  30. data/lib/db_obfuscation/truncation.rb +39 -0
  31. data/lib/db_obfuscation/util/trigger.rb +83 -0
  32. data/lib/db_obfuscation/version.rb +4 -0
  33. data/spec/cli/db_dump_spec.rb +33 -0
  34. data/spec/cli/migrator_spec.rb +59 -0
  35. data/spec/cli/seeder_spec.rb +33 -0
  36. data/spec/config/database.yml +5 -0
  37. data/spec/config/table_strategies/table_1.yml +3 -0
  38. data/spec/config/table_strategies/table_2.yml +4 -0
  39. data/spec/config/table_strategies/truncation_table_1.yml +3 -0
  40. data/spec/config/table_strategies/whitelisted_table_1.yml +3 -0
  41. data/spec/config/truncation_patterns.yml +2 -0
  42. data/spec/config/whitelisted_tables.yml +1 -0
  43. data/spec/db_obfuscation/batch_formulator_spec.rb +36 -0
  44. data/spec/db_obfuscation/config_spec.rb +60 -0
  45. data/spec/db_obfuscation/database_spec.rb +10 -0
  46. data/spec/db_obfuscation/filtering/column_spec.rb +82 -0
  47. data/spec/db_obfuscation/filtering/truncation_spec.rb +41 -0
  48. data/spec/db_obfuscation/filtering_spec.rb +39 -0
  49. data/spec/db_obfuscation/obfuscation_strategy_spec.rb +43 -0
  50. data/spec/db_obfuscation/obfuscator_spec.rb +150 -0
  51. data/spec/db_obfuscation/query_builder_spec.rb +259 -0
  52. data/spec/db_obfuscation/truncation_spec.rb +31 -0
  53. data/spec/db_obfuscation/util/trigger_spec.rb +126 -0
  54. data/spec/integration/obfuscation_spec.rb +69 -0
  55. data/spec/spec_helper.rb +3 -0
  56. data/spec/test_db_setup/migrations/1_add_table_1.rb +18 -0
  57. data/spec/test_db_setup/migrations/2_add_table_2.rb +19 -0
  58. data/spec/test_db_setup/migrations/3_add_truncation_table_1.rb +14 -0
  59. data/spec/test_db_setup/migrations/4_add_whitelisted_table_1.rb +14 -0
  60. data/spec/test_db_setup/migrations/5_add_table_without_any_user_defined_obfuscation_strategies.rb +18 -0
  61. data/spec/test_db_setup/migrations/6_add_table_without_any_obfuscatable_columns.rb +15 -0
  62. data/spec/test_db_setup/migrations/7_add_audit_truncation_table.rb +13 -0
  63. data/spec/test_db_setup/seeds/audit_truncation_table.yml +7 -0
  64. data/spec/test_db_setup/seeds/table_1.yml +13 -0
  65. data/spec/test_db_setup/seeds/table_2.yml +15 -0
  66. data/spec/test_db_setup/seeds/table_without_any_obfuscatable_columns.yml +7 -0
  67. data/spec/test_db_setup/seeds/table_without_any_user_defined_obfuscation_strategies.yml +13 -0
  68. data/spec/test_db_setup/seeds/truncation_table_1.yml +9 -0
  69. data/spec/test_db_setup/seeds/whitelisted_table_1.yml +9 -0
  70. metadata +159 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 6336fb052f506c8e195330c28ba4c3f7ae082924
4
+ data.tar.gz: 9fd2a9f813622d45839461f45a4fb2d225d4bf6a
5
+ SHA512:
6
+ metadata.gz: 1e93ff8b988dcbe559f17926fc71d361b25400b4937c75cc7948f9e2d1e50c89be7145d7ad9ff7b02a01eaa7eaf860dc31e7524cb453c5d17fe852e07e7a2dd0
7
+ data.tar.gz: e61875265eb209c2af9b0a8465c6f2c2ff3c394e2ee76ebf484b50cc99350134bdcc2b7cc8eef3b2d9bdd4ab1b73e7729d8bcbce698219fcba8a2ffb541e96f7
@@ -0,0 +1,6 @@
1
+ logs
2
+ log
3
+ .vimlog
4
+ db_schema.rb
5
+ logfile
6
+ test_logs
data/Gemfile ADDED
@@ -0,0 +1,13 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gem 'sequel'
4
+ gem 'sequel_pg'
5
+ gem 'thor'
6
+ gem 'ffaker'
7
+ gem 'pry-byebug'
8
+ gem 'activesupport'
9
+
10
+ group :test do
11
+ gem 'rspec'
12
+ gem 'aruba'
13
+ end
@@ -0,0 +1,83 @@
1
+ GEM
2
+ remote: https://rubygems.org/
3
+ specs:
4
+ activesupport (4.2.0)
5
+ i18n (~> 0.7)
6
+ json (~> 1.7, >= 1.7.7)
7
+ minitest (~> 5.1)
8
+ thread_safe (~> 0.3, >= 0.3.4)
9
+ tzinfo (~> 1.1)
10
+ aruba (0.6.2)
11
+ childprocess (>= 0.3.6)
12
+ cucumber (>= 1.1.1)
13
+ rspec-expectations (>= 2.7.0)
14
+ builder (3.2.2)
15
+ byebug (4.0.5)
16
+ columnize (= 0.9.0)
17
+ childprocess (0.5.6)
18
+ ffi (~> 1.0, >= 1.0.11)
19
+ coderay (1.1.0)
20
+ columnize (0.9.0)
21
+ cucumber (2.0.0)
22
+ builder (>= 2.1.2)
23
+ cucumber-core (~> 1.1.3)
24
+ diff-lcs (>= 1.1.3)
25
+ gherkin (~> 2.12)
26
+ multi_json (>= 1.7.5, < 2.0)
27
+ multi_test (>= 0.1.2)
28
+ cucumber-core (1.1.3)
29
+ gherkin (~> 2.12.0)
30
+ diff-lcs (1.2.5)
31
+ ffaker (2.0.0)
32
+ ffi (1.9.10)
33
+ gherkin (2.12.2)
34
+ multi_json (~> 1.3)
35
+ i18n (0.7.0)
36
+ json (1.8.2)
37
+ method_source (0.8.2)
38
+ minitest (5.5.1)
39
+ multi_json (1.11.1)
40
+ multi_test (0.1.2)
41
+ pg (0.18.2)
42
+ pry (0.10.1)
43
+ coderay (~> 1.1.0)
44
+ method_source (~> 0.8.1)
45
+ slop (~> 3.4)
46
+ pry-byebug (3.1.0)
47
+ byebug (~> 4.0)
48
+ pry (~> 0.10)
49
+ rspec (3.3.0)
50
+ rspec-core (~> 3.3.0)
51
+ rspec-expectations (~> 3.3.0)
52
+ rspec-mocks (~> 3.3.0)
53
+ rspec-core (3.3.1)
54
+ rspec-support (~> 3.3.0)
55
+ rspec-expectations (3.3.0)
56
+ diff-lcs (>= 1.2.0, < 2.0)
57
+ rspec-support (~> 3.3.0)
58
+ rspec-mocks (3.3.1)
59
+ diff-lcs (>= 1.2.0, < 2.0)
60
+ rspec-support (~> 3.3.0)
61
+ rspec-support (3.3.0)
62
+ sequel (4.24.0)
63
+ sequel_pg (1.6.13)
64
+ pg (>= 0.8.0)
65
+ sequel (>= 3.39.0)
66
+ slop (3.6.0)
67
+ thor (0.19.1)
68
+ thread_safe (0.3.4)
69
+ tzinfo (1.2.2)
70
+ thread_safe (~> 0.1)
71
+
72
+ PLATFORMS
73
+ ruby
74
+
75
+ DEPENDENCIES
76
+ activesupport
77
+ aruba
78
+ ffaker
79
+ pry-byebug
80
+ rspec
81
+ sequel
82
+ sequel_pg
83
+ thor
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2015 Case Commons
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,121 @@
1
+ # [db_obfuscation](https://github.com/CasecommonsDevops/db_obfuscation)
2
+
3
+
4
+ `db_obfuscation` is a gem that helps to prepare a production size obfuscated database. This obfuscated database can be used for internal testing purposes like user acceptance testing, QA/Regression testing.
5
+
6
+ `db_obfuscation` takes a production database and updates data in every row in each table with fake data. `db_obfuscation` ensures that associations between different tables are still maintained.
7
+
8
+ **The gem supports only postgres databases at the moment.**
9
+
10
+ ## Installation
11
+
12
+ `gem install db_obfuscation`
13
+
14
+
15
+ ## Usage
16
+
17
+ ```sh
18
+ db_obfuscation obfuscate -c <path of obfuscation_configuration>
19
+ -s <Number of rows to be obfuscated in each db transaction> #default 100
20
+ -l <name_of_log_file>
21
+ ```
22
+
23
+ `step_size` is a configuration that depends on every use case. It depends on the processing power of the computer, size of the table etc.
24
+
25
+ In our experience, 100 row updates per database transaction has been the optimal configuration for a database. However, this number may need to be changed to optimize the performance for your database.
26
+
27
+
28
+ ## Configuration
29
+
30
+ A sample configuration folder for the gem is included with the gem. The sample folder is at `spec/config`.
31
+
32
+ A generic configuration folder consists of the following files and folders:
33
+
34
+ 1. **Database Configuration file**
35
+
36
+ `<path_to_config_folder>/database.yml`
37
+
38
+ This file contains credentials to connect to the database. This file needs adapter name, host, encoding, username, password, and name of the database.
39
+
40
+ Sample `database.yml` file:
41
+
42
+ ```yaml
43
+ adapter: postgres
44
+ host: localhost
45
+ encoding: unicode
46
+ username: database_user
47
+ database: obfuscation_test
48
+ password: database_password
49
+ ```
50
+
51
+ 2. **Table Strategies**
52
+
53
+ `<path_to_config_folder>/table_strategies`
54
+
55
+ This folder contains a yaml file for every table for which a user wants to override the default obfuscation configuration.
56
+
57
+ Each table file contains a mapping between columns and the obfuscation strategy for each column. The filename is the same as the name of the table whose configuration it specifies.
58
+
59
+ A sample table strategy file is like,
60
+
61
+ `<spec/config/table_strategies/table_2.yml>`
62
+
63
+ ```yaml
64
+ table_2:
65
+ field_1: :default_strategy
66
+ field_2: :whitelisted
67
+ date_field: :date_strategy
68
+ field_3: :first_name_strategy
69
+ ```
70
+
71
+ `db_obfuscation`, by default, obfuscates every **string** column in a table.
72
+
73
+ It uses a random word to obfuscate every string column. This default behaviour can be overridden on a per-column and per-table basis by specifying different strategies.
74
+
75
+ Different strategies supported are,
76
+
77
+ - `:whitelisted` to skip obfuscating a particular string column in a table
78
+ - `:date_strategy` to include a date column that needs to be obfuscated.
79
+
80
+ Date columns in a table are not obfuscated by default. Including `:date_strategy` adds a random number of days between 31 and 240 to the current value of date.
81
+ - Complete list of different strategies is [here](https://github.com/CaseCommonsDevOps/db_obfuscation/blob/master/lib/db_obfuscation/obfuscator.rb).
82
+
83
+ 3. **Truncation Tables**
84
+
85
+ `<path_to_config_folder>/truncation_patterns.yml`
86
+
87
+ This file contains string patterns for table names that need to be truncated instead of being obfuscated.
88
+
89
+ Any table name that is the same as the pattern or begins with that pattern, followed by an underscore will be truncated during the obfuscation process.
90
+
91
+ A sample `truncation_patterns.yml` file is like,
92
+
93
+ ```yaml
94
+ - truncation_table_1
95
+ - audit
96
+ ```
97
+
98
+ Any table that begins with the word `audit_` will be selected for truncation.
99
+
100
+ 4. **Whitelisted Tables**
101
+
102
+ `<path_to_config_folder>/whitelisted_tables.yml`
103
+
104
+ This file contains names of tables that don't need to be obfuscated and should not be touched.
105
+
106
+ A sample `whitelisted_tables.yml` looks like this,
107
+
108
+ ```yaml
109
+ - whitelisted_table_1
110
+ - whitelisted_table_2
111
+ ```
112
+
113
+ ## Requirements
114
+
115
+ - Ruby 2.x
116
+
117
+ ## License
118
+
119
+ Copyright © 2015 Case Commons & Rajat Agrawal.
120
+
121
+ Licensed under the MIT license, available in the “LICENSE” file.
data/TODO ADDED
@@ -0,0 +1,15 @@
1
+ 1. Show the table number being obfuscated
2
+ 2. Don't obfuscate a table if the first update shows an exception
3
+ 3. Generalize triggers patterns
4
+ 4. Verify the configuration that has been specified by the user for obfuscation against the database. Like the column exists or not in the table strategy file
5
+ 5. Choosing verbosity of logs on the command line
6
+ 6. support both symbol and string use in user configuration files
7
+ 7. replace aruba cucumber with aruba rspec
8
+ 8. Don't fail if any of the user configuration is missing
9
+ 9. Write a task to generate a sample configuration
10
+ 10. Adding a dummy task
11
+
12
+
13
+ Nice to have
14
+
15
+ 1. Ability to obfuscate a single table from the command line
@@ -0,0 +1,25 @@
#!/usr/bin/env ruby

# Starts an interactive Pry console with db_obfuscation loaded.
#
# Usage: console <path_to_obfuscation_config_folder>
#
# The first command-line argument is stored as DbObfuscation.config_dir
# before the rest of the library is required.

require 'pathname'

config_path = ARGV[0]
# A missing argument is a usage error: report it on stderr and exit with a
# non-zero status so calling scripts can detect the failure.
abort 'Please enter the obfuscation config folder path' if config_path.nil?

require 'bundler'
Bundler.setup(:default)

# Make the gem's own lib/ directory loadable without installing the gem.
lib_path = File.expand_path('../../lib', __FILE__)
$LOAD_PATH.unshift(lib_path) unless $LOAD_PATH.include?(lib_path)

# NOTE(review): config_dir is assigned before 'db_obfuscation' is required;
# presumably the library reads it at load time - keep this ordering.
require 'db_obfuscation/environment'
DbObfuscation.config_dir = Pathname.new(config_path)

require 'db_obfuscation'
require 'db_obfuscation/util/trigger'
require 'pry'

puts 'Console is starting'
Pry.start
@@ -0,0 +1,121 @@
#!/usr/bin/env ruby

require "thor"

# Command-line interface of db_obfuscation, built on Thor.
#
# Commands:
#   dump      - dumps a database through DbObfuscation::Cli::DbDump
#   obfuscate - disables triggers, truncates tables, obfuscates the data and
#               re-enables the triggers
#
# Both commands log through DbObfuscation.logging and exit with status 1 when
# the work fails, so calling scripts can detect the failure.
class DbObfuscationCli < Thor

  desc 'dump', 'Take a dump of the obfuscated db'

  option(:db_config,
         aliases: :c,
         required: true,
         type: :string,
         banner: 'db connection file for the db to be dumped')

  # Thor reads an option's description from the :desc key; any other key
  # (such as :description) is silently ignored.
  option(:dump_name,
         aliases: :n,
         required: true,
         type: :string,
         banner: 'Name of the db dump',
         desc: <<-DESC.gsub(/\s{2,}/, ' ').strip
           A database dump helps to send the obfuscated dump
           to another computer. Use this command after obfuscating
           a database.
         DESC
        )
  option(:log_file,
         aliases: :l,
         default: STDOUT,
         type: :string,
         banner: 'Name of log file for logging')
  # Dumps the database described by --db_config under the name --dump_name.
  #
  # Failures raised while dumping are logged at fatal level and turn into
  # exit status 1. 'Process finished' is logged in every case.
  def dump
    # Set-up happens outside the begin block: the rescue/ensure code below
    # needs a working logger, so a set-up failure must surface as itself.
    require_relative '../lib/db_obfuscation/environment'
    initialize_logger(options[:log_file])

    begin
      require 'db_dump'

      DbObfuscation.logging.info 'Start process'
      DbObfuscation.logging.info 'Start dumping'
      DbObfuscation::Cli::DbDump.dump(options[:db_config], options[:dump_name])
      DbObfuscation.logging.info 'Finish dumping'
    rescue StandardError, ScriptError => e
      # ScriptError covers a failing `require` (LoadError). SystemExit and
      # Interrupt are deliberately not rescued.
      log_failure(e)
      exit(1)
    ensure
      DbObfuscation.logging.info 'Process finished'
    end
  end

  desc 'obfuscate', 'Obfuscates a database'
  option(:config_path,
         aliases: :c,
         required: true,
         type: :string,
         banner: 'Folder path for obfuscation strategies')
  option(:step_size,
         aliases: :s,
         default: 100,
         type: :numeric,
         banner: 'Number of rows to be obfuscated in every transaction')
  option(:log_file,
         aliases: :l,
         default: STDOUT,
         type: :string,
         banner: 'Name of log file for logging')

  # Obfuscates the database configured in the --config_path folder.
  #
  # Steps: disable all triggers, truncate tables, obfuscate using
  # --step_size, and finally re-enable all triggers. Triggers are re-enabled
  # even when an earlier step fails or the run is interrupted. Failures are
  # logged at fatal level and turn into exit status 1.
  def obfuscate
    # Set-up happens outside the begin block: the rescue/ensure code below
    # needs a working logger, so a set-up failure must surface as itself.
    require_relative '../lib/db_obfuscation/environment'
    DbObfuscation.config_dir = Pathname.new(options[:config_path])
    initialize_logger(options[:log_file])

    begin
      DbObfuscation.logging.info 'Starting process'

      require DbObfuscation::ROOT.join('lib/db_obfuscation')
      require DbObfuscation::ROOT.join('lib/db_obfuscation/truncation')
      require DbObfuscation::ROOT.join('lib/db_obfuscation/util/trigger')

      DbObfuscation.logging.info 'Disabling triggers'
      DbObfuscation::Util::Trigger.disable(:all)
      DbObfuscation.logging.info 'Finished disabling triggers'

      DbObfuscation.logging.info 'Truncating Tables'
      DbObfuscation::Truncation.truncate
      DbObfuscation.logging.info 'Finished truncating tables'

      DbObfuscation.logging.info 'Starting DbObfuscation'
      DbObfuscation.obfuscate(options[:step_size])
      DbObfuscation.logging.info 'Finished DbObfuscation'
    rescue StandardError, ScriptError => e
      # ScriptError covers a failing `require` (LoadError). SystemExit and
      # Interrupt are deliberately not rescued.
      log_failure(e)
      exit(1)
    ensure
      DbObfuscation.logging.info 'Finishing process'
      # When loading the library failed, the Trigger module does not exist;
      # referencing it here would raise a NameError that hides the real error.
      if defined?(DbObfuscation::Util::Trigger)
        DbObfuscation.logging.info 'Enabling triggers'
        DbObfuscation::Util::Trigger.enable(:all)
        DbObfuscation.logging.info 'Finished enabling triggers'
      end

      DbObfuscation.logging.info 'Process finished'
    end
  end

  no_tasks do
    # Installs a Logger writing to +filename+ as DbObfuscation.logging.
    #
    # filename - a file path, or STDOUT (the option default). A file path is
    #            created or truncated before the logger is attached to it.
    def initialize_logger(filename)
      File.open(filename, 'w') {} unless filename == STDOUT
      DbObfuscation.logging = Logger.new(filename)
    end

    # Logs a rescued exception (message and backtrace) at fatal level.
    def log_failure(error)
      DbObfuscation.logging.fatal 'Caught an exception'
      DbObfuscation.logging.fatal error.message
      DbObfuscation.logging.fatal Array(error.backtrace).join("\n")
    end
  end
end

DbObfuscationCli.start
@@ -0,0 +1,54 @@
#!/usr/bin/env ruby

require 'thor'
require 'yaml'
require_relative '../lib/db_obfuscation/environment'

# Command-line helper that (re)builds the test database used by the specs.
class ObfuscationTestCli < Thor

  desc 'initialize_database', 'Initializes a test environment database'
  option(:config,
         aliases: :c,
         type: :string,
         default: DbObfuscation::ROOT.join('spec/config/database.yml'),
         banner: 'Database yml containing the credentials to create/connect to the database')

  option(:data_path,
         aliases: :d,
         type: :string,
         default: DbObfuscation::ROOT.join('spec/test_db_setup'),
         banner: 'Root folder path containing migrations and seed files')

  # Drops and recreates the database named in the --config yml file, then
  # runs the migrations and seeds found under --data_path.
  #
  # The 'database' key of the yml file is required; 'username', when present,
  # becomes the owner of the new database. The command aborts with a non-zero
  # status when the database name is missing or `createdb` fails.
  def initialize_database
    require 'pathname'
    # An empty yml file loads as nil/false; treat it as an empty configuration.
    config = YAML.load_file(Pathname.new(options[:config])) || {}
    db_name = config['database'].to_s
    owner = config['username']
    # Without a name `dropdb`/`createdb` would act on the default database.
    abort "No database name found in #{options[:config]}" if db_name.empty?

    data_root = Pathname.new(options[:data_path])
    migrations_path = data_root.join('migrations')
    seeds_path = data_root.join('seeds')

    # Commands are passed to `system` as argument lists so no shell is
    # involved and configuration values cannot be interpreted as shell syntax.
    puts "Dropping database #{db_name}"
    if system('dropdb', '-e', db_name)
      puts 'Successfully dropped the database'
    else
      # Expected on a first run, when the database does not exist yet.
      puts "Could not drop database #{db_name}, continuing"
    end

    puts 'Creating database'
    create_command = ['createdb', '-e']
    create_command.push('-O', owner.to_s) if owner
    create_command << db_name
    # Migrating/seeding a database that could not be created would act on
    # stale data or fail later with a less helpful error.
    abort "Could not create database #{db_name}" unless system(*create_command)
    puts 'Finished creating database'

    puts 'Starting up migration'
    require_relative '../cli/migrator'
    DbObfuscation::Cli::Migrator.migrate(options[:config], migrations_path)
    puts 'Finished migration'

    puts 'Starting seeding'
    require_relative '../cli/seeder'
    DbObfuscation::Cli::Seeder.seed(options[:config], seeds_path)
    puts 'Finished seeding'
  end
end

ObfuscationTestCli.start