active_sanitization 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 621371200d0de25b03a94a5cc486bbfd8438950f
4
+ data.tar.gz: 96dd6d55756ac5d0cfa26c0eda7aa24099f31e1b
5
+ SHA512:
6
+ metadata.gz: 83d4f4053ac906dd77c8c2805f1e03ad524a54205bd8484086b2ade857ad48b8a7f90b54769a6f0a304c4df49719f99a2ef5c4d7861e64f31f4a61d8c05fbac8
7
+ data.tar.gz: f0b10f1d68af8e9e2028007dac8f1a4ad26b2692a287b4f2c3d465998e28da66760bb82e887eac2e6337d45b4c4be6b570f338f817693dcdeacecaf635798b09
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
data/.travis.yml ADDED
@@ -0,0 +1,7 @@
1
+ sudo: false
2
+ language: ruby
3
+ cache: bundler
4
+ rvm:
5
+ - 2.2.1
6
+ before_script:
7
+ - mkdir tmp
data/CHANGELOG.md ADDED
@@ -0,0 +1,5 @@
1
+ # 0.1.0
2
+ Initial release
3
+ - Sets up gem
4
+ - Adds dependencies
5
+ - Exposes rake tasks
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in active_sanitization.gemspec
4
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,157 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ active_sanitization (0.1.0)
5
+ aws-sdk (~> 2.0.33)
6
+
7
+ GEM
8
+ remote: https://rubygems.org/
9
+ specs:
10
+ actionmailer (4.2.1)
11
+ actionpack (= 4.2.1)
12
+ actionview (= 4.2.1)
13
+ activejob (= 4.2.1)
14
+ mail (~> 2.5, >= 2.5.4)
15
+ rails-dom-testing (~> 1.0, >= 1.0.5)
16
+ actionpack (4.2.1)
17
+ actionview (= 4.2.1)
18
+ activesupport (= 4.2.1)
19
+ rack (~> 1.6)
20
+ rack-test (~> 0.6.2)
21
+ rails-dom-testing (~> 1.0, >= 1.0.5)
22
+ rails-html-sanitizer (~> 1.0, >= 1.0.1)
23
+ actionview (4.2.1)
24
+ activesupport (= 4.2.1)
25
+ builder (~> 3.1)
26
+ erubis (~> 2.7.0)
27
+ rails-dom-testing (~> 1.0, >= 1.0.5)
28
+ rails-html-sanitizer (~> 1.0, >= 1.0.1)
29
+ activejob (4.2.1)
30
+ activesupport (= 4.2.1)
31
+ globalid (>= 0.3.0)
32
+ activemodel (4.2.1)
33
+ activesupport (= 4.2.1)
34
+ builder (~> 3.1)
35
+ activerecord (4.2.1)
36
+ activemodel (= 4.2.1)
37
+ activesupport (= 4.2.1)
38
+ arel (~> 6.0)
39
+ activesupport (4.2.1)
40
+ i18n (~> 0.7)
41
+ json (~> 1.7, >= 1.7.7)
42
+ minitest (~> 5.1)
43
+ thread_safe (~> 0.3, >= 0.3.4)
44
+ tzinfo (~> 1.1)
45
+ arel (6.0.0)
46
+ aws-sdk (2.0.33)
47
+ aws-sdk-resources (= 2.0.33)
48
+ aws-sdk-core (2.0.33)
49
+ builder (~> 3.0)
50
+ jmespath (~> 1.0)
51
+ multi_json (~> 1.0)
52
+ multi_xml (~> 0.5)
53
+ aws-sdk-resources (2.0.33)
54
+ aws-sdk-core (= 2.0.33)
55
+ builder (3.2.2)
56
+ byebug (4.0.4)
57
+ columnize (= 0.9.0)
58
+ coderay (1.1.0)
59
+ columnize (0.9.0)
60
+ diff-lcs (1.2.5)
61
+ erubis (2.7.0)
62
+ globalid (0.3.3)
63
+ activesupport (>= 4.1.0)
64
+ hike (1.2.3)
65
+ i18n (0.7.0)
66
+ jmespath (1.0.2)
67
+ multi_json (~> 1.0)
68
+ json (1.8.2)
69
+ loofah (2.0.1)
70
+ nokogiri (>= 1.5.9)
71
+ mail (2.6.3)
72
+ mime-types (>= 1.16, < 3)
73
+ method_source (0.8.2)
74
+ mime-types (2.4.3)
75
+ mini_portile (0.6.2)
76
+ minitest (5.5.1)
77
+ multi_json (1.11.0)
78
+ multi_xml (0.5.5)
79
+ mysql2 (0.3.18)
80
+ nokogiri (1.6.6.2)
81
+ mini_portile (~> 0.6.0)
82
+ pry (0.10.1)
83
+ coderay (~> 1.1.0)
84
+ method_source (~> 0.8.1)
85
+ slop (~> 3.4)
86
+ rack (1.6.0)
87
+ rack-test (0.6.3)
88
+ rack (>= 1.0)
89
+ rails (4.2.1)
90
+ actionmailer (= 4.2.1)
91
+ actionpack (= 4.2.1)
92
+ actionview (= 4.2.1)
93
+ activejob (= 4.2.1)
94
+ activemodel (= 4.2.1)
95
+ activerecord (= 4.2.1)
96
+ activesupport (= 4.2.1)
97
+ bundler (>= 1.3.0, < 2.0)
98
+ railties (= 4.2.1)
99
+ sprockets-rails
100
+ rails-deprecated_sanitizer (1.0.3)
101
+ activesupport (>= 4.2.0.alpha)
102
+ rails-dom-testing (1.0.6)
103
+ activesupport (>= 4.2.0.beta, < 5.0)
104
+ nokogiri (~> 1.6.0)
105
+ rails-deprecated_sanitizer (>= 1.0.1)
106
+ rails-html-sanitizer (1.0.2)
107
+ loofah (~> 2.0)
108
+ railties (4.2.1)
109
+ actionpack (= 4.2.1)
110
+ activesupport (= 4.2.1)
111
+ rake (>= 0.8.7)
112
+ thor (>= 0.18.1, < 2.0)
113
+ rake (10.4.2)
114
+ rspec (3.2.0)
115
+ rspec-core (~> 3.2.0)
116
+ rspec-expectations (~> 3.2.0)
117
+ rspec-mocks (~> 3.2.0)
118
+ rspec-core (3.2.1)
119
+ rspec-support (~> 3.2.0)
120
+ rspec-expectations (3.2.0)
121
+ diff-lcs (>= 1.2.0, < 2.0)
122
+ rspec-support (~> 3.2.0)
123
+ rspec-mocks (3.2.1)
124
+ diff-lcs (>= 1.2.0, < 2.0)
125
+ rspec-support (~> 3.2.0)
126
+ rspec-support (3.2.2)
127
+ slop (3.6.0)
128
+ sprockets (2.12.3)
129
+ hike (~> 1.2)
130
+ multi_json (~> 1.0)
131
+ rack (~> 1.0)
132
+ tilt (~> 1.1, != 1.3.0)
133
+ sprockets-rails (2.2.4)
134
+ actionpack (>= 3.0)
135
+ activesupport (>= 3.0)
136
+ sprockets (>= 2.8, < 4.0)
137
+ sqlite3 (1.3.10)
138
+ thor (0.19.1)
139
+ thread_safe (0.3.5)
140
+ tilt (1.4.1)
141
+ tzinfo (1.2.2)
142
+ thread_safe (~> 0.1)
143
+
144
+ PLATFORMS
145
+ ruby
146
+
147
+ DEPENDENCIES
148
+ active_sanitization!
149
+ activerecord (~> 4.2.1)
150
+ bundler (~> 1.8.3)
151
+ byebug (~> 4.0.4)
152
+ mysql2 (~> 0.3.18)
153
+ pry (~> 0.10.1)
154
+ rails (~> 4.2.1)
155
+ rake (~> 10.0)
156
+ rspec (~> 3.2.0)
157
+ sqlite3 (~> 1.3.10)
data/LICENSE.txt ADDED
@@ -0,0 +1,19 @@
1
+ The MIT License (MIT)
2
+  
3
+ Copyright (c) 2014 RightScale, Inc, All Rights Reserved Worldwide.
4
+  
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+ The above copyright notice and this permission notice shall be included in
12
+ all copies or substantial portions of the Software.
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,194 @@
1
+ # Active Sanitization
2
+
3
+ Active Sanitization provides an easy way to sanitize your MySQL database. By using the configuration options you are able to customize what gets sanitized, truncated and ignored. You can also provide S3 credentials, which allow the sanitized snapshot to be uploaded. This makes it easy for everyone to get access to the snapshot without having to go through a production access gatekeeper.
4
+
5
+ ## Features
6
+ ### How it works
7
+ Active Sanitization works by copying your current database into a temporary one. Once this is done it allows a series of sanitizations to be performed.
8
+ Before any of this happens it checks that the database doesn't contain any extra tables or columns that it doesn't know about. This means that there is no way an extra column can be added without the correct sanitization being added. (Please see the Sanitization tests section for more details).
9
+
10
+ Once all the pre_sanitization checks have been performed and passed, the sanitization process can begin. Active Sanitization will copy the content of all tables that require sanitization, and the structure of all tables that should be truncated (without data), to a temporary database. The sanitization process is relatively simple. It will loop through all tables, and if a table has any columns that require sanitization then it will sanitize them. It does that by swapping all distinct values for each column that requires sanitization with a random value from the allowed sanitized substitutes. Once it has done this it will call through to a custom sanitization class that you provide. (Please see usage for how this works).
11
+
12
+ After all tables have been sanitized, the temporary database will be exported to a tmp directory, before being dropped.
13
+
14
+ ### S3 Upload
15
+
16
+ There is an option to upload the sanitized snapshot to S3, and fetch it again. To do this simply provide the following values in the ActiveSanitization config:
17
+ ```
18
+ s3_bucket
19
+ s3_bucket_region
20
+ aws_access_key_id
21
+ aws_secret_access_key
22
+ ```
23
+
24
+ ## Installation
25
+
26
+ Add this line to your application's Gemfile:
27
+
28
+ ```ruby
29
+ gem 'active_sanitization'
30
+ ```
31
+
32
+ And then execute:
33
+
34
+ $ bundle
35
+
36
+ Or install it yourself as:
37
+
38
+ $ gem install active_sanitization
39
+
40
+ ## Usage
41
+
42
+ To use Active Sanitization add the following to your Rakefile
43
+ ```
44
+ require 'active_sanitization'
45
+ ```
46
+
47
+ Next you will need to configure the gem. This should be done in an initializer:
48
+ ```ruby
49
+ # After going through a table's standard sanitization, ActiveSanitization can call out to a custom bit of code.
50
+ # This allows you to provide various custom sanitizations that require a bit more in-depth knowledge of the data involved.
51
+ # To do this you should provide a class that contains the custom sanitization code. ActiveSanitization will call any function named
52
+ # `sanitize_TABLE_NAME`, and will pass it an ActiveRecord::Base.connection to the temporary database (that contains the data that is being sanitized).
53
+ # The example below removes all rows from the `people` table who have an age less than 22.
54
+ class CustomSanitization
55
+ def self.sanitize_people(temp_db_connection)
56
+ temp_db_connection.execute("DELETE FROM people WHERE age < 22")
57
+ end
58
+ end
59
+
60
+ ActiveSanitization.configure do |config|
61
+ # Tables that need to be sanitized
62
+ # This is a hash, where the key is each table name and the value is an array
63
+ # of all the columns in that table
64
+ config.tables_to_sanitize = {
65
+ "people" => ["address", "age", "gender", "id", "name"],
66
+ "cars" => ["id", "make", "model", "number_of_doors"],
67
+ }
68
+
69
+ # Tables that need to be truncated
70
+ # This is a hash, where the key is each table name and the value is an array
71
+ # of all the columns in that table
72
+ # After the sanitization process these tables will exist in the database dump, but they will be empty
73
+ config.tables_to_truncate = {
74
+ "hotels" => ["address", "id", "name", "number_of_rooms"]
75
+ }
76
+
77
+ # This is an array of all the other tables in the database that will be ignored.
78
+ # These will not be exported or have any sanitization applied to them
79
+ config.tables_to_ignore = []
80
+
81
+ # This is a hash of standard sanitizations that are applied to all columns with the same name as the key
82
+ # This is a hash where the key is the name of the column that needs to be sanitized, and the value is the list of
83
+ # substitutes that the values in the column are going to be replaced with.
84
+ # For example, every value in a column called `name` will be replaced with a random one of the following values: `['Tony', 'Adam', 'Claire', 'Sarah']`
85
+ config.sanitization_columns = {
86
+ 'name' => ['Tony', 'Adam', 'Claire', 'Sarah'],
87
+ 'make' => ['BMW', 'Toyota']
88
+ }
89
+
90
+ # This is the active_record_connection to the mysql database. The connection to the correct database should already be established
91
+ config.active_record_connection = ActiveRecord::Base.connection
92
+
93
+ # The current environment that your application is running in
94
+ config.env = "test"
95
+
96
+ # This is the database config, that ActiveRecord used to connect to the database.
97
+ # This is needed so we can establish a second connection to a temporary database (where we perform the sanitization)
98
+ config.db_config = {
99
+ 'host' => "localhost",
100
+ 'username' => "root",
101
+ 'password' => nil,
102
+ 'database' => "active_sanitization",
103
+ 'adapter' => "mysql2",
104
+ }
105
+
106
+ # The name of your app
107
+ config.app_name = 'super_secret_app'
108
+
109
+ # The logger that the gem should use.
110
+ # This will default to STDOUT if none is provided
111
+ config.logger = Rails.logger
112
+
113
+ # The path to the root of your project
114
+ # This is required so the database dump can be put in a tmp folder
115
+ config.root = File.dirname(File.dirname(__FILE__))
116
+
117
+ # This is a class that you provide to do custom sanitization
118
+ config.custom_sanitization = CustomSanitization
119
+
120
+ # Upload to S3.
121
+ # There is an option to upload the sanitized database dump
122
+ config.s3_bucket = S3_BUCKET
123
+ config.s3_bucket_region = 'us-east-1'
124
+ config.aws_access_key_id = AWS_ACCESS_KEY_ID
125
+ config.aws_secret_access_key = AWS_SECRET_ACCESS_KEY
126
+ end
127
+ ```
128
+ ### Sanitization tests
129
+
130
+ Active Sanitization provides an easy way to add a test that will fail if the config is not updated to include all tables and columns.
131
+ This can be done like:
132
+ ```
133
+ require 'spec_helper'
134
+
135
+ describe ActiveSanitization do
136
+ context ".pre_sanitization_checks" do
137
+ context "no new tables or columns have been added" do
138
+ # This will fail if a new column or table has been added but the hashes haven't been updated
139
+ it "doesn't stop" do
140
+ expect(ActiveSanitization.pre_sanitization_checks).to eq({
141
+ :pass => true
142
+ })
143
+ end
144
+ end
145
+ end
146
+ end
147
+ ```
148
+
149
+ It is also good to add tests for any custom sanitizations that you add. Once you have loaded your data into the database, you can call:
150
+ ```
151
+ @temp_db, @temp_db_connection, @temp_db_config = ActiveSanitization.duplicate_database
152
+ ActiveSanitization.sanitize_tables(@temp_db_connection)
153
+ ```
154
+ This will duplicate the database and call the sanitization code. After this has run you should assert that your custom sanitization has performed as expected.
155
+
156
+ ## Actually using the gem
157
+
158
+ This can be done using two rake tasks that ActiveSanitization provides.
159
+ ```
160
+ rake active_sanitization:import_data_from_s3[env,timestamp]
161
+ Import sanitized data from S3 into MySQL. Optional arguments are `env` and `timestamp`. These will default to 'production' and the latest snapshot if they are not provided
162
+
163
+ rake active_sanitization:sanitize_and_export_data
164
+ Sanitises MySQL database. If S3 creds are provided then the sanitized snapshot will be uploaded to S3
165
+ ```
166
+
167
+ ## Running the specs
168
+
169
+ This is the default rake task so you can run the specs in any of the following ways:
170
+
171
+ ```bash
172
+ bundle exec rake
173
+ bundle exec rake spec
174
+ ```
175
+
176
+ ## Getting a console
177
+
178
+ The project is currently using pry. In order to get a console in the context of the project, just run the `console` rake task:
179
+
180
+ ```bash
181
+ bundle exec rake console
182
+ ```
183
+
184
+ ## Contributing
185
+
186
+ 1. Fork it ( https://github.com/[my-github-username]/active_sanitization/fork )
187
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
188
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
189
+ 4. Push to the branch (`git push origin my-new-feature`)
190
+ 5. Create a new Pull Request
191
+
192
+ ## Maintained by
193
+
194
+ - [Stephen Haley](https://github.com/shaley91)
data/Rakefile ADDED
@@ -0,0 +1,15 @@
1
+ require "bundler/gem_tasks"
2
+ require 'rspec/core/rake_task'
3
+
4
+ require 'active_sanitization'
5
+
6
+ RSpec::Core::RakeTask.new('spec')
7
+ Dir[File.dirname(__FILE__) + '/lib/tasks/**/*.rake'].each { |file| import file }
8
+
9
+ task :default => :spec
10
+
11
+ desc "console"
12
+ task :console do
13
+ require 'pry'
14
+ binding.pry
15
+ end
@@ -0,0 +1,33 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'active_sanitization/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "active_sanitization"
8
+ spec.version = ActiveSanitization::VERSION
9
+ spec.authors = ["Stephen Haley", "Callum Dryden"]
10
+ spec.email = ["stephen.haley@rightscale.com", "callum.dryden@rightscale.com"]
11
+
12
+ spec.summary = %q{Quick in-place santization for your MySql DB}
13
+ spec.description = %q{Active Santization provides any easy way to consistently sanitize data from a MySql DB. With the config you can specify how to sanitize each tables and column.}
14
+ spec.homepage = "https://github.com/rightscale/active_sanitization"
15
+ spec.license = "MIT"
16
+
17
+ spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
18
+ spec.bindir = "exe"
19
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
20
+ spec.require_paths = ["lib"]
21
+
22
+ spec.add_development_dependency "bundler", "~> 1.8.3"
23
+ spec.add_development_dependency "rake", "~> 10.0"
24
+ spec.add_development_dependency "rspec", "~> 3.2.0"
25
+ spec.add_development_dependency "rails", "~> 4.2.1"
26
+ spec.add_development_dependency "pry", "~> 0.10.1"
27
+ spec.add_development_dependency "sqlite3", "~> 1.3.10"
28
+ spec.add_development_dependency "activerecord", "~> 4.2.1"
29
+ spec.add_development_dependency "mysql2", "~> 0.3.18"
30
+ spec.add_development_dependency "byebug", "~> 4.0.4"
31
+
32
+ spec.add_runtime_dependency "aws-sdk", "~> 2.0.33"
33
+ end
@@ -0,0 +1,3 @@
1
+ module ActiveSanitization
2
+ VERSION = "0.1.0"
3
+ end
@@ -0,0 +1,293 @@
1
+ require "active_sanitization/version"
2
+ require_relative "tasks/rake_tasks"
3
+ require "active_record"
4
+ require "active_support"
5
+ require 'aws-sdk'
6
+
7
+ module ActiveSanitization
8
+ class << self
9
+ attr_accessor :configuration
10
+ end
11
+
12
# Builds the shared Configuration the first time it is needed and yields it
# to the caller's block so that options can be assigned.
def self.configure
  self.configuration = Configuration.new unless configuration
  yield configuration
end
16
+
17
# Holds every setting that ActiveSanitization reads at runtime. An instance is
# created by ActiveSanitization.configure and filled in by the caller's block.
class Configuration
  attr_accessor :tables_to_sanitize, :tables_to_truncate, :tables_to_ignore,
                :sanitization_columns, :s3_bucket, :app_name,
                :aws_access_key_id, :aws_secret_access_key, :env,
                :active_record_connection, :db_config, :custom_sanitization,
                :logger, :root, :s3_bucket_region

  # Sets the defaults used when the caller does not override a setting.
  # Note that this asks ActiveRecord::Base for its connection immediately.
  def initialize
    @tables_to_sanitize = {}
    @tables_to_truncate = {}
    # A plain list of table names (the README configures it as an Array), so
    # the default is an Array too; it is only ever queried with include?.
    @tables_to_ignore = []
    @sanitization_columns = {}
    @s3_bucket = 'active_sanitization'
    @env = ENV['RACK_ENV'] || ENV['RAILS_ENV']
    @active_record_connection = ActiveRecord::Base.connection
    @root = File.dirname(File.dirname(__FILE__))
    @logger = Logger.new(STDOUT)
  end
end
32
+
33
+ # Need to create a second ActiveRecord::Base.connection so we can
34
+ # connect to the primary and copy DB.
35
# ActiveRecord base class used for the connection to the temporary copy of
# the database, separate from the primary ActiveRecord::Base connection.
class TempDatabaseConnection < ActiveRecord::Base
  class << self
    # Always true, so that this class gets its own connection.
    def abstract_class?
      true
    end
  end
end
40
+
41
# Computes the difference between two hashes.
#
# The result holds every pair of hash1 whose value differs from (or is absent
# in) hash2, followed by every pair of hash2 whose key hash1 does not have.
#
#   hash_diff({1 => 2}, {1 => 2})         # => {}
#   hash_diff({1 => 2}, {1 => 3})         # => {1 => 2}
#   hash_diff({}, {1 => 2})               # => {1 => 2}
#   hash_diff({1 => 2, 3 => 4}, {1 => 2}) # => {3 => 4}
def self.hash_diff(hash1, hash2)
  changed_or_missing_in_second = hash1.reject { |key, value| hash2[key] == value }
  missing_in_first = hash2.reject { |key, _value| hash1.has_key?(key) }

  changed_or_missing_in_second.merge(missing_in_first)
end
61
+
62
# Writes +output+ to the configured logger at info level. Nothing is logged
# when the configured env is 'test'.
def self.log(output)
  return if self.configuration.env == 'test'

  self.configuration.logger.info(output)
end
65
+
66
# Compares the tables and columns found in the database with the ones listed
# in tables_to_sanitize and tables_to_truncate (tables_to_ignore are skipped).
#
# Returns {:pass => true} when both sides agree, otherwise
# {:pass => false, :error => message} describing what the config is missing.
def self.pre_sanitization_checks
  connection = self.configuration.active_record_connection

  # Sorted column names per table, as they exist in the database.
  db_tables = {}
  connection.tables.each do |table_name|
    next if self.configuration.tables_to_ignore.include?(table_name)
    db_tables[table_name] = connection.columns(table_name).map { |c| c.name }.sort
  end

  # The diff only works when both sides list their columns in the same order.
  configured_tables = self.configuration.tables_to_sanitize.merge(self.configuration.tables_to_truncate)
  tables_with_sorted_columns = {}
  configured_tables.each { |k, v| tables_with_sorted_columns[k] = v.sort }

  table_difference = hash_diff(db_tables, tables_with_sorted_columns)
  return { :pass => true } if table_difference == {}

  # Per differing table, keep only the columns the config does not list.
  column_difference = {}
  table_difference.each do |table_name, table_columns|
    column_difference[table_name] = table_columns - configured_tables[table_name].to_a
  end

  {
    :pass => false,
    :error => "The following tables or columns have been found in the #{self.configuration.env} DB but are not known to this script (#{column_difference}).\n Please update the active_sanitization config!"
  }
end
92
+
93
# Creates a scratch database named "<database>_copy", copies the configured
# tables into it and opens a second ActiveRecord connection to it.
#
# Tables in tables_to_sanitize are copied with their data; tables in
# tables_to_truncate are copied as structure only (mysqldump --no-data).
#
# Returns [temp_db_name, temp_db_connection, temp_db_config].
# Raises RuntimeError when a mysqldump | mysql pipeline exits non-zero.
def self.duplicate_database
  db_config = self.configuration.db_config
  connection = self.configuration.active_record_connection
  temp_db = "#{db_config['database']}_copy"

  self.log("Deleting temp DB if exists")
  connection.execute("DROP DATABASE IF EXISTS #{temp_db};")
  self.log("Creating temp DB")
  connection.execute("CREATE DATABASE #{temp_db}")
  self.log("Copying #{self.configuration.env} DB to temp DB")

  self.copy_tables_to_temp_db(self.configuration.tables_to_sanitize.keys, temp_db)
  self.copy_tables_to_temp_db(self.configuration.tables_to_truncate.keys, temp_db, "--no-data")

  temp_db_config = db_config.dup
  temp_db_config['database'] = temp_db
  TempDatabaseConnection.establish_connection(temp_db_config)
  temp_db_connection = TempDatabaseConnection.connection

  [temp_db, temp_db_connection, temp_db_config]
end

# Helper for duplicate_database: pipes a mysqldump of +tables+ from the
# configured database into +temp_db+. +dump_options+ is an optional extra
# mysqldump flag (for example "--no-data").
def self.copy_tables_to_temp_db(tables, temp_db, dump_options = nil)
  # mysqldump dumps the WHOLE database when no table names are given, which
  # would copy ignored tables (with data) into the export. Copy nothing instead.
  return if tables.empty?

  db_config = self.configuration.db_config
  build_command = lambda do |password|
    credentials = "-h #{db_config['host']} -u #{db_config['username']} --password=#{password}"
    dump = ["mysqldump", credentials, dump_options, db_config['database'], tables.join(' ')].compact.join(' ')
    "#{dump} | mysql #{credentials} -D #{temp_db}"
  end

  # Log the command with the password masked; only the executed one carries it.
  self.log(build_command.call("[FILTERED]"))
  system(build_command.call(db_config['password']))

  if $?.exitstatus == 0
    self.log("Temp DB created and populated")
  else
    # Name only the database: the full db_config would leak the password.
    raise "Failed to load DB #{db_config['database']} into temp DB #{temp_db}."
  end
end
124
+
125
# Sanitizes one table of the temporary database.
#
# For every column named in sanitization_columns that the table has, each
# distinct value is replaced with a random entry from that column's list of
# substitutes. Afterwards, if the custom_sanitization object responds to
# "sanitize_<table>", it is called with the temporary connection.
#
# table              - name of the table to sanitize.
# temp_db_connection - ActiveRecord connection to the temporary database.
def self.sanitize_table(table, temp_db_connection)
  table_columns = temp_db_connection.select_values("DESCRIBE #{table};")

  self.configuration.sanitization_columns.each do |column, substitutes|
    next unless table_columns.include?(column)

    distinct_values = temp_db_connection.execute("SELECT DISTINCT(#{column}) FROM #{table};").collect { |data| data.first }
    distinct_values.each do |value|
      # "column = NULL" never matches a row, so NULLs have nothing to update.
      next if value.nil?

      # Quote both sides through the connection so substitutes or existing
      # values containing quotes cannot break (or inject into) the statement.
      replacement = temp_db_connection.quote(substitutes.sample)
      current = temp_db_connection.quote(value)
      temp_db_connection.execute("UPDATE #{table} SET #{column}=#{replacement} WHERE #{column}=#{current};")
    end
  end

  # Run any custom sanitization for the table
  custom = self.configuration.custom_sanitization
  custom.send("sanitize_#{table}", temp_db_connection) if custom.respond_to?("sanitize_#{table}")
end
139
+
140
# Creates (or truncates) the empty dump file and its ".gz" companion inside
# <root>/tmp.
#
# Returns [dump_file_path, compressed_dump_file_path].
def self.create_files
  dump_file = File.join(self.configuration.root, "tmp", "data.dump")
  compressed_dump_file = "#{dump_file}.gz"

  # The block form closes each handle straight away; File.new left both open.
  [dump_file, compressed_dump_file].each { |path| File.open(path, "w+") {} }

  [dump_file, compressed_dump_file]
end
147
+
148
# Empties every table listed in tables_to_truncate, then runs sanitize_table
# on every table listed in tables_to_sanitize, all on the temporary database.
def self.sanitize_tables(temp_db_connection)
  self.log("Processing TABLES_TO_TRUNCATE...")
  tables_to_empty = self.configuration.tables_to_truncate.keys
  tables_to_empty.each do |table_name|
    self.log("Truncating #{table_name}")
    temp_db_connection.execute("TRUNCATE #{table_name};")
  end

  self.log("Processing TABLES_TO_SANITIZE...")
  tables_to_clean = self.configuration.tables_to_sanitize.keys
  tables_to_clean.each do |table_name|
    self.log("Sanitizing #{table_name}")
    self.sanitize_table(table_name, temp_db_connection)
  end
end
161
+
162
# Drops the temporary database +temp_db+ through the primary connection.
def self.clean_up_temp_db(temp_db)
  self.log("Dropping #{temp_db}")
  drop_statement = "DROP DATABASE #{temp_db};"
  self.configuration.active_record_connection.execute(drop_statement)
end
166
+
167
# Compresses +dump_file+ in place with the gzip command line tool.
#
# Returns what Kernel#system returns: true on a zero exit status, false on a
# non-zero one, nil when the command could not be run.
def self.gzip(dump_file)
  self.log("Gzipping #{dump_file}")
  # Pass the path as its own argument so no shell is involved and a path
  # containing quotes or spaces cannot break the command.
  system("gzip", dump_file)
end
171
+
172
# Builds an S3 client from the configured AWS credentials and region, and
# returns the resource object for the configured bucket.
def self.get_s3_bucket
  config = self.configuration
  s3_client = Aws::S3::Client.new(
    credentials: Aws::Credentials.new(config.aws_access_key_id, config.aws_secret_access_key),
    region: config.s3_bucket_region
  )
  Aws::S3::Resource.new(client: s3_client).bucket(config.s3_bucket)
end
178
+
179
# Uploads +compressed_dump_file+ to the configured S3 bucket under
# "<app_name>/<env>/mysql/<timestamp>/<file name>" and then deletes the local
# file.
#
# Returns the S3 object the file was written to.
def self.upload(compressed_dump_file)
  timestamp = DateTime.now.strftime('%Y%m%d%H%M%S')
  name = "#{self.configuration.app_name}/#{self.configuration.env}/mysql/#{timestamp}/#{File.basename(compressed_dump_file)}"

  # The block form closes the file even when the upload raises.
  obj = File.open(compressed_dump_file, 'r') do |file|
    remote_object = get_s3_bucket.object(name)
    remote_object.put(body: file)
    remote_object
  end

  File.unlink(compressed_dump_file)

  obj
end
193
+
194
# Deletes the dump file and the compressed dump file, each only if it exists.
def self.clean_up_files(dump_file, compressed_dump_file)
  remove_if_present = lambda do |path|
    self.log("Deleting #{path}")
    File.delete(path) if File.exist?(path)
  end

  remove_if_present.call(dump_file)
  remove_if_present.call(compressed_dump_file)
end
200
+
201
# Appends a mysqldump of database +temp_db+ to +dump_file+, connecting with
# the host, username and password from +temp_db_config+. The outcome is only
# logged; a failed dump does not raise.
def self.export_temp_db_to_file(dump_file, temp_db_config, temp_db)
  self.log("Dumping temp DB to #{dump_file}")
  dump_command = "mysqldump -h #{temp_db_config['host']} -u #{temp_db_config['username']} --password=#{temp_db_config['password']} #{temp_db} >> '#{dump_file}'"
  system(dump_command)

  unless $?.exitstatus == 0
    self.log("Failed to create dump")
    return
  end

  self.log("Dump created")
end
211
+
212
# True when the configured env is 'development' or 'integration'.
def self.is_dev_or_integration_env?
  ['development', 'integration'].include?(self.configuration.env)
end
215
+
216
# Entry point of the export: runs the pre-sanitization checks, produces a
# gzipped dump and uploads it when S3 settings are present.
#
# When the checks fail, only the error is logged and nothing is exported.
def self.sanitize_and_export_data
  checks = self.pre_sanitization_checks
  return self.log(checks[:error]) unless checks[:pass]

  dump_file, compressed_dump_file = self.create_files
  self.clean_up_files(dump_file, compressed_dump_file)

  if self.is_dev_or_integration_env?
    # In a dev or integration env the DB does not need sanitizing, so it is
    # dumped to the file as it is.
    self.export_temp_db_to_file(dump_file, self.configuration.db_config, self.configuration.db_config["database"])
  else
    temp_db, temp_db_connection, temp_db_config = self.duplicate_database
    self.sanitize_tables(temp_db_connection)
    self.export_temp_db_to_file(dump_file, temp_db_config, temp_db)
    self.clean_up_temp_db(temp_db)
  end

  self.gzip(dump_file)

  s3_configured = self.configuration.s3_bucket && self.configuration.aws_access_key_id && self.configuration.aws_secret_access_key
  if s3_configured
    self.upload(compressed_dump_file)
  else
    self.clean_up_files(dump_file, compressed_dump_file)
  end

  self.log("-- DONE --")
end
243
+
244
# Replaces the local database with a sanitized snapshot downloaded from S3.
#
# env       - environment whose snapshot is fetched; nil means "production".
# timestamp - snapshot folder name (the %Y%m%d%H%M%S value written by
#             upload); nil selects the greatest timestamp under the prefix.
#
# The local DB is first dumped to <root>/tmp/local_data.dump, then the
# db:drop and db:create rake tasks are invoked and the snapshot is imported.
#
# Logs a message and returns nil when no matching snapshot exists.
# Raises RuntimeError when the local backup, the download or the import fails.
def self.import_data(env = nil, timestamp = nil)
  env = "production" if env.nil?
  db_config = self.configuration.db_config
  prefix = "#{self.configuration.app_name}/#{env}/mysql"

  bucket = get_s3_bucket
  if timestamp.nil?
    # Escape the prefix so characters such as "." in the app name match
    # literally; keys that do not match are dropped before taking the max.
    timestamp_pattern = %r{#{Regexp.escape(prefix)}/(.*)/}
    timestamp = bucket.objects(prefix: prefix).collect { |x| x.key[timestamp_pattern, 1] }.compact.max
  end

  # Check that there are files (as the user could have passed in an incorrect
  # timestamp). This happens before the local DB is touched.
  snapshot = timestamp && bucket.objects(prefix: "#{prefix}/#{timestamp}").first
  if snapshot.nil?
    self.log("No mysql snapshot for timestamp #{prefix}/#{timestamp}")
    return
  end

  self.log('WARNING: this rake task will dump your MySQL DB to tmp, then wipe your DB before importing a snapshot')
  tmp_dir = File.join(self.configuration.root, "tmp")
  local_dump_file = "#{tmp_dir}/local_data.dump"
  mysql_credentials = "-h #{db_config['host']} -u #{db_config['username']} --password=#{db_config['password']}"

  # Make copy of local DB just in case something goes wrong
  system("mysqldump #{mysql_credentials} #{db_config['database']} > '#{local_dump_file}'")
  if $?.exitstatus == 0
    self.log("Local DB dump stored in #{local_dump_file}")
  else
    raise "Failed to create a local DB dump. If a previous local dump exists, please delete it and try again."
  end

  # Download the snapshot, and stop before dropping the DB if that fails.
  compressed_dump_file = "#{tmp_dir}/data.dump.gz"
  self.log("Downloading file to #{compressed_dump_file}")
  url = snapshot.object.presigned_url(:get, expires_in: 600)
  system("curl -o '#{compressed_dump_file}' '#{url}'")
  unless $?.exitstatus == 0
    raise "Failed to download snapshot #{prefix}/#{timestamp} to #{compressed_dump_file}"
  end

  # reset db
  self.log("Recreating your local DB")
  Rake::Task["db:drop"].invoke
  Rake::Task["db:create"].invoke

  # Import data with the configured credentials (the same ones used for the
  # backup above) instead of a hard-coded root user.
  self.log("Unzipping and importing data...")
  system("gunzip -c '#{compressed_dump_file}' | mysql #{mysql_credentials} #{db_config['database']}")
  if $?.exitstatus == 0
    File.delete(compressed_dump_file) if File.exist?(compressed_dump_file)
  else
    # Name only the database: the full db_config would leak the password.
    raise "Could not load #{compressed_dump_file} into DB #{db_config['database']}"
  end
  self.log('-- DONE --')
end
293
+ end
@@ -0,0 +1,23 @@
1
+ require 'rake'
2
+
3
module ActiveSanitization
  # Defines the gem's rake tasks under the active_sanitization namespace.
  class RakeTasks
    include Rake::DSL if defined? Rake::DSL

    # Registers both tasks; each depends on the :environment task.
    def install_tasks
      namespace(:active_sanitization) do
        desc "Sanitises MySQL database. If S3 creds are provided then the sanitized snapshot will be uploaded to S3"
        task(:sanitize_and_export_data => :environment) { ActiveSanitization.sanitize_and_export_data }

        desc "Import sanitized data from S3 into MySQL. Optional arguments are `env` and `timestamp`. These will default to 'production' and the latest snapshot if they are not provided"
        task(:import_data_from_s3, [:env, :timestamp] => [:environment]) do |_task, args|
          ActiveSanitization.import_data(args[:env], args[:timestamp])
        end
      end
    end
  end
end
22
+
23
+ ActiveSanitization::RakeTasks.new.install_tasks
metadata ADDED
@@ -0,0 +1,200 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: active_sanitization
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Stephen Haley
8
+ - Callum Dryden
9
+ autorequire:
10
+ bindir: exe
11
+ cert_chain: []
12
+ date: 2015-04-03 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: bundler
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - "~>"
19
+ - !ruby/object:Gem::Version
20
+ version: 1.8.3
21
+ type: :development
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - "~>"
26
+ - !ruby/object:Gem::Version
27
+ version: 1.8.3
28
+ - !ruby/object:Gem::Dependency
29
+ name: rake
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - "~>"
33
+ - !ruby/object:Gem::Version
34
+ version: '10.0'
35
+ type: :development
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - "~>"
40
+ - !ruby/object:Gem::Version
41
+ version: '10.0'
42
+ - !ruby/object:Gem::Dependency
43
+ name: rspec
44
+ requirement: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - "~>"
47
+ - !ruby/object:Gem::Version
48
+ version: 3.2.0
49
+ type: :development
50
+ prerelease: false
51
+ version_requirements: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - "~>"
54
+ - !ruby/object:Gem::Version
55
+ version: 3.2.0
56
+ - !ruby/object:Gem::Dependency
57
+ name: rails
58
+ requirement: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - "~>"
61
+ - !ruby/object:Gem::Version
62
+ version: 4.2.1
63
+ type: :development
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - "~>"
68
+ - !ruby/object:Gem::Version
69
+ version: 4.2.1
70
+ - !ruby/object:Gem::Dependency
71
+ name: pry
72
+ requirement: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - "~>"
75
+ - !ruby/object:Gem::Version
76
+ version: 0.10.1
77
+ type: :development
78
+ prerelease: false
79
+ version_requirements: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - "~>"
82
+ - !ruby/object:Gem::Version
83
+ version: 0.10.1
84
+ - !ruby/object:Gem::Dependency
85
+ name: sqlite3
86
+ requirement: !ruby/object:Gem::Requirement
87
+ requirements:
88
+ - - "~>"
89
+ - !ruby/object:Gem::Version
90
+ version: 1.3.10
91
+ type: :development
92
+ prerelease: false
93
+ version_requirements: !ruby/object:Gem::Requirement
94
+ requirements:
95
+ - - "~>"
96
+ - !ruby/object:Gem::Version
97
+ version: 1.3.10
98
+ - !ruby/object:Gem::Dependency
99
+ name: activerecord
100
+ requirement: !ruby/object:Gem::Requirement
101
+ requirements:
102
+ - - "~>"
103
+ - !ruby/object:Gem::Version
104
+ version: 4.2.1
105
+ type: :development
106
+ prerelease: false
107
+ version_requirements: !ruby/object:Gem::Requirement
108
+ requirements:
109
+ - - "~>"
110
+ - !ruby/object:Gem::Version
111
+ version: 4.2.1
112
+ - !ruby/object:Gem::Dependency
113
+ name: mysql2
114
+ requirement: !ruby/object:Gem::Requirement
115
+ requirements:
116
+ - - "~>"
117
+ - !ruby/object:Gem::Version
118
+ version: 0.3.18
119
+ type: :development
120
+ prerelease: false
121
+ version_requirements: !ruby/object:Gem::Requirement
122
+ requirements:
123
+ - - "~>"
124
+ - !ruby/object:Gem::Version
125
+ version: 0.3.18
126
+ - !ruby/object:Gem::Dependency
127
+ name: byebug
128
+ requirement: !ruby/object:Gem::Requirement
129
+ requirements:
130
+ - - "~>"
131
+ - !ruby/object:Gem::Version
132
+ version: 4.0.4
133
+ type: :development
134
+ prerelease: false
135
+ version_requirements: !ruby/object:Gem::Requirement
136
+ requirements:
137
+ - - "~>"
138
+ - !ruby/object:Gem::Version
139
+ version: 4.0.4
140
+ - !ruby/object:Gem::Dependency
141
+ name: aws-sdk
142
+ requirement: !ruby/object:Gem::Requirement
143
+ requirements:
144
+ - - "~>"
145
+ - !ruby/object:Gem::Version
146
+ version: 2.0.33
147
+ type: :runtime
148
+ prerelease: false
149
+ version_requirements: !ruby/object:Gem::Requirement
150
+ requirements:
151
+ - - "~>"
152
+ - !ruby/object:Gem::Version
153
+ version: 2.0.33
154
+ description: Active Sanitization provides an easy way to consistently sanitize data
155
+ from a MySQL DB. With the config you can specify how to sanitize each table and
156
+ column.
157
+ email:
158
+ - stephen.haley@rightscale.com
159
+ - callum.dryden@rightscale.com
160
+ executables: []
161
+ extensions: []
162
+ extra_rdoc_files: []
163
+ files:
164
+ - ".rspec"
165
+ - ".travis.yml"
166
+ - CHANGELOG.md
167
+ - Gemfile
168
+ - Gemfile.lock
169
+ - LICENSE.txt
170
+ - README.md
171
+ - Rakefile
172
+ - active_sanitization.gemspec
173
+ - lib/active_sanitization.rb
174
+ - lib/active_sanitization/version.rb
175
+ - lib/tasks/rake_tasks.rb
176
+ homepage: https://github.com/rightscale/active_sanitization
177
+ licenses:
178
+ - MIT
179
+ metadata: {}
180
+ post_install_message:
181
+ rdoc_options: []
182
+ require_paths:
183
+ - lib
184
+ required_ruby_version: !ruby/object:Gem::Requirement
185
+ requirements:
186
+ - - ">="
187
+ - !ruby/object:Gem::Version
188
+ version: '0'
189
+ required_rubygems_version: !ruby/object:Gem::Requirement
190
+ requirements:
191
+ - - ">="
192
+ - !ruby/object:Gem::Version
193
+ version: '0'
194
+ requirements: []
195
+ rubyforge_project:
196
+ rubygems_version: 2.2.2
197
+ signing_key:
198
+ specification_version: 4
199
+ summary: Quick in-place sanitization for your MySQL DB
200
+ test_files: []