iterationlabs-my_obfuscate 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,8 @@
1
+ *.sw?
2
+ .DS_Store
3
+ coverage
4
+ rdoc
5
+ pkg
6
+ .idea
7
+ .rvmrc
8
+ Gemfile.lock
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Fetch gems over TLS; a plain http:// source lets anyone on the network path tamper with downloads.
source "https://rubygems.org"

# Runtime and development dependencies are declared in the gemspec and pulled in from there.
gemspec
data/LICENSE ADDED
@@ -0,0 +1,19 @@
1
+ Copyright (c) 2009 Honk
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ of this software and associated documentation files (the "Software"), to deal
5
+ in the Software without restriction, including without limitation the rights
6
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ copies of the Software, and to permit persons to whom the Software is
8
+ furnished to do so, subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included in
11
+ all copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ THE SOFTWARE.
@@ -0,0 +1,85 @@
1
+ = my_obfuscate
2
+
3
+ Standalone Ruby code for the selective rewriting of SQL dumps in order to protect user privacy. Supports MySQL and SQL Server.
4
+
5
+ = Install
6
+
7
+ sudo gem install my_obfuscate
8
+
9
+ = Example Usage
10
+
11
+ Make an obfuscator.rb script:
12
+
13
+ #!/usr/bin/env ruby
14
+ require "rubygems"
15
+ require "my_obfuscate"
16
+
17
+ obfuscator = MyObfuscate.new({
18
+ :people => {
19
+ :email => { :type => :email, :skip_regexes => [/^[\w\.\_]+@my_company\.com$/i] },
20
+ :ethnicity => :keep,
21
+ :crypted_password => { :type => :fixed, :string => "SOME_FIXED_PASSWORD_FOR_EASE_OF_DEBUGGING" },
22
+ :salt => { :type => :fixed, :string => "SOME_THING" },
23
+ :remember_token => :null,
24
+ :remember_token_expires_at => :null,
25
+ :age => { :type => :null, :unless => lambda { |person| person[:email] == "hello@example.com" } },
26
+ :photo_file_name => :null,
27
+ :photo_content_type => :null,
28
+ :photo_file_size => :null,
29
+ :photo_updated_at => :null,
30
+ :postal_code => { :type => :fixed, :string => "94109", :unless => lambda {|person| person[:postal_code] == "12345"} },
31
+ :name => :name,
32
+ :full_address => :address,
33
+ :bio => { :type => :lorem, :number => 4 },
34
+ :relationship_status => { :type => :fixed, :one_of => ["Single", "Divorced", "Married", "Engaged", "In a Relationship"] },
35
+ :has_children => { :type => :integer, :between => 0..1 },
36
+ },
37
+
38
+ :invites => :truncate,
39
+ :invite_requests => :truncate,
40
+ :tags => :keep,
41
+
42
+ :relationships => {
43
+ :account_id => :keep,
44
+ :code => { :type => :string, :length => 8, :chars => MyObfuscate::USERNAME_CHARS }
45
+ }
46
+ })
47
+ obfuscator.fail_on_unspecified_columns = true # if you want it to require every column in the table to be in the above definition
48
+ obfuscator.globally_kept_columns = %w[id created_at updated_at] # if you set fail_on_unspecified_columns, you may want this as well
49
+ obfuscator.obfuscate(STDIN, STDOUT)
50
+
51
+ And to get an obfuscated dump:
52
+ mysqldump -c --add-drop-table -u user -ppassword database | ruby obfuscator.rb > obfuscated_dump.sql
53
+ Note that the -c option on mysqldump is required to use my_obfuscate.
54
+
55
+ == Database Server
56
+
57
+ By default the database type is assumed to be MySQL, but you can use the
58
+ builtin SQL Server support by specifying:
59
+
60
+ obfuscator.database_type = :sql_server
61
+
62
+ == Changes
63
+
64
+ * Support for SQL Server
65
+ * :unless and :if now support :nil as a shorthand for a Proc that checks for nil
66
+ * :name, :lorem, and :address are all now supported types. You can pass :number to :lorem to specify how many sentences to generate. The default is one.
67
+ * <tt>{ :type => :whatever }</tt> is now optional when no additional options are needed. Just use <tt>:whatever</tt>.
68
+ * Warnings are thrown when an unknown column type or table is encountered. Use <tt>:keep</tt> in both cases.
69
+ * <tt>{ :type => :fixed, :string => proc { |row| ... } }</tt> is now available.
70
+
71
+ == Note on Patches/Pull Requests
72
+
73
+ * Fork the project.
74
+ * Make your feature addition or bug fix.
75
+ * Add tests for it. This is important so I don't break it in a future version unintentionally.
76
+ * Commit, do not mess with rakefile, version, or history. (If you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
77
+ * Send me a pull request. Bonus points for topic branches.
78
+
79
+ == Thanks
80
+
81
+ Thanks to Mavenlink and Pivotal Labs for patches and updates!
82
+
83
+ == Copyright
84
+
85
+ Copyright (c) 2009 Honk. Now maintained by Iteration Labs, LLC. See LICENSE for details.
@@ -0,0 +1,8 @@
1
+ require 'bundler/gem_tasks'
2
+ require 'rspec/core/rake_task'
3
+
4
# `rake spec` runs every example file matching spec/**/*_spec.rb.
RSpec::Core::RakeTask.new(:spec) { |t| t.pattern = FileList['spec/**/*_spec.rb'] }

# Running `rake` with no task name runs the specs.
task :default => :spec
@@ -0,0 +1,22 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "my_obfuscate/version"
4
+
5
# Gem metadata. The version comes from lib/my_obfuscate/version.rb and the file lists are taken
# from git, so this must be evaluated inside a git checkout.
Gem::Specification.new do |s|
  s.name = %q{iterationlabs-my_obfuscate}
  s.version = MyObfuscate::VERSION

  s.authors = ["Andrew Cantino", "Dave Willett", "Mike Grafton", "Mason Glaves", "Greg Bell", "Mavenlink"]
  # Keep in step with the README: the library handles SQL Server dumps as well as MySQL ones.
  s.description = %q{Standalone Ruby code for the selective rewriting of MySQL and SQL Server dumps in order to protect user privacy.}
  s.email = %q{andrew@iterationlabs.com}
  s.homepage = %q{http://github.com/iterationlabs/myobfuscate}
  s.summary = %q{Standalone Ruby code for the selective rewriting of MySQL and SQL Server dumps in order to protect user privacy.}

  s.add_development_dependency "rspec"
  # Pinned to an exact faker release.
  s.add_dependency "faker", "=0.9.5"

  s.files = `git ls-files`.split("\n")
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]
end
@@ -0,0 +1,213 @@
1
+ require 'jcode' if RUBY_VERSION < '1.9'
2
+ require 'faker'
3
+ require 'my_obfuscate/mysql'
4
+ require 'my_obfuscate/sql_server'
5
+
6
# Rewrites SQL dumps so that selected column values are replaced with generated or fixed data.
# INSERT statements are recognised by a database helper (Mysql by default, SqlServer when
# +database_type+ is :sql_server). MySQL dumps must be produced with mysqldump's -c option so
# that the INSERT statements carry column names.
class MyObfuscate
  attr_accessor :config, :globally_kept_columns, :fail_on_unspecified_columns, :database_type

  NUMBER_CHARS = "1234567890"
  USERNAME_CHARS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_" + NUMBER_CHARS
  SENSIBLE_CHARS = USERNAME_CHARS + '+-=[{]}/?|!@#$%^&*()`~'

  # Make a new MyObfuscate object. Pass in a configuration structure to define how the obfuscation
  # should be performed: a Hash keyed by table name (Symbol) whose values are :truncate, :keep, or a
  # Hash of column name => column definition. See the README.rdoc file for more information.
  def initialize(configuration = {})
    @config = configuration
  end

  # Predicate form of the +fail_on_unspecified_columns+ accessor.
  def fail_on_unspecified_columns?
    @fail_on_unspecified_columns
  end

  # Returns the dialect helper, building it on first use. The helper is cached, so +database_type+
  # has to be set before the first call for it to take effect.
  def database_helper
    if @database_helper.nil?
      @database_helper = (@database_type == :sql_server) ? SqlServer.new : Mysql.new
    end

    @database_helper
  end

  # Read an input stream and dump out an obfuscated output stream. These streams could be StringIO
  # objects, Files, or STDIN and STDOUT.
  #
  # Lines that are not INSERT statements are copied through untouched. INSERTs into tables that are
  # missing from the config are also copied through, with a deprecation warning on $stderr.
  def obfuscate(input_io, output_io)
    # We assume that every INSERT INTO line occupies one line in the file, with no internal linebreaks.
    input_io.each do |line|
      if table_data = database_helper.parse_insert_statement(line)
        table_name = table_data[:table_name]
        columns = table_data[:column_names]
        if config[table_name]
          output_io.puts obfuscate_bulk_insert_line(line, table_name, columns)
        else
          $stderr.puts "Deprecated: #{table_name} was not specified in the config.  A future release will cause this to be an error.  Please specify the table definition or set it to :keep."
          output_io.write line
        end
      else
        output_io.write line
      end
    end
  end

  # Splits +line+ into its rows, yields each row (an Array of values, nil for NULL) to the block,
  # and rebuilds a single INSERT statement from whatever rows the block returns.
  def reassembling_each_insert(line, table_name, columns)
    output = database_helper.rows_to_be_inserted(line).map do |sub_insert|
      result = yield(sub_insert)
      result = result.map do |i|
        database_helper.make_valid_value_string(i)
      end
      result = result.join(",")
      "(" + result + ")"
    end.join(",")
    database_helper.make_insert_statement(table_name, columns, output)
  end

  # Pairs column names with row values: row_as_hash([1, 2], [:a, :b]) => {:a => 1, :b => 2}.
  def self.row_as_hash(row, columns)
    columns.zip(row).inject({}) {|m, (name, value)| m[name] = value; m}
  end

  # Expands the :blank and :nil shorthands into lambdas that test the current value of
  # row[index]; anything else (typically a Proc taking the row hash) is returned unchanged.
  def self.make_conditional_method(conditional_method, index, row)
    if conditional_method.is_a?(Symbol)
      if conditional_method == :blank
        conditional_method = lambda { |row_hash| row[index].nil? || row[index] == '' }
      elsif conditional_method == :nil
        conditional_method = lambda { |row_hash| row[index].nil? }
      end
    end
    conditional_method
  end

  # Applies a table's column definitions to one row, mutating and returning +row+.
  #
  # row          - Array of values, in the same order as +columns+.
  # table_config - Hash of column name => definition. A definition is either a bare type Symbol or
  #                a Hash with :type plus options (:unless, :if, :skip_regexes, :length, :chars,
  #                :number, :between, :string, :one_of). Non-Hash configs leave the row untouched.
  # columns      - Array of column name Symbols.
  #
  # :if / :unless procs and :fixed string procs receive a Hash of the row's ORIGINAL values, taken
  # before any column is rewritten.
  def self.apply_table_config(row, table_config, columns)
    return row unless table_config.is_a?(Hash)
    row_hash = row_as_hash(row, columns)

    table_config.each do |column, definition|
      index = columns.index(column)
      # A configured column that is absent from this statement has nothing to rewrite. Without this
      # guard a nil index would be coerced to 0 and overwrite the first column of the row.
      next if index.nil?

      definition = { :type => definition } if definition.is_a?(Symbol)

      if definition.has_key?(:unless)
        unless_check = make_conditional_method(definition[:unless], index, row)

        next if unless_check.call(row_hash)
      end


      if definition.has_key?(:if)
        if_check = make_conditional_method(definition[:if], index, row)

        next unless if_check.call(row_hash)
      end

      if definition[:skip_regexes]
        next if definition[:skip_regexes].any? {|regex| row[index] =~ regex}
      end

      row[index] = case definition[:type]
        when :email
          random_string(definition[:length] || (4..10), USERNAME_CHARS) + "@example.com"
        when :string
          random_string(definition[:length] || 30, definition[:chars] || SENSIBLE_CHARS)
        when :lorem
          clean_bad_whitespace(clean_quotes(Faker::Lorem.sentences(definition[:number] || 1).join(".  ")))
        when :name
          clean_quotes(Faker::Name.name)
        when :first_name
          clean_quotes(Faker::Name.first_name)
        when :last_name
          clean_quotes(Faker::Name.last_name)
        when :address
          # "\\n" is a literal backslash followed by n, i.e. an already-escaped newline for the dump.
          clean_quotes("#{Faker::Address.street_address}\\n#{Faker::Address.city}, #{Faker::Address.state_abbr} #{Faker::Address.zip_code}")
        when :street_address
          clean_bad_whitespace(clean_quotes(Faker::Address.street_address))
        when :city
          clean_quotes(Faker::Address.city)
        when :state
          Faker::Address.state_abbr
        when :zip_code
          Faker::Address.zip_code
        when :phone
          Faker::PhoneNumber.phone_number
        when :integer
          random_integer(definition[:between] || (0..1000)).to_s
        when :fixed
          if definition[:one_of]
            definition[:one_of][(rand * definition[:one_of].length).to_i]
          else
            definition[:string].is_a?(Proc) ? definition[:string].call(row_hash) : definition[:string]
          end
        when :null
          nil
        when :keep
          row[index]
        else
          $stderr.puts "Keeping a column value by providing an unknown type (#{definition[:type]}) is deprecated.  Use :keep instead."
          row[index]
      end
    end
    row
  end

  # Returns an Integer drawn uniformly from +between+, a non-empty Range of Integers (inclusive or
  # exclusive). Every value in the range, endpoints included, is equally likely.
  #
  # Raises ArgumentError when the range is empty.
  def self.random_integer(between)
    low = between.min
    high = between.max
    if low.nil? || high.nil?
      raise ArgumentError, "random_integer needs a non-empty range, got #{between.inspect}"
    end
    low + rand(high - low + 1)
  end

  # Builds a random string from the characters of +chars+. +length_or_range+ is either an exact
  # Integer length or a Range from which the length is drawn.
  def self.random_string(length_or_range, chars)
    # Integer rather than Fixnum: Fixnum no longer exists on current Rubies.
    length_or_range = (length_or_range..length_or_range) if length_or_range.is_a?(Integer)
    times = random_integer(length_or_range)
    out = ""
    times.times { out << chars[rand(chars.length)] }
    out
  end

  # Raises RuntimeError listing every column that is configured for +table_name+ but does not
  # appear in +columns+ (the columns named by the INSERT statement).
  def check_for_defined_columns_not_in_table(table_name, columns)
    missing_columns = config[table_name].keys - columns
    unless missing_columns.length == 0
      error_message = missing_columns.map do |missing_column|
        "Column '#{missing_column}' could not be found in table '#{table_name}', please fix your obfuscator config."
      end.join("\n")
      raise RuntimeError, error_message
    end
  end

  # Raises RuntimeError listing every column of the INSERT statement that is neither configured for
  # +table_name+ nor named in +globally_kept_columns+.
  def check_for_table_columns_not_in_definition(table_name, columns)
    missing_columns = columns - (config[table_name].keys + (globally_kept_columns || []).map {|i| i.to_sym}).uniq
    unless missing_columns.length == 0
      error_message = missing_columns.map do |missing_column|
        "Column '#{missing_column}' defined in table '#{table_name}', but not found in table definition, please fix your obfuscator config."
      end.join("\n")
      raise RuntimeError, error_message
    end
  end

  # Produces the output for one INSERT line: "" for :truncate tables, the line itself for :keep
  # tables, otherwise the statement rebuilt from obfuscated rows. Column checks run first, so a
  # config that names unknown columns raises before anything is rewritten.
  def obfuscate_bulk_insert_line(line, table_name, columns)
    table_config = config[table_name]
    if table_config == :truncate
      ""
    elsif table_config == :keep
      line
    else
      check_for_defined_columns_not_in_table(table_name, columns)
      check_for_table_columns_not_in_definition(table_name, columns) if fail_on_unspecified_columns?
      # Note: Remember to SQL escape strings in what you pass back.
      reassembling_each_insert(line, table_name, columns) do |row|
        MyObfuscate.apply_table_config(row, table_config, columns)
      end
    end
  end

  # NOTE: +private+ only affects instance methods; the two +def self.+ helpers below remain
  # publicly callable. They are kept that way so existing callers are not broken.
  private

  # Strips single and double quotes so generated text cannot terminate a SQL string literal.
  def self.clean_quotes(value)
    value.gsub(/['"]/, '')
  end

  # Strips newlines, tabs and carriage returns from generated text.
  def self.clean_bad_whitespace(value)
    value.gsub(/[\n\t\r]/, '')
  end
end
@@ -0,0 +1,87 @@
1
# Declared in nested form so this file loads whether or not MyObfuscate has been defined yet;
# the compact `class MyObfuscate::Mysql` form raises NameError when it is required first.
class MyObfuscate
  # MySQL dialect helper: recognises `INSERT INTO` lines that include column names (mysqldump -c),
  # splits them into rows of values, and renders values and statements back into SQL.
  class Mysql
    INSERT_REGEX = /^\s*INSERT INTO `(.*?)` \((.*?)\) VALUES\s*/i

    # Returns { :table_name => Symbol, :column_names => [Symbol, ...] } when +line+ is an INSERT
    # statement with a column list, otherwise nil.
    def parse_insert_statement(line)
      regex_match = INSERT_REGEX.match(line)
      return nil unless regex_match

      {
        :table_name => regex_match[1].to_sym,
        :column_names => regex_match[2].split(/`\s*,\s*`/).map { |col| col.gsub('`', "").to_sym }
      }
    end

    # Returns one Array per inserted row. Each value is the raw text between the quotes (escape
    # sequences are kept as written), the digits of an unquoted value, or nil for NULL.
    def rows_to_be_inserted(line)
      line = line.gsub(INSERT_REGEX, '').gsub(/\s*;\s*$/, '')
      context_aware_mysql_string_split(line)
    end

    # Renders one value for the VALUES list: NULL for nil, otherwise the value in single quotes.
    # Non-String values (for example an Integer from a :fixed definition) are interpolated via to_s
    # rather than raising TypeError.
    def make_valid_value_string(value)
      if value.nil?
        "NULL"
      else
        "'#{value}'"
      end
    end

    # Builds the full INSERT statement; +values_strings+ is the already-joined "(...),(...)" text.
    def make_insert_statement(table_name, column_names, values_strings)
      "INSERT INTO `#{table_name}` (`#{column_names.join('`, `')}`) VALUES #{values_strings};"
    end

    private

    # Character-by-character splitter for the text after VALUES.
    # Be aware, strings must be quoted in single quotes!
    def context_aware_mysql_string_split(string)
      in_sub_insert = false
      in_quoted_string = false
      escaped = false
      current_field = nil
      fields = []
      output = []

      string.each_char do |char|
        if escaped
          # The character after a backslash is always taken literally.
          escaped = false
          current_field ||= ""
          current_field << char
        else
          if char == "\\"
            # Keep the backslash itself so the value can be written back unchanged.
            escaped = true
            current_field ||= ""
            current_field << char
          elsif char == "(" && !in_quoted_string && !in_sub_insert
            in_sub_insert = true
          elsif char == ")" && !in_quoted_string && in_sub_insert
            # End of one row.
            fields << current_field unless current_field.nil?
            output << fields unless fields.length == 0
            in_sub_insert = false
            fields = []
            current_field = nil
          elsif char == "'" && !in_quoted_string
            fields << current_field unless current_field.nil?
            current_field = ''
            in_quoted_string = true
          elsif char == "'" && in_quoted_string
            fields << current_field unless current_field.nil?
            current_field = nil
            in_quoted_string = false
          elsif char == "," && !in_quoted_string && in_sub_insert
            fields << current_field unless current_field.nil?
            current_field = nil
          elsif char == "L" && !in_quoted_string && in_sub_insert && current_field == "NUL"
            # The final letter of an unquoted NULL: record the field as nil.
            current_field = nil
            fields << current_field
          elsif (char == " " || char == "\t") && !in_quoted_string
            # Don't add whitespace not in a string
          elsif in_sub_insert
            current_field ||= ""
            current_field << char
          end
        end
      end

      fields << current_field unless current_field.nil?
      output << fields unless fields.length == 0
      output
    end
  end
end
@@ -0,0 +1,79 @@
1
# Declared in nested form so this file loads whether or not MyObfuscate has been defined yet;
# the compact `class MyObfuscate::SqlServer` form raises NameError when it is required first.
class MyObfuscate
  # SQL Server dialect helper: recognises `INSERT [dbo].[table] (...) VALUES (...)` lines, splits
  # the single row they carry into values, and renders values and statements back into SQL.
  class SqlServer
    INSERT_REGEX = /^\s*INSERT (?:INTO )?\[dbo\]\.\[(.*?)\] \((.*?)\) VALUES\s*/i

    # Returns { :table_name => Symbol, :column_names => [Symbol, ...] } when +line+ is an INSERT
    # statement with a column list, otherwise nil.
    def parse_insert_statement(line)
      regex_match = INSERT_REGEX.match(line)
      return nil unless regex_match

      {
        :table_name => regex_match[1].to_sym,
        :column_names => regex_match[2].split(/\]\s*,\s*\[/).map { |col| col.gsub(/[\[\]]/, "").to_sym }
      }
    end

    # Strips the statement prefix, an optional trailing semicolon and the outermost parentheses,
    # then splits what is left. Always returns an Array holding exactly one row.
    def rows_to_be_inserted(line)
      line = line.gsub(INSERT_REGEX, '').gsub(/\s*;?\s*$/, '').gsub(/^\(/, '').gsub(/\)$/, '')
      context_aware_sql_server_string_split(line)
    end

    # Renders one value for the VALUES list: NULL for nil, function-call expressions such as
    # CAST(...) verbatim, anything else as an N'...' literal. Non-String values are converted with
    # to_s first instead of raising NoMethodError.
    def make_valid_value_string(value)
      return "NULL" if value.nil?

      text = value.to_s
      if text.match(/^[A-Z]+\(.*?\)$/)
        text
      else
        "N'#{text}'"
      end
    end

    # Builds the full INSERT statement; +values_strings+ is the already-joined "(...)" text.
    def make_insert_statement(table_name, column_names, values_strings)
      "INSERT [dbo].[#{table_name}] ([#{column_names.join("], [")}]) VALUES #{values_strings};"
    end

    private

    # Character-by-character splitter for the contents of one VALUES (...) list. A quote inside a
    # quoted string only ends the string if the next character is not another quote; a doubled
    # quote is kept as '' so the value can be written back unchanged.
    def context_aware_sql_server_string_split(string)
      in_quoted_string = false
      previous_char_single_quote = false
      current_field_value = nil
      completed_fields = []

      string.each_char do |char|
        if char == "'" && !in_quoted_string
          # Opening quote. A pending "N" is the literal's prefix, not a value of its own.
          if current_field_value != "N"
            completed_fields << current_field_value unless current_field_value.nil?
          end
          current_field_value = ""
          in_quoted_string = true
        elsif previous_char_single_quote
          previous_char_single_quote = false
          if char == "'"
            current_field_value << "''"
          else
            # The previous quote really closed the string; this character is discarded.
            completed_fields << current_field_value unless current_field_value.nil?
            in_quoted_string = false
            current_field_value = nil
          end
        elsif char == "'" && in_quoted_string
          previous_char_single_quote = true
        elsif char == "," && !in_quoted_string
          completed_fields << current_field_value unless current_field_value.nil?
          current_field_value = nil
        elsif char == "L" && !in_quoted_string && current_field_value == "NUL"
          # The final letter of an unquoted NULL: record the field as nil.
          current_field_value = nil
          completed_fields << current_field_value
        elsif (char == " " || char == "\t") && !in_quoted_string
          # Whitespace outside strings is dropped, except inside a CAST(...) expression.
          if !current_field_value.nil? && current_field_value.start_with?("CAST(")
            current_field_value << char
          end
        else
          current_field_value ||= ""
          current_field_value << char
        end
      end

      completed_fields << current_field_value unless current_field_value.nil?
      [completed_fields]
    end
  end
end
@@ -0,0 +1,3 @@
1
+ # Holds the gem's release number; the gemspec reads it as MyObfuscate::VERSION.
+ class MyObfuscate
2
+ VERSION = "0.3.3"
3
+ end
@@ -0,0 +1,531 @@
1
+ require 'spec_helper'
2
+
3
+ describe MyObfuscate do
4
+ describe "MyObfuscate.reassembling_each_insert" do
5
+ before do
6
+ @column_names = [:a, :b, :c, :d]
7
+ @test_insert = "INSERT INTO `some_table` (`a`, `b`, `c`, `d`) VALUES ('(\\'bob@bob.com','b()ob','some(thingelse1','25)('),('joe@joe.com','joe','somethingelse2','54');"
8
+ @test_insert_passes = [
9
+ ["(\\'bob@bob.com", "b()ob", "some(thingelse1", "25)("],
10
+ ["joe@joe.com", "joe", "somethingelse2", "54"]
11
+ ]
12
+ end
13
+
14
+ it "should yield each subinsert and reassemble the result" do
15
+ count = 0
16
+ reassembled = MyObfuscate.new.reassembling_each_insert(@test_insert, "some_table", @column_names) do |sub_insert|
17
+ sub_insert.should == @test_insert_passes.shift
18
+ count += 1
19
+ sub_insert
20
+ end
21
+ count.should == 2
22
+ reassembled.should == @test_insert
23
+ end
24
+ end
25
+
26
+ describe "MyObfuscate.apply_table_config" do
27
+ it "should work on email addresses" do
28
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else"], {:a => {:type => :email}}, [:a, :b])
29
+ new_row.length.should == 2
30
+ new_row.first.should =~ /^\w+\@\w+\.\w+$/
31
+ end
32
+
33
+ it "should work on strings" do
34
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "something crazy"], {:b => {:type => :string, :length => 7}}, [:a, :b, :c])
35
+ new_row.length.should == 3
36
+ new_row[1].length.should == 7
37
+ new_row[1].should_not == "something_else"
38
+ end
39
+
40
+ describe "conditional directives" do
41
+ it "should honor :unless conditionals" do
42
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "5"], {:a=> {:type => :fixed, :string => "123", :unless => lambda { |row| row[:a] == "blah" }}}, [:a, :b, :c])
43
+ new_row[0].should_not == "123"
44
+ new_row[0].should == "blah"
45
+
46
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "5"], {:a=> {:type => :fixed, :string => "123", :unless => lambda { |row| row[:a] == "not blah" }}}, [:a, :b, :c])
47
+ new_row[0].should == "123"
48
+
49
+ new_row = MyObfuscate.apply_table_config([nil, "something_else", "5"], {:a=> {:type => :fixed, :string => "123", :unless => :nil}, :b=> {:type => :fixed, :string => "123", :unless => :nil}}, [:a, :b, :c])
50
+ new_row[0].should == nil
51
+ new_row[1].should == "123"
52
+
53
+ new_row = MyObfuscate.apply_table_config(['', "something_else", "5"], {:a=> {:type => :fixed, :string => "123", :unless => :blank}, :b=> {:type => :fixed, :string => "123", :unless => :blank}}, [:a, :b, :c])
54
+ new_row[0].should == ''
55
+ new_row[1].should == "123"
56
+ end
57
+
58
+ it "should honor :if conditionals" do
59
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "5"], {:a=> {:type => :fixed, :string => "123", :if => lambda { |row| row[:a] == "blah" }}}, [:a, :b, :c])
60
+ new_row[0].should == "123"
61
+
62
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "5"], {:a=> {:type => :fixed, :string => "123", :if=> lambda { |row| row[:a] == "not blah" }}}, [:a, :b, :c])
63
+ new_row[0].should_not == "123"
64
+ new_row[0].should == "blah"
65
+
66
+ new_row = MyObfuscate.apply_table_config([nil, "something_else", "5"], {:a=> {:type => :fixed, :string => "123", :if => :nil}, :b=> {:type => :fixed, :string => "123", :if => :nil}}, [:a, :b, :c])
67
+ new_row[0].should == "123"
68
+ new_row[1].should == "something_else"
69
+
70
+ new_row = MyObfuscate.apply_table_config(['', "something_else", "5"], {:a=> {:type => :fixed, :string => "123", :if => :blank}, :b=> {:type => :fixed, :string => "123", :if => :blank}}, [:a, :b, :c])
71
+ new_row[0].should == "123"
72
+ new_row[1].should == "something_else"
73
+ end
74
+
75
+ it "should supply the original row values to the conditional" do
76
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else"], {:a => {:type => :fixed, :string => "123"}, :b => {:type => :fixed, :string => "yup", :if => lambda { |row| row[:a] == "blah" }}}, [:a, :b])
77
+ new_row[0].should == "123"
78
+ new_row[1].should == "yup"
79
+ end
80
+
81
+ it "should honor combined :unless and :if conditionals" do
82
+ #both true
83
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "5"], {:a=> {:type => :fixed, :string => "123", :if => lambda { |row| row[:a] == "blah" }, :unless => lambda { |row| row[:b] == "something_else" }}}, [:a, :b, :c])
84
+ new_row[0].should == "blah"
85
+
86
+ #both false
87
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "5"], {:a=> {:type => :fixed, :string => "123", :if => lambda { |row| row[:a] == "not blah" }, :unless => lambda { |row| row[:b] == "not something_else" }}}, [:a, :b, :c])
88
+ new_row[0].should == "blah"
89
+
90
+ #if true, #unless false
91
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "5"], {:a=> {:type => :fixed, :string => "123", :if => lambda { |row| row[:a] == "blah" }, :unless => lambda { |row| row[:b] == "not something_else" }}}, [:a, :b, :c])
92
+ new_row[0].should == "123"
93
+
94
+ #if false, #unless true
95
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "5"], {:a=> {:type => :fixed, :string => "123", :if => lambda { |row| row[:a] == "not blah" }, :unless => lambda { |row| row[:b] == "something_else" }}}, [:a, :b, :c])
96
+ new_row[0].should == "blah"
97
+ end
98
+ end
99
+
100
+ it "should be able to generate random integers in ranges" do
101
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "5"], {:c => {:type => :integer, :between => 10..100}}, [:a, :b, :c])
102
+ new_row.length.should == 3
103
+ new_row[2].to_i.to_s.should == new_row[2] # It should be an integer.
104
+ new_row[2].should_not == "5"
105
+ end
106
+
107
+ it "should be able to substitute fixed strings" do
108
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "5"], {:b => {:type => :fixed, :string => "hello"}}, [:a, :b, :c])
109
+ new_row.length.should == 3
110
+ new_row[1].should == "hello"
111
+ end
112
+
113
+ it "should be able to substitute a proc that returns a string" do
114
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "5"], {:b => {:type => :fixed, :string => proc { "Hello World" }}}, [:a, :b, :c])
115
+ new_row.length.should == 3
116
+ new_row[1].should == "Hello World"
117
+ end
118
+
119
+ it "should provide the row to the proc" do
120
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "5"], {:b => {:type => :fixed, :string => proc { |a| a[:b] }}}, [:a, :b, :c])
121
+ new_row.length.should == 3
122
+ new_row[1].should == "something_else"
123
+ end
124
+
125
+ it "should be able to substitute fixed strings from a random set" do
126
+ looking_for = ["hello", "world"]
127
+ original_looking_for = looking_for.dup
128
+ guard = 0
129
+ while !looking_for.empty? && guard < 1000
130
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "5"], {:a => {:type => :fixed, :one_of => ["hello", "world"]}}, [:a, :b, :c])
131
+ new_row.length.should == 3
132
+ original_looking_for.should include(new_row[0])
133
+ looking_for.delete new_row[0]
134
+ guard += 1
135
+ end
136
+ looking_for.should be_empty
137
+ end
138
+
139
+ it "should treat a symbol in the column definition as an implicit { :type => symbol }" do
140
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "5"], {:b => :null, :a => :keep}, [:a, :b, :c])
141
+ new_row.length.should == 3
142
+ new_row[0].should == "blah"
143
+ new_row[1].should == nil
144
+ end
145
+
146
+ it "should be able to set things NULL" do
147
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "5"], {:b => {:type => :null}}, [:a, :b, :c])
148
+ new_row.length.should == 3
149
+ new_row[1].should == nil
150
+ end
151
+
152
+ it "should be able to :keep the value the same" do
153
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "5"], {:b => {:type => :keep}}, [:a, :b, :c])
154
+ new_row.length.should == 3
155
+ new_row[1].should == "something_else"
156
+ end
157
+
158
+ it "should keep the value when given an unknown type, but should display a warning" do
159
+ $stderr = error_output = StringIO.new
160
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "5"], {:b => {:type => :unknown_type}}, [:a, :b, :c])
161
+ $stderr = STDERR
162
+ new_row.length.should == 3
163
+ new_row[1].should == "something_else"
164
+ error_output.rewind
165
+ error_output.read.should =~ /Keeping a column value by.*?unknown_type/
166
+ end
167
+
168
+ it "should be able to substitute lorem ipsum text" do
169
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "5"], {:a => :lorem, :b => {:type => :lorem, :number => 2}}, [:a, :b, :c])
170
+ new_row.length.should == 3
171
+ new_row[0].should_not == "blah"
172
+ new_row[0].should_not =~ /\w\.(?!\Z)/
173
+ new_row[1].should_not == "something_else"
174
+ new_row[1].should =~ /\w\.(?!\Z)/
175
+ end
176
+
177
+ it "should be able to generate an :address" do
178
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "5"], {:a => :address}, [:a, :b, :c])
179
+ new_row.length.should == 3
180
+ new_row[0].should_not == "blah"
181
+ new_row[0].should =~ /\d+ \w/
182
+ end
183
+
184
+ it "should be able to generate a :name" do
185
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "5"], {:a => :name}, [:a, :b, :c])
186
+ new_row.length.should == 3
187
+ new_row[0].should_not == "blah"
188
+ new_row[0].should =~ / /
189
+ end
190
+
191
+ it "should be able to generate just a street address" do
192
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "5"], {:a => :street_address}, [:a, :b, :c])
193
+ new_row.length.should == 3
194
+ new_row[0].should_not == "blah"
195
+ new_row[0].should =~ /\d+ \w/
196
+ end
197
+
198
+ it "should be able to generate a city" do
199
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "5"], {:a => :city}, [:a, :b, :c])
200
+ new_row.length.should == 3
201
+ new_row[0].should_not == "blah"
202
+ end
203
+
204
+ it "should be able to generate a state" do
205
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "5"], {:a => :state}, [:a, :b, :c])
206
+ new_row.length.should == 3
207
+ new_row[0].should_not == "blah"
208
+ end
209
+
210
+ it "should be able to generate a zip code" do
211
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "5"], {:a => :zip_code}, [:a, :b, :c])
212
+ new_row.length.should == 3
213
+ new_row[0].should_not == "blah"
214
+ new_row[0].should =~ /\d+/
215
+ end
216
+
217
+ it "should be able to generate a phone number" do
218
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "5"], {:a => :phone}, [:a, :b, :c])
219
+ new_row.length.should == 3
220
+ new_row[0].should_not == "blah"
221
+ new_row[0].should =~ /\d+/
222
+ end
223
+
224
+ describe "when faker generates values with quotes in them" do
225
+ before do
226
+ Faker::Address.stub(:city).and_return("O'ReillyTown")
227
+ Faker::Name.stub(:name).and_return("Foo O'Reilly")
228
+ Faker::Name.stub(:first_name).and_return("O'Foo")
229
+ Faker::Name.stub(:last_name).and_return("O'Reilly")
230
+ Faker::Lorem.stub(:sentences).with(any_args).and_return(["Foo bar O'Thingy"])
231
+ end
232
+
233
+ it "should remove single quotes from the value" do
234
+ new_row = MyObfuscate.apply_table_config(["address", "city", "first", "last", "fullname", "some text"],
235
+ {:a => :address, :b => :city, :c => :first_name, :d => :last_name, :e => :name, :f => :lorem},
236
+ [:a, :b, :c, :d, :e, :f])
237
+ new_row.each {|value| value.should_not include("'")}
238
+ end
239
+ end
240
+ end
241
+
242
+ describe "MyObfuscate.row_as_hash" do
243
+ it "will map row values into a hash with column names as keys" do
244
+ MyObfuscate.row_as_hash([1, 2, 3, 4], [:a, :b, :c, :d]).should == {:a => 1, :b => 2, :c => 3, :d => 4}
245
+ end
246
+ end
247
+
248
+ describe "#obfuscate" do
249
+ describe "when using MySQL" do
250
+ context "when there is nothing to obfuscate" do # input without INSERT statements is expected to pass through unchanged
251
+ it "should accept an IO object for input and output, and copy the input to the output" do
252
+ ddo = MyObfuscate.new # no table configuration is supplied
253
+ string = "hello, world\nsup?" # two lines; the last one has no trailing newline
254
+ input = StringIO.new(string)
255
+ output = StringIO.new
256
+ ddo.obfuscate(input, output)
257
+ input.rewind # input is not read again in this example
258
+ output.rewind # reposition so that read returns everything written
259
+ output.read.should == string # exact copy expected
260
+ end
261
+ end
262
+
263
+ context "when the dump to obfuscate is missing columns" do # the config names a column that the dump does not contain
264
+ before do
265
+ @database_dump = StringIO.new(<<-SQL) # the INSERT lists email, name, something and age only
266
+ INSERT INTO `some_table` (`email`, `name`, `something`, `age`) VALUES ('bob@honk.com','bob', 'some\\'thin,ge())lse1', 25),('joe@joe.com','joe', 'somethingelse2', 54);
267
+ SQL
268
+ @ddo = MyObfuscate.new({
269
+ :some_table => {
270
+ :email => {:type => :email, :honk_email_skip => true},
271
+ :name => {:type => :string, :length => 8, :chars => MyObfuscate::USERNAME_CHARS},
272
+ :gender => {:type => :fixed, :string => "m"} # :gender is absent from the INSERT column list above
273
+ }})
274
+ @output = StringIO.new
275
+ end
276
+
277
+ it "should raise an error if a column name can't be found" do
278
+ lambda {
279
+ @ddo.obfuscate(@database_dump, @output)
280
+ }.should raise_error # no class or message given, so any exception satisfies this matcher
281
+ end
282
+ end
283
+
284
+ context "when there is something to obfuscate" do
285
+ before do # builds a five-table dump, obfuscates it once, and keeps the output and the captured $stderr text
286
+ @database_dump = StringIO.new(<<-SQL) # one multi-row INSERT per table
287
+ INSERT INTO `some_table` (`email`, `name`, `something`, `age`) VALUES ('bob@honk.com','bob', 'some\\'thin,ge())lse1', 25),('joe@joe.com','joe', 'somethingelse2', 54),('dontmurderme@direwolf.com','direwolf', 'somethingelse3', 44);
288
+ INSERT INTO `another_table` (`a`, `b`, `c`, `d`) VALUES (1,2,3,4), (5,6,7,8);
289
+ INSERT INTO `some_table_to_keep` (`a`, `b`, `c`, `d`) VALUES (1,2,3,4), (5,6,7,8);
290
+ INSERT INTO `one_more_table` (`a`, `password`, `c`, `d,d`) VALUES ('hello','kjhjd^&dkjh', 'aawefjkafe'), ('hello1','kjhj!', 892938), ('hello2','moose!!', NULL);
291
+ INSERT INTO `an_ignored_table` (`col`, `col2`) VALUES ('hello','kjhjd^&dkjh'), ('hello1','kjhj!'), ('hello2','moose!!');
292
+ SQL
293
+
294
+ @ddo = MyObfuscate.new({ # an_ignored_table is deliberately left out of this config
295
+ :some_table => {
296
+ :email => {:type => :email, :skip_regexes => [/^[\w\.\_]+@honk\.com$/i, /^dontmurderme@direwolf.com$/]}, # NOTE(review): the "." in direwolf.com is unescaped and matches any character
297
+ :name => {:type => :string, :length => 8, :chars => MyObfuscate::USERNAME_CHARS},
298
+ :age => {:type => :integer, :between => 10...80} # three-dot range: 80 itself is excluded
299
+ },
300
+ :another_table => :truncate,
301
+ :some_table_to_keep => :keep,
302
+ :one_more_table => {
303
+ # Note: fixed strings must be pre-SQL escaped!
304
+ :password => {:type => :fixed, :string => "monkey"},
305
+ :c => {:type => :null}
306
+ }
307
+ })
308
+ @output = StringIO.new
309
+ $stderr = @error_output = StringIO.new # redirect $stderr so that text written during obfuscation can be inspected
310
+ @ddo.obfuscate(@database_dump, @output)
311
+ $stderr = STDERR # NOTE(review): skipped if obfuscate raises; there is no ensure around the call
312
+ @output.rewind
313
+ @output_string = @output.read # the examples in this context assert against this string
314
+ end
315
+
316
+ it "should be able to truncate tables" do # another_table is configured as :truncate
317
+ @output_string.should_not include("INSERT INTO `another_table`") # no INSERT for the truncated table is emitted
318
+ @output_string.should include("INSERT INTO `one_more_table`") # other tables still produce INSERT statements
319
+ end
320
+
321
+ it "should be able to declare tables to keep" do # some_table_to_keep is configured as :keep
322
+ @output_string.should include("INSERT INTO `some_table_to_keep` (`a`, `b`, `c`, `d`) VALUES (1,2,3,4), (5,6,7,8);") # byte-identical to the input statement, original spacing included
323
+ end
324
+
325
+ it "should ignore tables that it doesn't know about, but should warn" do # an_ignored_table has no entry in the config
326
+ @output_string.should include("INSERT INTO `an_ignored_table` (`col`, `col2`) VALUES ('hello','kjhjd^&dkjh'), ('hello1','kjhj!'), ('hello2','moose!!');") # statement is copied through unchanged
327
+ @error_output.rewind # reposition the captured $stderr buffer before reading it
328
+ @error_output.read.should =~ /an_ignored_table was not specified in the config/ # the warning is expected on $stderr, not in the dump output
329
+ end
330
+
331
+ it "should obfuscate the tables" do
332
+ @output_string.should include("INSERT INTO `some_table` (`email`, `name`, `something`, `age`) VALUES (")
333
+ @output_string.should include("INSERT INTO `one_more_table` (`a`, `password`, `c`, `d,d`) VALUES (")
334
+ @output_string.should include("'some\\'thin,ge())lse1'")
335
+ @output_string.should include("INSERT INTO `one_more_table` (`a`, `password`, `c`, `d,d`) VALUES ('hello','monkey',NULL),('hello1','monkey',NULL),('hello2','monkey',NULL);")
336
+ @output_string.should_not include("INSERT INTO `one_more_table` (`a`, `password`, `c`, `d,d`) VALUES ('hello','kjhjd^&dkjh', 'aawefjkafe'), ('hello1','kjhj!', 892938), ('hello2','moose!!', NULL);")
337
+ @output_string.should_not include("INSERT INTO `one_more_table` (`a`, `password`, `c`, `d,d`) VALUES ('hello','kjhjd^&dkjh','aawefjkafe'),('hello1','kjhj!',892938),('hello2','moose!!',NULL);")
338
+ @output_string.should_not include("INSERT INTO `some_table` (`email`, `name`, `something`, `age`) VALUES ('bob@honk.com','bob', 'some\\'thin,ge())lse1', 25),('joe@joe.com','joe', 'somethingelse2', 54);")
339
+ end
340
+
341
+ it "honors a special case: on the people table, rows with anything@honk.com in a slot marked with :honk_email_skip do not change this slot" do
342
+ @output_string.should include("('bob@honk.com',")
343
+ @output_string.should include("('dontmurderme@direwolf.com',")
344
+ @output_string.should_not include("joe@joe.com")
345
+ end
346
+ end
347
+
348
+ context "when fail_on_unspecified_columns is set to true" do # strict mode: dump columns missing from the config are an error
349
+ before do
350
+ @database_dump = StringIO.new(<<-SQL) # the dump contains a `something` column
351
+ INSERT INTO `some_table` (`email`, `name`, `something`, `age`) VALUES ('bob@honk.com','bob', 'some\\'thin,ge())lse1', 25),('joe@joe.com','joe', 'somethingelse2', 54),('dontmurderme@direwolf.com','direwolf', 'somethingelse3', 44);
352
+ SQL
353
+
354
+ @ddo = MyObfuscate.new({ # the config covers email, name and age, but not `something`
355
+ :some_table => {
356
+ :email => {:type => :email, :skip_regexes => [/^[\w\.\_]+@honk\.com$/i, /^dontmurderme@direwolf.com$/]},
357
+ :name => {:type => :string, :length => 8, :chars => MyObfuscate::USERNAME_CHARS},
358
+ :age => {:type => :integer, :between => 10...80}
359
+ }
360
+ })
361
+ @ddo.fail_on_unspecified_columns = true
362
+ end
363
+
364
+ it "should raise an exception when an unspecified column is found" do
365
+ lambda {
366
+ @ddo.obfuscate(@database_dump, StringIO.new)
367
+ }.should raise_error(/column 'something' defined/i) # the message must name the offending column
368
+ end
369
+
370
+ it "should accept columns defined in globally_kept_columns" do
371
+ @ddo.globally_kept_columns = %w[something] # column names are given as strings here
372
+ lambda {
373
+ @ddo.obfuscate(@database_dump, StringIO.new)
374
+ }.should_not raise_error # listing the column globally suppresses the strict-mode failure
375
+ end
376
+ end
377
+ end
378
+
379
+ describe "when using MS SQL Server" do
380
+ context "when there is nothing to obfuscate" do # same pass-through check as the MySQL variant, with database_type switched
381
+ it "should accept an IO object for input and output, and copy the input to the output" do
382
+ ddo = MyObfuscate.new # no table configuration is supplied
383
+ ddo.database_type = :sql_server
384
+ string = "hello, world\nsup?" # contains no INSERT statement
385
+ input = StringIO.new(string)
386
+ output = StringIO.new
387
+ ddo.obfuscate(input, output)
388
+ input.rewind # input is not read again in this example
389
+ output.rewind # reposition so that read returns everything written
390
+ output.read.should == string # exact copy expected
391
+ end
392
+ end
393
+
394
+ context "when the dump to obfuscate is missing columns" do # the config names a column that the dump does not contain
395
+ before do
396
+ @database_dump = StringIO.new(<<-SQL) # the INSERT lists email, name, something and age only
397
+ INSERT [dbo].[some_table] ([email], [name], [something], [age]) VALUES ('bob@honk.com','bob', 'some''thin,ge())lse1', 25);
398
+ SQL
399
+ @ddo = MyObfuscate.new({
400
+ :some_table => {
401
+ :email => {:type => :email, :honk_email_skip => true},
402
+ :name => {:type => :string, :length => 8, :chars => MyObfuscate::USERNAME_CHARS},
403
+ :gender => {:type => :fixed, :string => "m"} # :gender is absent from the INSERT column list above
404
+ }})
405
+ @ddo.database_type = :sql_server
406
+ @output = StringIO.new
407
+ end
408
+
409
+ it "should raise an error if a column name can't be found" do
410
+ lambda {
411
+ @ddo.obfuscate(@database_dump, @output)
412
+ }.should raise_error # no class or message given, so any exception satisfies this matcher
413
+ end
414
+ end
415
+
416
+ context "when there is something to obfuscate" do
417
+ before do # builds a five-table SQL Server dump, obfuscates it once, and keeps the output and the captured $stderr text
418
+ @database_dump = StringIO.new(<<-SQL) # one single-row INSERT statement per row
419
+ INSERT [dbo].[some_table] ([email], [name], [something], [age], [bday]) VALUES (N'bob@honk.com',N'bob', N'some''thin,ge())lse1', 25, CAST(0x00009E1A00000000 AS DATETIME));
420
+ INSERT [dbo].[some_table] ([email], [name], [something], [age], [bday]) VALUES (N'joe@joe.com',N'joe', N'somethingelse2', 54, CAST(0x00009E1A00000000 AS DATETIME));
421
+ INSERT [dbo].[some_table] ([email], [name], [something], [age], [bday]) VALUES (N'dontmurderme@direwolf.com',N'direwolf', N'somethingelse3', 44, CAST(0x00009E1A00000000 AS DATETIME));
422
+ INSERT [dbo].[another_table] ([a], [b], [c], [d]) VALUES (1,2,3,4);
423
+ INSERT [dbo].[another_table] ([a], [b], [c], [d]) VALUES (5,6,7,8);
424
+ INSERT [dbo].[some_table_to_keep] ([a], [b], [c], [d]) VALUES (1,2,3,4);
425
+ INSERT [dbo].[some_table_to_keep] ([a], [b], [c], [d]) VALUES (5,6,7,8);
426
+ INSERT [dbo].[one_more_table] ([a], [password], [c], [d,d]) VALUES (N'hello',N'kjhjd^&dkjh', N'aawefjkafe');
427
+ INSERT [dbo].[one_more_table] ([a], [password], [c], [d,d]) VALUES (N'hello1',N'kjhj!', 892938);
428
+ INSERT [dbo].[one_more_table] ([a], [password], [c], [d,d]) VALUES (N'hello2',N'moose!!', NULL);
429
+ INSERT [dbo].[an_ignored_table] ([col], [col2]) VALUES (N'hello',N'kjhjd^&dkjh');
430
+ INSERT [dbo].[an_ignored_table] ([col], [col2]) VALUES (N'hello1',N'kjhj!');
431
+ INSERT [dbo].[an_ignored_table] ([col], [col2]) VALUES (N'hello2',N'moose!!');
432
+ SQL
433
+
434
+ @ddo = MyObfuscate.new({ # an_ignored_table is deliberately left out of this config
435
+ :some_table => {
436
+ :email => {:type => :email, :skip_regexes => [/^[\w\.\_]+@honk\.com$/i, /^dontmurderme@direwolf.com$/]}, # NOTE(review): the "." in direwolf.com is unescaped and matches any character
437
+ :name => {:type => :string, :length => 8, :chars => MyObfuscate::USERNAME_CHARS},
438
+ :age => {:type => :integer, :between => 10...80}, # three-dot range: 80 itself is excluded
439
+ :bday => :keep # column-level :keep; the CAST(...) value is expected back verbatim
440
+ },
441
+ :another_table => :truncate,
442
+ :some_table_to_keep => :keep,
443
+ :one_more_table => {
444
+ # Note: fixed strings must be pre-SQL escaped!
445
+ :password => {:type => :fixed, :string => "monkey"},
446
+ :c => {:type => :null}
447
+ }
448
+ })
449
+ @ddo.database_type = :sql_server
450
+
451
+ @output = StringIO.new
452
+ $stderr = @error_output = StringIO.new # redirect $stderr so that text written during obfuscation can be inspected
453
+ @ddo.obfuscate(@database_dump, @output)
454
+ $stderr = STDERR # NOTE(review): skipped if obfuscate raises; there is no ensure around the call
455
+ @output.rewind
456
+ @output_string = @output.read # the examples in this context assert against this string
457
+ end
458
+
459
+ it "should be able to truncate tables" do # another_table is configured as :truncate
460
+ @output_string.should_not include("INSERT [dbo].[another_table]") # no INSERT for the truncated table is emitted
461
+ @output_string.should include("INSERT [dbo].[one_more_table]") # other tables still produce INSERT statements
462
+ end
463
+
464
+ it "should be able to declare tables to keep" do # some_table_to_keep is configured as :keep
465
+ @output_string.should include("INSERT [dbo].[some_table_to_keep] ([a], [b], [c], [d]) VALUES (1,2,3,4);") # both statements appear byte-identical to the input
466
+ @output_string.should include("INSERT [dbo].[some_table_to_keep] ([a], [b], [c], [d]) VALUES (5,6,7,8);")
467
+ end
468
+
469
+ it "should ignore tables that it doesn't know about, but should warn" do # an_ignored_table has no entry in the config
470
+ @output_string.should include("INSERT [dbo].[an_ignored_table] ([col], [col2]) VALUES (N'hello',N'kjhjd^&dkjh');") # all three statements are copied through unchanged
471
+ @output_string.should include("INSERT [dbo].[an_ignored_table] ([col], [col2]) VALUES (N'hello1',N'kjhj!');")
472
+ @output_string.should include("INSERT [dbo].[an_ignored_table] ([col], [col2]) VALUES (N'hello2',N'moose!!');")
473
+ @error_output.rewind # reposition the captured $stderr buffer before reading it
474
+ @error_output.read.should =~ /an_ignored_table was not specified in the config/ # the warning is expected on $stderr, not in the dump output
475
+ end
476
+
477
+ it "should obfuscate the tables" do
478
+ @output_string.should include("INSERT [dbo].[some_table] ([email], [name], [something], [age], [bday]) VALUES (")
479
+ @output_string.should include("CAST(0x00009E1A00000000 AS DATETIME)")
480
+ @output_string.should include("INSERT [dbo].[one_more_table] ([a], [password], [c], [d,d]) VALUES (")
481
+ @output_string.should include("'some''thin,ge())lse1'")
482
+ @output_string.should include("INSERT [dbo].[one_more_table] ([a], [password], [c], [d,d]) VALUES (N'hello',N'monkey',NULL);")
483
+ @output_string.should include("INSERT [dbo].[one_more_table] ([a], [password], [c], [d,d]) VALUES (N'hello1',N'monkey',NULL);")
484
+ @output_string.should include("INSERT [dbo].[one_more_table] ([a], [password], [c], [d,d]) VALUES (N'hello2',N'monkey',NULL);")
485
+ @output_string.should_not include("INSERT [dbo].[one_more_table] ([a], [password], [c], [d,d]) VALUES (N'hello',N'kjhjd^&dkjh', N'aawefjkafe');")
486
+ @output_string.should_not include("INSERT [dbo].[one_more_table] ([a], [password], [c], [d,d]) VALUES (N'hello1',N'kjhj!', 892938);")
487
+ @output_string.should_not include("INSERT [dbo].[one_more_table] ([a], [password], [c], [d,d]) VALUES (N'hello2',N'moose!!', NULL);")
488
+ @output_string.should_not include("INSERT [dbo].[some_table] ([email], [name], [something], [age]) VALUES (N'bob@honk.com',N'bob', N'some''thin,ge())lse1', 25, CAST(0x00009E1A00000000 AS DATETIME));")
489
+ @output_string.should_not include("INSERT [dbo].[some_table] ([email], [name], [something], [age]) VALUES (N'joe@joe.com',N'joe', N'somethingelse2', 54, CAST(0x00009E1A00000000 AS DATETIME));")
490
+ end
491
+
492
+ it "honors a special case: on the people table, rows with anything@honk.com in a slot marked with :honk_email_skip do not change this slot" do
493
+ @output_string.should include("(N'bob@honk.com',")
494
+ @output_string.should include("(N'dontmurderme@direwolf.com',")
495
+ @output_string.should_not include("joe@joe.com")
496
+ end
497
+ end
498
+
499
+ context "when fail_on_unspecified_columns is set to true" do # strict mode: dump columns missing from the config are an error
500
+ before do
501
+ @database_dump = StringIO.new(<<-SQL) # NOTE(review): this fixture uses "INSERT INTO [dbo]..." while the other SQL Server fixtures in this file use "INSERT [dbo]..."
502
+ INSERT INTO [dbo].[some_table] ([email], [name], [something], [age]) VALUES ('bob@honk.com','bob', 'some''thin,ge())lse1', 25);
503
+ SQL
504
+
505
+ @ddo = MyObfuscate.new({ # the config covers email, name and age, but not `something`
506
+ :some_table => {
507
+ :email => {:type => :email, :skip_regexes => [/^[\w\.\_]+@honk\.com$/i, /^dontmurderme@direwolf.com$/]},
508
+ :name => {:type => :string, :length => 8, :chars => MyObfuscate::USERNAME_CHARS},
509
+ :age => {:type => :integer, :between => 10...80}
510
+ }
511
+ })
512
+ @ddo.database_type = :sql_server
513
+ @ddo.fail_on_unspecified_columns = true
514
+ end
515
+
516
+ it "should raise an exception when an unspecified column is found" do
517
+ lambda {
518
+ @ddo.obfuscate(@database_dump, StringIO.new)
519
+ }.should raise_error(/column 'something' defined/i) # the message must name the offending column
520
+ end
521
+
522
+ it "should accept columns defined in globally_kept_columns" do
523
+ @ddo.globally_kept_columns = %w[something] # column names are given as strings here
524
+ lambda {
525
+ @ddo.obfuscate(@database_dump, StringIO.new)
526
+ }.should_not raise_error # listing the column globally suppresses the strict-mode failure
527
+ end
528
+ end
529
+ end
530
+ end
531
+ end
@@ -0,0 +1,50 @@
1
+ require 'spec_helper'
2
+
3
+ describe MyObfuscate::Mysql do # specs for the MySQL statement parser; `subject` is RSpec's implicit instance of the described class
4
+ describe "#parse_insert_statement" do # returns nil unless the line is a MySQL-style INSERT
5
+ it "should return nil for other SQL syntaxes (MS SQL Server)" do
6
+ subject.parse_insert_statement("INSERT [dbo].[TASKS] ([TaskID], [TaskName]) VALUES (61, N'Report Thing')").should be_nil # bracket-quoted SQL Server syntax is rejected
7
+ end
8
+
9
+ it "should return nil for MySQL non-insert statements" do
10
+ subject.parse_insert_statement("CREATE TABLE `some_table`;").should be_nil
11
+ end
12
+
13
+ it "should return a hash of table name, column names for MySQL insert statements" do
14
+ hash = subject.parse_insert_statement("INSERT INTO `some_table` (`email`, `name`, `something`, `age`) VALUES ('bob@honk.com','bob', 'some\\'thin,ge())lse1', 25),('joe@joe.com','joe', 'somethingelse2', 54);")
15
+ hash.should == {:table_name => :some_table, :column_names => [:email, :name, :something, :age]} # table and column names come back as symbols, in statement order
16
+ end
17
+ end
18
+
19
+ describe "#rows_to_be_inserted" do # splits the VALUES section into one array of field values per row
20
+ it "should split a mysql string into fields" do
21
+ string = "INSERT INTO `some_table` (thing1,thing2) VALUES ('bob@bob.com','bob', 'somethingelse1', 25, '2', 10, 'hi') ; "
22
+ fields = [['bob@bob.com', 'bob', 'somethingelse1', '25', '2', '10', "hi"]] # quotes are stripped and unquoted numbers come back as strings
23
+ subject.rows_to_be_inserted(string).should == fields
24
+ end
25
+
26
+ it "should work ok with escaped characters" do
27
+ string = "INSERT INTO `some_table` (thing1,thing2) VALUES ('bob,@bob.c , om', 'bo\\', b', 'some\"thin\\gel\\\\\\'se1', 25, '2', 10, 'hi', 5) ; " # commas and backslash-escaped quotes inside quoted values
28
+ fields = [['bob,@bob.c , om', 'bo\\\', b', 'some"thin\\gel\\\\\\\'se1', '25', '2', '10', "hi", "5"]] # backslash escapes are expected back verbatim, not unescaped
29
+ subject.rows_to_be_inserted(string).should == fields
30
+ end
31
+
32
+ it "should work with multiple subinserts" do
33
+ string = "INSERT INTO `some_table` (thing1,thing2) VALUES (1,2,3, '((m))(oo()s,e'), ('bob,@bob.c , om', 'bo\\', b', 'some\"thin\\gel\\\\\\'se1', 25, '2', 10, 'hi', 5) ;" # two row groups; the first contains parentheses inside a quoted value
34
+ fields = [["1", "2", "3", "((m))(oo()s,e"], ['bob,@bob.c , om', 'bo\\\', b', 'some"thin\\gel\\\\\\\'se1', '25', '2', '10', "hi", "5"]] # one inner array per row group
35
+ subject.rows_to_be_inserted(string).should == fields
36
+ end
37
+
38
+ it "should work ok with NULL values" do
39
+ string = "INSERT INTO `some_table` (thing1,thing2) VALUES (NULL , 'bob@bob.com','bob', NULL, 25, '2', NULL, 'hi', NULL ); "
40
+ fields = [[nil, 'bob@bob.com', 'bob', nil, '25', '2', nil, "hi", nil]] # an unquoted NULL maps to nil
41
+ subject.rows_to_be_inserted(string).should == fields
42
+ end
43
+
44
+ it "should work with empty strings" do
45
+ string = "INSERT INTO `some_table` (thing1,thing2) VALUES (NULL , '', '' , '', 25, '2','', 'hi','') ;"
46
+ fields = [[nil, '', '', '', '25', '2', '', "hi", '']] # '' stays an empty string and is distinct from NULL
47
+ subject.rows_to_be_inserted(string).should == fields
48
+ end
49
+ end
50
+ end
@@ -0,0 +1,10 @@
1
+ require 'rubygems' # shared spec bootstrap: load gems, extend the load path, then load the library
2
+ require 'bundler'
3
+ Bundler.require(:default, :development) # requires the gems in the default and development Bundler groups
4
+ $LOAD_PATH.unshift(File.dirname(__FILE__)) # make this directory requirable
5
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib')) # put ../lib at the front of the load path before requiring my_obfuscate
6
+ require 'my_obfuscate'
7
+
8
+ RSpec.configure do |config| # no options are set; the only line inside is commented out
9
+ # config.mock_with :rr
10
+ end
@@ -0,0 +1,58 @@
1
+ require 'spec_helper'
2
+
3
+ describe MyObfuscate::SqlServer do # specs for the SQL Server statement parser; `subject` is RSpec's implicit instance of the described class
4
+ describe "#parse_insert_statement" do # returns nil unless the line is a SQL Server-style INSERT
5
+ it "should return a hash of table_name, column_names for SQL Server input statements" do
6
+ hash = subject.parse_insert_statement("INSERT [dbo].[TASKS] ([TaskID], [TaskName]) VALUES (61, N'Report Thing')")
7
+ hash.should == { :table_name => :TASKS, :column_names => [:TaskID, :TaskName] } # the [dbo]. prefix is dropped and name case is preserved
8
+ end
9
+
10
+ it "should return nil for SQL Server non-insert statements" do
11
+ subject.parse_insert_statement("CREATE TABLE [dbo].[WORKFLOW](").should be_nil
12
+ end
13
+
14
+ it "should return nil for non-SQL Server insert statements (MySQL)" do
15
+ subject.parse_insert_statement("INSERT INTO `some_table` (`email`, `name`, `something`, `age`) VALUES ('bob@honk.com','bob', 'some\\'thin,ge())lse1', 25),('joe@joe.com','joe', 'somethingelse2', 54);").should be_nil # backtick-quoted MySQL syntax is rejected
16
+ end
17
+ end
18
+
19
+ describe "#rows_to_be_inserted" do # splits the VALUES section into one array of field values per row
20
+ it "should split a SQL Server string into fields" do
21
+ string = "INSERT [dbo].[some_table] ([thing1],[thing2]) VALUES (N'bob@bob.com',N'bob', N'somethingelse1',25, '2', 10, 'hi', CAST(0x00009E1A00000000 AS DATETIME)) ; "
22
+ fields = [['bob@bob.com', 'bob', 'somethingelse1', '25', '2', '10', "hi", "CAST(0x00009E1A00000000 AS DATETIME)"]] # N prefix and quotes are stripped; the CAST(...) expression stays one field
23
+ subject.rows_to_be_inserted(string).should == fields
24
+ end
25
+
26
+ it "should work ok with single quote escape" do
27
+ string = "INSERT [dbo].[some_table] ([thing1],[thing2]) VALUES (N'bob,@bob.c , om', 'bo'', b', N'some\"thingel''se1', 25, '2', 10, 'hi', 5) ; " # SQL Server escapes a quote by doubling it
28
+ fields = [['bob,@bob.c , om', "bo'', b", "some\"thingel''se1", '25', '2', '10', "hi", "5"]] # doubled quotes are expected back verbatim, not collapsed
29
+ subject.rows_to_be_inserted(string).should == fields
30
+ end
31
+
32
+ it "should work ok with NULL values" do
33
+ string = "INSERT [dbo].[some_table] ([thing1],[thing2]) VALUES (NULL , N'bob@bob.com','bob', NULL, 25, N'2', NULL, 'hi', NULL ); "
34
+ fields = [[nil, 'bob@bob.com', 'bob', nil, '25', '2', nil, "hi", nil]] # an unquoted NULL maps to nil
35
+ subject.rows_to_be_inserted(string).should == fields
36
+ end
37
+
38
+ it "should work with empty strings" do
39
+ string = "INSERT [dbo].[some_table] ([thing1],[thing2]) VALUES (NULL , N'', '' , '', 25, '2','', N'hi','') ;"
40
+ fields = [[nil, '', '','', '25', '2', '', "hi", '']] # N'' and '' both yield an empty string
41
+ subject.rows_to_be_inserted(string).should == fields
42
+ end
43
+ end
44
+
45
+ describe "#make_valid_value_string" do # converts a Ruby value back into SQL literal text
46
+ it "should output 'NULL' when the value is nil" do
47
+ subject.make_valid_value_string(nil).should == "NULL" # bare keyword, unquoted
48
+ end
49
+
50
+ it "should enclose the value in quotes if it's a string" do
51
+ subject.make_valid_value_string("something").should == "N'something'" # strings get the N prefix as well as quotes
52
+ end
53
+
54
+ it "should not enclose the value in quotes if it is a method call" do
55
+ subject.make_valid_value_string("CAST(0x00009E1A00000000 AS DATETIME)").should == "CAST(0x00009E1A00000000 AS DATETIME)" # the CAST(...) expression is emitted as-is
56
+ end
57
+ end
58
+ end
metadata ADDED
@@ -0,0 +1,97 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: iterationlabs-my_obfuscate
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.3.3
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Andrew Cantino
9
+ - Dave Willett
10
+ - Mike Grafton
11
+ - Mason Glaves
12
+ - Greg Bell
13
+ - Mavenlink
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+ date: 2012-04-10 00:00:00.000000000 Z
18
+ dependencies:
19
+ - !ruby/object:Gem::Dependency
20
+ name: rspec
21
+ requirement: !ruby/object:Gem::Requirement
22
+ none: false
23
+ requirements:
24
+ - - ! '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ type: :development
28
+ prerelease: false
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ none: false
31
+ requirements:
32
+ - - ! '>='
33
+ - !ruby/object:Gem::Version
34
+ version: '0'
35
+ - !ruby/object:Gem::Dependency
36
+ name: faker
37
+ requirement: !ruby/object:Gem::Requirement
38
+ none: false
39
+ requirements:
40
+ - - '='
41
+ - !ruby/object:Gem::Version
42
+ version: 0.9.5
43
+ type: :runtime
44
+ prerelease: false
45
+ version_requirements: !ruby/object:Gem::Requirement
46
+ none: false
47
+ requirements:
48
+ - - '='
49
+ - !ruby/object:Gem::Version
50
+ version: 0.9.5
51
+ description: Standalone Ruby code for the selective rewriting of MySQL dumps in order
52
+ to protect user privacy.
53
+ email: andrew@iterationlabs.com
54
+ executables: []
55
+ extensions: []
56
+ extra_rdoc_files: []
57
+ files:
58
+ - .gitignore
59
+ - Gemfile
60
+ - LICENSE
61
+ - README.rdoc
62
+ - Rakefile
63
+ - iterationlabs-my_obfuscate.gemspec
64
+ - lib/my_obfuscate.rb
65
+ - lib/my_obfuscate/mysql.rb
66
+ - lib/my_obfuscate/sql_server.rb
67
+ - lib/my_obfuscate/version.rb
68
+ - spec/my_obfuscate_spec.rb
69
+ - spec/mysql_spec.rb
70
+ - spec/spec_helper.rb
71
+ - spec/sql_server_spec.rb
72
+ homepage: http://github.com/iterationlabs/myobfuscate
73
+ licenses: []
74
+ post_install_message:
75
+ rdoc_options: []
76
+ require_paths:
77
+ - lib
78
+ required_ruby_version: !ruby/object:Gem::Requirement
79
+ none: false
80
+ requirements:
81
+ - - ! '>='
82
+ - !ruby/object:Gem::Version
83
+ version: '0'
84
+ required_rubygems_version: !ruby/object:Gem::Requirement
85
+ none: false
86
+ requirements:
87
+ - - ! '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ requirements: []
91
+ rubyforge_project:
92
+ rubygems_version: 1.8.21
93
+ signing_key:
94
+ specification_version: 3
95
+ summary: Standalone Ruby code for the selective rewriting of MySQL dumps in order
96
+ to protect user privacy.
97
+ test_files: []