iterationlabs-my_obfuscate 0.3.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,8 @@
1
+ *.sw?
2
+ .DS_Store
3
+ coverage
4
+ rdoc
5
+ pkg
6
+ .idea
7
+ .rvmrc
8
+ Gemfile.lock
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Fetch gems over HTTPS so that downloads cannot be tampered with in transit.
source "https://rubygems.org"

# Specify your gem's dependencies in my_obfuscate.gemspec
gemspec
data/LICENSE ADDED
@@ -0,0 +1,19 @@
1
+ Copyright (c) 2009 Honk
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ of this software and associated documentation files (the "Software"), to deal
5
+ in the Software without restriction, including without limitation the rights
6
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ copies of the Software, and to permit persons to whom the Software is
8
+ furnished to do so, subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included in
11
+ all copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ THE SOFTWARE.
@@ -0,0 +1,85 @@
1
+ = my_obfuscate
2
+
3
+ Standalone Ruby code for the selective rewriting of SQL dumps in order to protect user privacy. Supports MySQL and SQL Server.
4
+
5
+ = Install
6
+
7
+ sudo gem install my_obfuscate
8
+
9
+ = Example Usage
10
+
11
+ Make an obfuscator.rb script:
12
+
13
+ #!/usr/bin/env ruby
14
+ require "rubygems"
15
+ require "my_obfuscate"
16
+
17
+ obfuscator = MyObfuscate.new({
18
+ :people => {
19
+ :email => { :type => :email, :skip_regexes => [/^[\w\.\_]+@my_company\.com$/i] },
20
+ :ethnicity => :keep,
21
+ :crypted_password => { :type => :fixed, :string => "SOME_FIXED_PASSWORD_FOR_EASE_OF_DEBUGGING" },
22
+ :salt => { :type => :fixed, :string => "SOME_THING" },
23
+ :remember_token => :null,
24
+ :remember_token_expires_at => :null,
25
+ :age => { :type => :null, :unless => lambda { |person| person[:email] == "hello@example.com" } },
26
+ :photo_file_name => :null,
27
+ :photo_content_type => :null,
28
+ :photo_file_size => :null,
29
+ :photo_updated_at => :null,
30
+ :postal_code => { :type => :fixed, :string => "94109", :unless => lambda {|person| person[:postal_code] == "12345"} },
31
+ :name => :name,
32
+ :full_address => :address,
33
+ :bio => { :type => :lorem, :number => 4 },
34
+ :relationship_status => { :type => :fixed, :one_of => ["Single", "Divorced", "Married", "Engaged", "In a Relationship"] },
35
+ :has_children => { :type => :integer, :between => 0..1 },
36
+ },
37
+
38
+ :invites => :truncate,
39
+ :invite_requests => :truncate,
40
+ :tags => :keep,
41
+
42
+ :relationships => {
43
+ :account_id => :keep,
44
+ :code => { :type => :string, :length => 8, :chars => MyObfuscate::USERNAME_CHARS }
45
+ }
46
+ })
47
+ obfuscator.fail_on_unspecified_columns = true # if you want it to require every column in the table to be in the above definition
48
+ obfuscator.globally_kept_columns = %w[id created_at updated_at] # if you set fail_on_unspecified_columns, you may want this as well
49
+ obfuscator.obfuscate(STDIN, STDOUT)
50
+
51
+ And to get an obfuscated dump:
52
+ mysqldump -c --add-drop-table -u user -ppassword database | ruby obfuscator.rb > obfuscated_dump.sql
53
+ Note that the -c option on mysqldump is required to use my_obfuscate.
54
+
55
+ == Database Server
56
+
57
+ By default the database type is assumed to be MySQL, but you can use the
58
+ builtin SQL Server support by specifying:
59
+
60
+ obfuscator.database_type = :sql_server
61
+
62
+ == Changes
63
+
64
+ * Support for SQL Server
65
+ * :unless and :if now support :nil as a shorthand for a Proc that checks for nil
66
+ * :name, :lorem, and :address are all now supported types. You can pass :number to :lorem to specify how many sentences to generate. The default is one.
67
+ * <tt>{ :type => :whatever }</tt> is now optional when no additional options are needed. Just use <tt>:whatever</tt>.
68
+ * Warnings are printed to standard error when an unknown column type or table is encountered. Use <tt>:keep</tt> in both cases.
69
+ * <tt>{ :type => :fixed, :string => proc { |row| ... } }</tt> is now available.
70
+
71
+ == Note on Patches/Pull Requests
72
+
73
+ * Fork the project.
74
+ * Make your feature addition or bug fix.
75
+ * Add tests for it. This is important so I don't break it in a future version unintentionally.
76
+ * Commit, but do not change the Rakefile, version, or history. (If you want to have your own version, that is fine, but bump the version in a separate commit that I can ignore when I pull.)
77
+ * Send me a pull request. Bonus points for topic branches.
78
+
79
+ == Thanks
80
+
81
+ Thanks to Mavenlink and Pivotal Labs for patches and updates!
82
+
83
+ == Copyright
84
+
85
+ Copyright (c) 2009 Honk. Now maintained by Iteration Labs, LLC. See LICENSE for details.
@@ -0,0 +1,8 @@
1
+ require 'bundler/gem_tasks'
2
+ require 'rspec/core/rake_task'
3
+
4
# Define the `spec` task; it runs every *_spec.rb file found under spec/.
RSpec::Core::RakeTask.new(:spec) { |t| t.pattern = FileList['spec/**/*_spec.rb'] }

# Running a bare `rake` executes the spec task.
task :default => :spec
@@ -0,0 +1,22 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "my_obfuscate/version"
4
+
5
Gem::Specification.new do |s|
  s.name = %q{iterationlabs-my_obfuscate}
  s.version = MyObfuscate::VERSION

  s.authors = ["Andrew Cantino", "Dave Willett", "Mike Grafton", "Mason Glaves", "Greg Bell", "Mavenlink"]
  # Kept in line with the README, which documents both MySQL and SQL Server support.
  s.description = %q{Standalone Ruby code for the selective rewriting of SQL dumps in order to protect user privacy. Supports MySQL and SQL Server.}
  s.email = %q{andrew@iterationlabs.com}
  s.homepage = %q{http://github.com/iterationlabs/myobfuscate}
  s.summary = %q{Standalone Ruby code for the selective rewriting of SQL dumps in order to protect user privacy.}
  # The bundled LICENSE file contains the MIT license text.
  s.license = "MIT"

  s.add_development_dependency "rspec"
  s.add_dependency "faker", "=0.9.5"

  # The packaged file lists come from git, so the gem must be built from a git checkout.
  s.files = `git ls-files`.split("\n")
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]
end
@@ -0,0 +1,213 @@
1
+ require 'jcode' if RUBY_VERSION < '1.9'
2
+ require 'faker'
3
+ require 'my_obfuscate/mysql'
4
+ require 'my_obfuscate/sql_server'
5
+
6
+ # Class for obfuscating MySQL dumps. This can parse mysqldump outputs when using the -c option, which includes
7
+ # column names in the insert statements.
8
+ class MyObfuscate
9
+ attr_accessor :config, :globally_kept_columns, :fail_on_unspecified_columns, :database_type
10
+
11
+ NUMBER_CHARS = "1234567890"
12
+ USERNAME_CHARS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_" + NUMBER_CHARS
13
+ SENSIBLE_CHARS = USERNAME_CHARS + '+-=[{]}/?|!@#$%^&*()`~'
14
+
15
+ # Make a new MyObfuscate object. Pass in a configuration structure to define how the obfuscation should be
16
+ # performed. See the README.rdoc file for more information.
17
# @param configuration [Hash] obfuscation rules keyed by table name; the value is
#   stored untouched and is readable afterwards through #config.
def initialize(configuration = {})
  @config = configuration
end
20
+
21
# Predicate form of the fail_on_unspecified_columns accessor.
#
# @return [Object, nil] whatever was assigned to fail_on_unspecified_columns
#   (nil when it was never set); callers use it for its truthiness.
def fail_on_unspecified_columns?
  @fail_on_unspecified_columns
end
24
+
25
# Returns the dialect helper used to parse and rebuild INSERT statements.
# The helper is created on first use and then reused: SqlServer when
# database_type is :sql_server, Mysql for every other value.
def database_helper
  @database_helper ||= (@database_type == :sql_server ? SqlServer : Mysql).new
end
36
+
37
+ # Read an input stream and dump out an obfuscated output stream. These streams could be StringIO objects, Files,
38
+ # or STDIN and STDOUT.
39
# @param input_io [#each] source of dump lines
# @param output_io [#write, #puts] destination for the rewritten dump
def obfuscate(input_io, output_io)
  # Every INSERT statement is assumed to sit on one line, with no internal linebreaks.
  input_io.each do |line|
    parsed = database_helper.parse_insert_statement(line)

    # Lines that are not INSERT statements are copied through verbatim.
    unless parsed
      output_io.write line
      next
    end

    table_name = parsed[:table_name]

    # Tables without a config entry are passed through, with a deprecation warning.
    unless config[table_name]
      $stderr.puts "Deprecated: #{table_name} was not specified in the config. A future release will cause this to be an error. Please specify the table definition or set it to :keep."
      output_io.write line
      next
    end

    output_io.puts obfuscate_bulk_insert_line(line, table_name, parsed[:column_names])
  end
end
57
+
58
# Splits one bulk INSERT line into rows, yields each row (an Array of field
# values) to the block, and rebuilds a single INSERT statement from whatever
# the block returns for each row.
#
# @param line [String] the original INSERT statement
# @param table_name [Symbol, String] table name used in the rebuilt statement
# @param columns [Array] column names used in the rebuilt statement
# @yieldparam row [Array] field values of one row
# @yieldreturn [Array] the (possibly rewritten) field values
# @return [String] the reassembled INSERT statement
def reassembling_each_insert(line, table_name, columns)
  value_groups = database_helper.rows_to_be_inserted(line).map do |row|
    quoted = yield(row).map { |value| database_helper.make_valid_value_string(value) }
    "(#{quoted.join(',')})"
  end

  database_helper.make_insert_statement(table_name, columns, value_groups.join(","))
end
69
+
70
# Pairs each column name with the value at the same position in the row.
#
# @param row [Array] field values
# @param columns [Array] column names, in the same order as the values
# @return [Hash] column name => value (nil where the row has no value)
def self.row_as_hash(row, columns)
  Hash[columns.zip(row)]
end
73
+
74
# Expands the :blank and :nil shorthands into lambdas; anything else is
# returned unchanged.
#
# The generated lambdas accept the row hash (to match the calling convention
# of user-supplied procs) but inspect row[index] directly.
#
# @param conditional_method [Symbol, #call] :blank, :nil, or a callable
# @param index [Integer] position of the column within row
# @param row [Array] field values of the current row
# @return [#call, Object] a lambda for the shorthands, otherwise the argument itself
def self.make_conditional_method(conditional_method, index, row)
  case conditional_method
  when :blank
    lambda { |row_hash| row[index].nil? || row[index] == '' }
  when :nil
    lambda { |row_hash| row[index].nil? }
  else
    conditional_method
  end
end
84
+
85
# Rewrites the values of one row according to a table definition.
#
# The row is modified in place and also returned. Conditionals and Proc-valued
# :string options receive a hash of the row's values taken before any column
# was rewritten.
#
# @param row [Array] field values of one row, in column order
# @param table_config [Hash, Object] column name => definition (a Symbol type or
#   an options Hash with :type); anything that is not a Hash leaves the row untouched
# @param columns [Array] column names, in the same order as the row values
# @return [Array] the row
def self.apply_table_config(row, table_config, columns)
  return row unless table_config.is_a?(Hash)
  row_hash = row_as_hash(row, columns)

  table_config.each do |column, definition|
    index = columns.index(column)

    # A configured column that is absent from this row has nothing to rewrite.
    # Without this guard the nil index would be coerced to 0 and overwrite the
    # first column's value.
    next if index.nil?

    definition = { :type => definition } if definition.is_a?(Symbol)

    if definition.has_key?(:unless)
      unless_check = make_conditional_method(definition[:unless], index, row)

      next if unless_check.call(row_hash)
    end

    if definition.has_key?(:if)
      if_check = make_conditional_method(definition[:if], index, row)

      next unless if_check.call(row_hash)
    end

    if definition[:skip_regexes]
      next if definition[:skip_regexes].any? { |regex| row[index] =~ regex }
    end

    row[index] = case definition[:type]
      when :email
        random_string(definition[:length] || (4..10), USERNAME_CHARS) + "@example.com"
      when :string
        random_string(definition[:length] || 30, definition[:chars] || SENSIBLE_CHARS)
      when :lorem
        clean_bad_whitespace(clean_quotes(Faker::Lorem.sentences(definition[:number] || 1).join(". ")))
      when :name
        clean_quotes(Faker::Name.name)
      when :first_name
        clean_quotes(Faker::Name.first_name)
      when :last_name
        clean_quotes(Faker::Name.last_name)
      when :address
        # "\\n" is a literal backslash followed by "n", i.e. an already SQL-escaped newline.
        clean_quotes("#{Faker::Address.street_address}\\n#{Faker::Address.city}, #{Faker::Address.state_abbr} #{Faker::Address.zip_code}")
      when :street_address
        clean_bad_whitespace(clean_quotes(Faker::Address.street_address))
      when :city
        clean_quotes(Faker::Address.city)
      when :state
        Faker::Address.state_abbr
      when :zip_code
        Faker::Address.zip_code
      when :phone
        Faker::PhoneNumber.phone_number
      when :integer
        random_integer(definition[:between] || (0..1000)).to_s
      when :fixed
        if definition[:one_of]
          definition[:one_of][(rand * definition[:one_of].length).to_i]
        else
          definition[:string].is_a?(Proc) ? definition[:string].call(row_hash) : definition[:string]
        end
      when :null
        nil
      when :keep
        row[index]
      else
        $stderr.puts "Keeping a column value by providing an unknown type (#{definition[:type]}) is deprecated. Use :keep instead."
        row[index]
      end
  end
  row
end
155
+
156
# Picks a uniformly distributed random integer from an integer range.
#
# Rounding a scaled float gave each end point only half the probability of the
# interior values; drawing an integer offset makes every value equally likely.
#
# @param between [Range] integer range; its min and max are both candidates
#   (for an exclusive range such as 10...80, max is 79)
# @return [Integer] a value with between.min <= value <= between.max
def self.random_integer(between)
  low = between.min
  high = between.max
  low + rand(high - low + 1)
end
159
+
160
# Builds a random string from the given alphabet.
#
# @param length_or_range [Integer, Range] exact length, or a range from which
#   the length is drawn with random_integer
# @param chars [String] alphabet to draw characters from
# @return [String] the generated string
def self.random_string(length_or_range, chars)
  # Integer rather than Fixnum: Fixnum was removed from Ruby 3.2, and Integer
  # also matches on the older Rubies where Fixnum was a subclass of it.
  length_or_range = (length_or_range..length_or_range) if length_or_range.is_a?(Integer)
  out = ""
  # chars[pos, 1] always yields a one-character String, unlike a bare index,
  # whose result type depends on the Ruby version.
  random_integer(length_or_range).times { out << chars[rand(chars.length), 1] }
  out
end
167
+
168
# Verifies that every column named in the config for table_name appears in the
# INSERT statement's column list.
#
# @param table_name [Symbol] key into config
# @param columns [Array<Symbol>] column names from the INSERT statement
# @return [nil] when nothing is missing
# @raise [RuntimeError] with one message line per configured column that is missing
def check_for_defined_columns_not_in_table(table_name, columns)
  unknown = config[table_name].keys - columns
  return if unknown.empty?

  messages = unknown.map do |name|
    "Column '#{name}' could not be found in table '#{table_name}', please fix your obfuscator config."
  end
  raise RuntimeError, messages.join("\n")
end
177
+
178
# Verifies that every column in the INSERT statement is either named in the
# config for table_name or listed in globally_kept_columns.
#
# @param table_name [Symbol] key into config
# @param columns [Array<Symbol>] column names from the INSERT statement
# @return [nil] when every column is accounted for
# @raise [RuntimeError] with one message line per unaccounted column
def check_for_table_columns_not_in_definition(table_name, columns)
  configured = config[table_name].keys
  # globally_kept_columns may hold strings (e.g. from %w[...]); compare as symbols.
  kept_everywhere = (globally_kept_columns || []).map { |name| name.to_sym }
  unlisted = columns - (configured + kept_everywhere).uniq
  return if unlisted.empty?

  messages = unlisted.map do |name|
    "Column '#{name}' defined in table '#{table_name}', but not found in table definition, please fix your obfuscator config."
  end
  raise RuntimeError, messages.join("\n")
end
187
+
188
# Produces the replacement text for one INSERT line of a configured table.
#
# @param line [String] the original INSERT statement
# @param table_name [Symbol] key into config
# @param columns [Array<Symbol>] column names from the INSERT statement
# @return [String] "" for :truncate tables, the untouched line for :keep
#   tables, otherwise the rebuilt statement with obfuscated values
# @raise [RuntimeError] when the column checks fail
def obfuscate_bulk_insert_line(line, table_name, columns)
  table_config = config[table_name]

  case table_config
  when :truncate then return ""
  when :keep then return line
  end

  check_for_defined_columns_not_in_table(table_name, columns)
  check_for_table_columns_not_in_definition(table_name, columns) if fail_on_unspecified_columns?

  # Note: Remember to SQL escape strings in what you pass back.
  reassembling_each_insert(line, table_name, columns) do |row|
    MyObfuscate.apply_table_config(row, table_config, columns)
  end
end
203
+
204
+ private
205
+
206
# Strips every single and double quote character from value.
#
# NOTE: the `private` keyword above does not apply to `def self.` methods, so
# this helper remains publicly callable.
#
# @param value [String]
# @return [String] a copy without ' and " characters
def self.clean_quotes(value)
  value.delete(%q('"))
end
209
+
210
# Strips newline, tab and carriage-return characters from value; ordinary
# spaces are kept.
#
# @param value [String]
# @return [String] a copy without \n, \t and \r characters
def self.clean_bad_whitespace(value)
  value.delete("\n\t\r")
end
213
+ end
@@ -0,0 +1,87 @@
1
+ class MyObfuscate::Mysql
2
+ INSERT_REGEX = /^\s*INSERT INTO `(.*?)` \((.*?)\) VALUES\s*/i
3
+
4
# Extracts the table name and column names from a MySQL INSERT line.
#
# @param line [String] one line of the dump
# @return [Hash, nil] { :table_name => Symbol, :column_names => [Symbol, ...] },
#   or nil when the line does not match INSERT_REGEX
def parse_insert_statement(line)
  match = INSERT_REGEX.match(line)
  return nil unless match

  column_names = match[2].split(/`\s*,\s*`/).map { |column| column.gsub('`', "").to_sym }
  { :table_name => match[1].to_sym, :column_names => column_names }
end
12
+
13
# Splits the VALUES part of an INSERT line into rows of field values.
#
# @param line [String] a complete INSERT statement
# @return [Array<Array>] one Array of field values per inserted row
def rows_to_be_inserted(line)
  without_header = line.gsub(INSERT_REGEX, '')
  values_clause = without_header.gsub(/\s*;\s*$/, '') # drop the trailing semicolon
  context_aware_mysql_string_split(values_clause)
end
17
+
18
# Renders one field value as it should appear in the VALUES list.
#
# @param value [String, nil]
# @return [String] "NULL" for nil, otherwise the value wrapped in single
#   quotes (the value itself is not escaped here)
def make_valid_value_string(value)
  return "NULL" if value.nil?

  "'" + value + "'"
end
25
+
26
# Assembles a MySQL INSERT statement.
#
# @param table_name [Symbol, String]
# @param column_names [Array] column names; each one is wrapped in backticks
# @param values_strings [String] the pre-rendered "(...),(...)" value groups
# @return [String] the statement, terminated with a semicolon
def make_insert_statement(table_name, column_names, values_strings)
  column_list = column_names.join('`, `')
  "INSERT INTO `#{table_name}` (`#{column_list}`) VALUES #{values_strings};"
end
29
+
30
+ private
31
+
32
+ # Be aware, strings must be quoted in single quotes!
33
# Character-by-character parser for the VALUES part of a MySQL INSERT, e.g.
# "('a','b'),(1,NULL)". Parentheses, commas and whitespace only act as
# delimiters outside of single-quoted strings.
#
# Quoted values are returned without their surrounding quotes and keep any
# backslash escapes verbatim; an unquoted NULL becomes nil; other unquoted
# values are returned as strings.
#
# @param string [String] the value groups, without the INSERT header
# @return [Array<Array<String, nil>>] one Array of field values per row
def context_aware_mysql_string_split(string)
  in_sub_insert = false
  in_quoted_string = false
  escaped = false
  current_field = nil
  fields = []
  output = []

  string.each_char do |i|
    if escaped
      # The character after a backslash is always taken literally.
      escaped = false
      current_field ||= ""
      current_field << i
    else
      if i == "\\"
        escaped = true
        current_field ||= ""
        current_field << i
      elsif i == "(" && !in_quoted_string && !in_sub_insert
        in_sub_insert = true
      elsif i == ")" && !in_quoted_string && in_sub_insert
        # End of a row: flush the pending field and the row itself.
        fields << current_field unless current_field.nil?
        output << fields unless fields.length == 0
        in_sub_insert = false
        fields = []
        current_field = nil
      elsif i == "'" && !in_quoted_string
        fields << current_field unless current_field.nil?
        current_field = ''
        in_quoted_string = true
      elsif i == "'" && in_quoted_string
        fields << current_field unless current_field.nil?
        current_field = nil
        in_quoted_string = false
      elsif i == "," && !in_quoted_string && in_sub_insert
        fields << current_field unless current_field.nil?
        current_field = nil
      elsif i == "L" && !in_quoted_string && in_sub_insert && current_field == "NUL"
        # Completes an unquoted NULL, which is recorded as a nil field.
        current_field = nil
        fields << current_field
      elsif (i == " " || i == "\t") && !in_quoted_string
        # Don't add whitespace not in a string
      elsif in_sub_insert
        current_field ||= ""
        current_field << i
      end
    end
  end

  # Flush whatever is pending if the input ended without a closing parenthesis.
  fields << current_field unless current_field.nil?
  output << fields unless fields.length == 0
  output
end
87
+ end
@@ -0,0 +1,79 @@
1
+ class MyObfuscate::SqlServer
2
+ INSERT_REGEX = /^\s*INSERT (?:INTO )?\[dbo\]\.\[(.*?)\] \((.*?)\) VALUES\s*/i
3
+
4
# Extracts the table name and column names from a SQL Server INSERT line.
#
# @param line [String] one line of the dump
# @return [Hash, nil] { :table_name => Symbol, :column_names => [Symbol, ...] },
#   or nil when the line does not match INSERT_REGEX
def parse_insert_statement(line)
  match = INSERT_REGEX.match(line)
  return nil unless match

  column_names = match[2].split(/\]\s*,\s*\[/).map { |column| column.gsub(/[\[\]]/, "").to_sym }
  { :table_name => match[1].to_sym, :column_names => column_names }
end
12
+
13
# Splits the VALUES part of an INSERT line into field values.
#
# @param line [String] a complete INSERT statement
# @return [Array<Array>] the result of the field splitter for the text between
#   the outer parentheses
def rows_to_be_inserted(line)
  values_clause = line.gsub(INSERT_REGEX, '')
  values_clause = values_clause.gsub(/\s*;?\s*$/, '') # optional trailing semicolon
  values_clause = values_clause.gsub(/^\(/, '')       # opening parenthesis
  values_clause = values_clause.gsub(/\)$/, '')       # closing parenthesis
  context_aware_sql_server_string_split(values_clause)
end
17
+
18
# Renders one field value as it should appear in the VALUES list.
#
# @param value [String, nil]
# @return [String] "NULL" for nil; the value unchanged when it looks like an
#   upper-case function call such as CAST(...); otherwise an N'...' literal
def make_valid_value_string(value)
  return "NULL" if value.nil?
  return value if value.match(/^[A-Z]+\(.*?\)$/)

  "N'#{value}'"
end
27
+
28
# Assembles a SQL Server INSERT statement against the dbo schema.
#
# @param table_name [Symbol, String]
# @param column_names [Array] column names; each one is wrapped in brackets
# @param values_strings [String] the pre-rendered value group(s)
# @return [String] the statement, terminated with a semicolon
def make_insert_statement(table_name, column_names, values_strings)
  column_list = column_names.join("], [")
  "INSERT [dbo].[#{table_name}] ([#{column_list}]) VALUES #{values_strings};"
end
31
+
32
+ private
33
+
34
# Character-by-character parser for the field list of a single SQL Server row,
# e.g. "N'bob', NULL, CAST(0x00 AS DateTime), 5".
#
# Quoted values are returned without the surrounding quotes (and without the
# N prefix) and keep doubled single quotes ('') as-is; an unquoted NULL becomes
# nil; whitespace outside quotes is dropped except inside a CAST(...) value.
#
# @param string [String] the text between the outer parentheses of VALUES
# @return [Array<Array<String, nil>>] a one-element Array holding the fields
def context_aware_sql_server_string_split(string)
  in_quoted_string = false
  previous_char_single_quote = false
  current_field_value = nil
  completed_fields = []

  string.each_char do |char|
    if char == "'" && !in_quoted_string
      # A pending "N" is the unicode-literal prefix, not a value of its own.
      if current_field_value != "N"
        completed_fields << current_field_value unless current_field_value.nil?
      end
      current_field_value = ""
      in_quoted_string = true
    elsif previous_char_single_quote
      previous_char_single_quote = false
      if char == "'"
        # Two quotes in a row inside a string are an escaped quote.
        current_field_value << "''"
      else
        # The previous quote closed the string; this character is discarded.
        completed_fields << current_field_value unless current_field_value.nil?
        in_quoted_string = false
        current_field_value = nil
      end
    elsif char == "'" && in_quoted_string
      # Either the closing quote or the first half of ''; decided on the next character.
      previous_char_single_quote = true
    elsif char == "," && !in_quoted_string
      completed_fields << current_field_value unless current_field_value.nil?
      current_field_value = nil
    elsif char == "L" && !in_quoted_string && current_field_value == "NUL"
      # Completes an unquoted NULL, which is recorded as a nil field.
      current_field_value = nil
      completed_fields << current_field_value
    elsif (char == " " || char == "\t") && !in_quoted_string
      if !current_field_value.nil? && current_field_value.start_with?("CAST(")
        current_field_value << char
      end
      # Don't add whitespace not in a string
    else
      current_field_value ||= ""
      current_field_value << char
    end
  end

  # Flush the last field (covers input ending on a closing quote or an unquoted value).
  completed_fields << current_field_value unless current_field_value.nil?
  [completed_fields]
end
79
+ end
@@ -0,0 +1,3 @@
1
class MyObfuscate
  # Gem version, read by the gemspec. Frozen so the shared constant cannot be
  # mutated in place.
  VERSION = "0.3.3".freeze
end
@@ -0,0 +1,531 @@
1
+ require 'spec_helper'
2
+
3
+ describe MyObfuscate do
4
+ describe "MyObfuscate.reassembling_each_insert" do
5
+ before do
6
+ @column_names = [:a, :b, :c, :d]
7
+ @test_insert = "INSERT INTO `some_table` (`a`, `b`, `c`, `d`) VALUES ('(\\'bob@bob.com','b()ob','some(thingelse1','25)('),('joe@joe.com','joe','somethingelse2','54');"
8
+ @test_insert_passes = [
9
+ ["(\\'bob@bob.com", "b()ob", "some(thingelse1", "25)("],
10
+ ["joe@joe.com", "joe", "somethingelse2", "54"]
11
+ ]
12
+ end
13
+
14
+ it "should yield each subinsert and reassemble the result" do
15
+ count = 0
16
+ reassembled = MyObfuscate.new.reassembling_each_insert(@test_insert, "some_table", @column_names) do |sub_insert|
17
+ sub_insert.should == @test_insert_passes.shift
18
+ count += 1
19
+ sub_insert
20
+ end
21
+ count.should == 2
22
+ reassembled.should == @test_insert
23
+ end
24
+ end
25
+
26
+ describe "MyObfuscate.apply_table_config" do
27
+ it "should work on email addresses" do
28
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else"], {:a => {:type => :email}}, [:a, :b])
29
+ new_row.length.should == 2
30
+ new_row.first.should =~ /^\w+\@\w+\.\w+$/
31
+ end
32
+
33
+ it "should work on strings" do
34
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "something crazy"], {:b => {:type => :string, :length => 7}}, [:a, :b, :c])
35
+ new_row.length.should == 3
36
+ new_row[1].length.should == 7
37
+ new_row[1].should_not == "something_else"
38
+ end
39
+
40
+ describe "conditional directives" do
41
+ it "should honor :unless conditionals" do
42
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "5"], {:a=> {:type => :fixed, :string => "123", :unless => lambda { |row| row[:a] == "blah" }}}, [:a, :b, :c])
43
+ new_row[0].should_not == "123"
44
+ new_row[0].should == "blah"
45
+
46
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "5"], {:a=> {:type => :fixed, :string => "123", :unless => lambda { |row| row[:a] == "not blah" }}}, [:a, :b, :c])
47
+ new_row[0].should == "123"
48
+
49
+ new_row = MyObfuscate.apply_table_config([nil, "something_else", "5"], {:a=> {:type => :fixed, :string => "123", :unless => :nil}, :b=> {:type => :fixed, :string => "123", :unless => :nil}}, [:a, :b, :c])
50
+ new_row[0].should == nil
51
+ new_row[1].should == "123"
52
+
53
+ new_row = MyObfuscate.apply_table_config(['', "something_else", "5"], {:a=> {:type => :fixed, :string => "123", :unless => :blank}, :b=> {:type => :fixed, :string => "123", :unless => :blank}}, [:a, :b, :c])
54
+ new_row[0].should == ''
55
+ new_row[1].should == "123"
56
+ end
57
+
58
+ it "should honor :if conditionals" do
59
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "5"], {:a=> {:type => :fixed, :string => "123", :if => lambda { |row| row[:a] == "blah" }}}, [:a, :b, :c])
60
+ new_row[0].should == "123"
61
+
62
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "5"], {:a=> {:type => :fixed, :string => "123", :if=> lambda { |row| row[:a] == "not blah" }}}, [:a, :b, :c])
63
+ new_row[0].should_not == "123"
64
+ new_row[0].should == "blah"
65
+
66
+ new_row = MyObfuscate.apply_table_config([nil, "something_else", "5"], {:a=> {:type => :fixed, :string => "123", :if => :nil}, :b=> {:type => :fixed, :string => "123", :if => :nil}}, [:a, :b, :c])
67
+ new_row[0].should == "123"
68
+ new_row[1].should == "something_else"
69
+
70
+ new_row = MyObfuscate.apply_table_config(['', "something_else", "5"], {:a=> {:type => :fixed, :string => "123", :if => :blank}, :b=> {:type => :fixed, :string => "123", :if => :blank}}, [:a, :b, :c])
71
+ new_row[0].should == "123"
72
+ new_row[1].should == "something_else"
73
+ end
74
+
75
+ it "should supply the original row values to the conditional" do
76
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else"], {:a => {:type => :fixed, :string => "123"}, :b => {:type => :fixed, :string => "yup", :if => lambda { |row| row[:a] == "blah" }}}, [:a, :b])
77
+ new_row[0].should == "123"
78
+ new_row[1].should == "yup"
79
+ end
80
+
81
+ it "should honor combined :unless and :if conditionals" do
82
+ #both true
83
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "5"], {:a=> {:type => :fixed, :string => "123", :if => lambda { |row| row[:a] == "blah" }, :unless => lambda { |row| row[:b] == "something_else" }}}, [:a, :b, :c])
84
+ new_row[0].should == "blah"
85
+
86
+ #both false
87
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "5"], {:a=> {:type => :fixed, :string => "123", :if => lambda { |row| row[:a] == "not blah" }, :unless => lambda { |row| row[:b] == "not something_else" }}}, [:a, :b, :c])
88
+ new_row[0].should == "blah"
89
+
90
+ #if true, #unless false
91
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "5"], {:a=> {:type => :fixed, :string => "123", :if => lambda { |row| row[:a] == "blah" }, :unless => lambda { |row| row[:b] == "not something_else" }}}, [:a, :b, :c])
92
+ new_row[0].should == "123"
93
+
94
+ #if false, #unless true
95
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "5"], {:a=> {:type => :fixed, :string => "123", :if => lambda { |row| row[:a] == "not blah" }, :unless => lambda { |row| row[:b] == "something_else" }}}, [:a, :b, :c])
96
+ new_row[0].should == "blah"
97
+ end
98
+ end
99
+
100
+ it "should be able to generate random integers in ranges" do
101
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "5"], {:c => {:type => :integer, :between => 10..100}}, [:a, :b, :c])
102
+ new_row.length.should == 3
103
+ new_row[2].to_i.to_s.should == new_row[2] # It should be an integer.
104
+ new_row[2].should_not == "5"
105
+ end
106
+
107
+ it "should be able to substitute fixed strings" do
108
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "5"], {:b => {:type => :fixed, :string => "hello"}}, [:a, :b, :c])
109
+ new_row.length.should == 3
110
+ new_row[1].should == "hello"
111
+ end
112
+
113
+ it "should be able to substitute a proc that returns a string" do
114
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "5"], {:b => {:type => :fixed, :string => proc { "Hello World" }}}, [:a, :b, :c])
115
+ new_row.length.should == 3
116
+ new_row[1].should == "Hello World"
117
+ end
118
+
119
+ it "should provide the row to the proc" do
120
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "5"], {:b => {:type => :fixed, :string => proc { |a| a[:b] }}}, [:a, :b, :c])
121
+ new_row.length.should == 3
122
+ new_row[1].should == "something_else"
123
+ end
124
+
125
+ it "should be able to substitute fixed strings from a random set" do
126
+ looking_for = ["hello", "world"]
127
+ original_looking_for = looking_for.dup
128
+ guard = 0
129
+ while !looking_for.empty? && guard < 1000
130
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "5"], {:a => {:type => :fixed, :one_of => ["hello", "world"]}}, [:a, :b, :c])
131
+ new_row.length.should == 3
132
+ original_looking_for.should include(new_row[0])
133
+ looking_for.delete new_row[0]
134
+ guard += 1
135
+ end
136
+ looking_for.should be_empty
137
+ end
138
+
139
+ it "should treat a symbol in the column definition as an implicit { :type => symbol }" do
140
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "5"], {:b => :null, :a => :keep}, [:a, :b, :c])
141
+ new_row.length.should == 3
142
+ new_row[0].should == "blah"
143
+ new_row[1].should == nil
144
+ end
145
+
146
+ it "should be able to set things NULL" do
147
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "5"], {:b => {:type => :null}}, [:a, :b, :c])
148
+ new_row.length.should == 3
149
+ new_row[1].should == nil
150
+ end
151
+
152
+ it "should be able to :keep the value the same" do
153
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "5"], {:b => {:type => :keep}}, [:a, :b, :c])
154
+ new_row.length.should == 3
155
+ new_row[1].should == "something_else"
156
+ end
157
+
158
+ it "should keep the value when given an unknown type, but should display a warning" do
159
+ $stderr = error_output = StringIO.new
160
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "5"], {:b => {:type => :unknown_type}}, [:a, :b, :c])
161
+ $stderr = STDERR
162
+ new_row.length.should == 3
163
+ new_row[1].should == "something_else"
164
+ error_output.rewind
165
+ error_output.read.should =~ /Keeping a column value by.*?unknown_type/
166
+ end
167
+
168
+ it "should be able to substitute lorem ipsum text" do
169
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "5"], {:a => :lorem, :b => {:type => :lorem, :number => 2}}, [:a, :b, :c])
170
+ new_row.length.should == 3
171
+ new_row[0].should_not == "blah"
172
+ new_row[0].should_not =~ /\w\.(?!\Z)/
173
+ new_row[1].should_not == "something_else"
174
+ new_row[1].should =~ /\w\.(?!\Z)/
175
+ end
176
+
177
+ it "should be able to generate an :address" do
178
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "5"], {:a => :address}, [:a, :b, :c])
179
+ new_row.length.should == 3
180
+ new_row[0].should_not == "blah"
181
+ new_row[0].should =~ /\d+ \w/
182
+ end
183
+
184
+ it "should be able to generate a :name" do
185
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "5"], {:a => :name}, [:a, :b, :c])
186
+ new_row.length.should == 3
187
+ new_row[0].should_not == "blah"
188
+ new_row[0].should =~ / /
189
+ end
190
+
191
+ it "should be able to generate just a street address" do
192
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "5"], {:a => :street_address}, [:a, :b, :c])
193
+ new_row.length.should == 3
194
+ new_row[0].should_not == "blah"
195
+ new_row[0].should =~ /\d+ \w/
196
+ end
197
+
198
+ it "should be able to generate a city" do
199
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "5"], {:a => :city}, [:a, :b, :c])
200
+ new_row.length.should == 3
201
+ new_row[0].should_not == "blah"
202
+ end
203
+
204
+ it "should be able to generate a state" do
205
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "5"], {:a => :state}, [:a, :b, :c])
206
+ new_row.length.should == 3
207
+ new_row[0].should_not == "blah"
208
+ end
209
+
210
+ it "should be able to generate a zip code" do
211
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "5"], {:a => :zip_code}, [:a, :b, :c])
212
+ new_row.length.should == 3
213
+ new_row[0].should_not == "blah"
214
+ new_row[0].should =~ /\d+/
215
+ end
216
+
217
+ it "should be able to generate a phone number" do
218
+ new_row = MyObfuscate.apply_table_config(["blah", "something_else", "5"], {:a => :phone}, [:a, :b, :c])
219
+ new_row.length.should == 3
220
+ new_row[0].should_not == "blah"
221
+ new_row[0].should =~ /\d+/
222
+ end
223
+
224
+ describe "when faker generates values with quotes in them" do
225
+ before do
226
+ Faker::Address.stub(:city).and_return("O'ReillyTown")
227
+ Faker::Name.stub(:name).and_return("Foo O'Reilly")
228
+ Faker::Name.stub(:first_name).and_return("O'Foo")
229
+ Faker::Name.stub(:last_name).and_return("O'Reilly")
230
+ Faker::Lorem.stub(:sentences).with(any_args).and_return(["Foo bar O'Thingy"])
231
+ end
232
+
233
+ it "should remove single quotes from the value" do
234
+ new_row = MyObfuscate.apply_table_config(["address", "city", "first", "last", "fullname", "some text"],
235
+ {:a => :address, :b => :city, :c => :first_name, :d => :last_name, :e => :name, :f => :lorem},
236
+ [:a, :b, :c, :d, :e, :f])
237
+ new_row.each {|value| value.should_not include("'")}
238
+ end
239
+ end
240
+ end
241
+
242
+ describe "MyObfuscate.row_as_hash" do
243
+ it "will map row values into a hash with column names as keys" do
244
+ MyObfuscate.row_as_hash([1, 2, 3, 4], [:a, :b, :c, :d]).should == {:a => 1, :b => 2, :c => 3, :d => 4}
245
+ end
246
+ end
247
+
248
+ describe "#obfuscate" do
249
+ describe "when using MySQL" do
250
+ context "when there is nothing to obfuscate" do
251
+ it "should accept an IO object for input and output, and copy the input to the output" do
252
+ ddo = MyObfuscate.new
253
+ string = "hello, world\nsup?"
254
+ input = StringIO.new(string)
255
+ output = StringIO.new
256
+ ddo.obfuscate(input, output)
257
+ input.rewind
258
+ output.rewind
259
+ output.read.should == string
260
+ end
261
+ end
262
+
263
# The configuration names a :gender column that the INSERT statement below
# does not list.
context "when the dump to obfuscate is missing columns" do
  before do
    table_rules = {
      :email => {:type => :email, :honk_email_skip => true},
      :name => {:type => :string, :length => 8, :chars => MyObfuscate::USERNAME_CHARS},
      :gender => {:type => :fixed, :string => "m"}
    }
    @ddo = MyObfuscate.new({:some_table => table_rules})
    @output = StringIO.new
    # Dump text is kept flush-left inside the heredoc so the statement starts at column 0.
    @database_dump = StringIO.new(<<-SQL)
INSERT INTO `some_table` (`email`, `name`, `something`, `age`) VALUES ('bob@honk.com','bob', 'some\\'thin,ge())lse1', 25),('joe@joe.com','joe', 'somethingelse2', 54);
    SQL
  end

  it "should raise an error if a column name can't be found" do
    run_obfuscation = lambda { @ddo.obfuscate(@database_dump, @output) }
    run_obfuscation.should raise_error
  end
end
283
+
284
# End-to-end check of a MySQL dump covering an obfuscated table, a truncated
# table, a kept table, a table with fixed/null replacements, and a table that
# is absent from the configuration.
context "when there is something to obfuscate" do
  before do
    # Dump text is kept flush-left inside the heredoc so each statement starts at column 0.
    @database_dump = StringIO.new(<<-SQL)
INSERT INTO `some_table` (`email`, `name`, `something`, `age`) VALUES ('bob@honk.com','bob', 'some\\'thin,ge())lse1', 25),('joe@joe.com','joe', 'somethingelse2', 54),('dontmurderme@direwolf.com','direwolf', 'somethingelse3', 44);
INSERT INTO `another_table` (`a`, `b`, `c`, `d`) VALUES (1,2,3,4), (5,6,7,8);
INSERT INTO `some_table_to_keep` (`a`, `b`, `c`, `d`) VALUES (1,2,3,4), (5,6,7,8);
INSERT INTO `one_more_table` (`a`, `password`, `c`, `d,d`) VALUES ('hello','kjhjd^&dkjh', 'aawefjkafe'), ('hello1','kjhj!', 892938), ('hello2','moose!!', NULL);
INSERT INTO `an_ignored_table` (`col`, `col2`) VALUES ('hello','kjhjd^&dkjh'), ('hello1','kjhj!'), ('hello2','moose!!');
    SQL

    @ddo = MyObfuscate.new({
      :some_table => {
        :email => {:type => :email, :skip_regexes => [/^[\w\.\_]+@honk\.com$/i, /^dontmurderme@direwolf.com$/]},
        :name => {:type => :string, :length => 8, :chars => MyObfuscate::USERNAME_CHARS},
        :age => {:type => :integer, :between => 10...80}
      },
      :another_table => :truncate,
      :some_table_to_keep => :keep,
      :one_more_table => {
        # Note: fixed strings must be pre-SQL escaped!
        :password => {:type => :fixed, :string => "monkey"},
        :c => {:type => :null}
      }
    })

    @output = StringIO.new
    @error_output = StringIO.new

    # Capture anything written to $stderr while obfuscating. The previous
    # stream is restored in an ensure block so that a failure inside
    # #obfuscate cannot leave $stderr redirected for the rest of the run.
    previous_stderr = $stderr
    $stderr = @error_output
    begin
      @ddo.obfuscate(@database_dump, @output)
    ensure
      $stderr = previous_stderr
    end

    @output_string = @output.string
  end

  it "should be able to truncate tables" do
    @output_string.should_not include("INSERT INTO `another_table`")
    @output_string.should include("INSERT INTO `one_more_table`")
  end

  it "should be able to declare tables to keep" do
    @output_string.should include("INSERT INTO `some_table_to_keep` (`a`, `b`, `c`, `d`) VALUES (1,2,3,4), (5,6,7,8);")
  end

  it "should ignore tables that it doesn't know about, but should warn" do
    @output_string.should include("INSERT INTO `an_ignored_table` (`col`, `col2`) VALUES ('hello','kjhjd^&dkjh'), ('hello1','kjhj!'), ('hello2','moose!!');")
    @error_output.string.should =~ /an_ignored_table was not specified in the config/
  end

  it "should obfuscate the tables" do
    @output_string.should include("INSERT INTO `some_table` (`email`, `name`, `something`, `age`) VALUES (")
    @output_string.should include("INSERT INTO `one_more_table` (`a`, `password`, `c`, `d,d`) VALUES (")
    @output_string.should include("'some\\'thin,ge())lse1'")
    @output_string.should include("INSERT INTO `one_more_table` (`a`, `password`, `c`, `d,d`) VALUES ('hello','monkey',NULL),('hello1','monkey',NULL),('hello2','monkey',NULL);")
    @output_string.should_not include("INSERT INTO `one_more_table` (`a`, `password`, `c`, `d,d`) VALUES ('hello','kjhjd^&dkjh', 'aawefjkafe'), ('hello1','kjhj!', 892938), ('hello2','moose!!', NULL);")
    @output_string.should_not include("INSERT INTO `one_more_table` (`a`, `password`, `c`, `d,d`) VALUES ('hello','kjhjd^&dkjh','aawefjkafe'),('hello1','kjhj!',892938),('hello2','moose!!',NULL);")
    # Compare against the complete three-row statement from the dump; a
    # truncated copy could never appear in the output and would pass vacuously.
    @output_string.should_not include("INSERT INTO `some_table` (`email`, `name`, `something`, `age`) VALUES ('bob@honk.com','bob', 'some\\'thin,ge())lse1', 25),('joe@joe.com','joe', 'somethingelse2', 54),('dontmurderme@direwolf.com','direwolf', 'somethingelse3', 44);")
  end

  it "should leave emails matching a :skip_regexes pattern unchanged and obfuscate the others" do
    @output_string.should include("('bob@honk.com',")
    @output_string.should include("('dontmurderme@direwolf.com',")
    @output_string.should_not include("joe@joe.com")
  end
end
347
+
348
# The dump contains a `something` column that the table configuration does not
# mention; with fail_on_unspecified_columns enabled this should be an error
# unless the column is listed in globally_kept_columns.
context "when fail_on_unspecified_columns is set to true" do
  before do
    table_rules = {
      :email => {:type => :email, :skip_regexes => [/^[\w\.\_]+@honk\.com$/i, /^dontmurderme@direwolf.com$/]},
      :name => {:type => :string, :length => 8, :chars => MyObfuscate::USERNAME_CHARS},
      :age => {:type => :integer, :between => 10...80}
    }
    # Dump text is kept flush-left inside the heredoc so the statement starts at column 0.
    @database_dump = StringIO.new(<<-SQL)
INSERT INTO `some_table` (`email`, `name`, `something`, `age`) VALUES ('bob@honk.com','bob', 'some\\'thin,ge())lse1', 25),('joe@joe.com','joe', 'somethingelse2', 54),('dontmurderme@direwolf.com','direwolf', 'somethingelse3', 44);
    SQL

    @ddo = MyObfuscate.new({:some_table => table_rules})
    @ddo.fail_on_unspecified_columns = true
  end

  it "should raise an exception when an unspecified column is found" do
    run_obfuscation = lambda { @ddo.obfuscate(@database_dump, StringIO.new) }
    run_obfuscation.should raise_error(/column 'something' defined/i)
  end

  it "should accept columns defined in globally_kept_columns" do
    @ddo.globally_kept_columns = %w[something]
    run_obfuscation = lambda { @ddo.obfuscate(@database_dump, StringIO.new) }
    run_obfuscation.should_not raise_error
  end
end
377
+ end
378
+
379
+ describe "when using MS SQL Server" do
380
# With an empty configuration and no INSERT statements, the SQL Server mode
# should copy the input to the output verbatim.
context "when there is nothing to obfuscate" do
  it "should accept an IO object for input and output, and copy the input to the output" do
    text = "hello, world\nsup?"
    source = StringIO.new(text)
    sink = StringIO.new

    obfuscator = MyObfuscate.new
    obfuscator.database_type = :sql_server
    obfuscator.obfuscate(source, sink)

    source.rewind
    sink.rewind
    sink.read.should == text
  end
end
393
+
394
# The configuration names a :gender column that the INSERT statement below
# does not list.
context "when the dump to obfuscate is missing columns" do
  before do
    table_rules = {
      :email => {:type => :email, :honk_email_skip => true},
      :name => {:type => :string, :length => 8, :chars => MyObfuscate::USERNAME_CHARS},
      :gender => {:type => :fixed, :string => "m"}
    }
    # Dump text is kept flush-left inside the heredoc so the statement starts at column 0.
    @database_dump = StringIO.new(<<-SQL)
INSERT [dbo].[some_table] ([email], [name], [something], [age]) VALUES ('bob@honk.com','bob', 'some''thin,ge())lse1', 25);
    SQL
    @ddo = MyObfuscate.new({:some_table => table_rules})
    @ddo.database_type = :sql_server
    @output = StringIO.new
  end

  it "should raise an error if a column name can't be found" do
    run_obfuscation = lambda { @ddo.obfuscate(@database_dump, @output) }
    run_obfuscation.should raise_error
  end
end
415
+
416
# End-to-end check of a SQL Server dump covering an obfuscated table (with a
# kept CAST(...) column), a truncated table, a kept table, a table with
# fixed/null replacements, and a table that is absent from the configuration.
context "when there is something to obfuscate" do
  before do
    # Dump text is kept flush-left inside the heredoc so each statement starts at column 0.
    @database_dump = StringIO.new(<<-SQL)
INSERT [dbo].[some_table] ([email], [name], [something], [age], [bday]) VALUES (N'bob@honk.com',N'bob', N'some''thin,ge())lse1', 25, CAST(0x00009E1A00000000 AS DATETIME));
INSERT [dbo].[some_table] ([email], [name], [something], [age], [bday]) VALUES (N'joe@joe.com',N'joe', N'somethingelse2', 54, CAST(0x00009E1A00000000 AS DATETIME));
INSERT [dbo].[some_table] ([email], [name], [something], [age], [bday]) VALUES (N'dontmurderme@direwolf.com',N'direwolf', N'somethingelse3', 44, CAST(0x00009E1A00000000 AS DATETIME));
INSERT [dbo].[another_table] ([a], [b], [c], [d]) VALUES (1,2,3,4);
INSERT [dbo].[another_table] ([a], [b], [c], [d]) VALUES (5,6,7,8);
INSERT [dbo].[some_table_to_keep] ([a], [b], [c], [d]) VALUES (1,2,3,4);
INSERT [dbo].[some_table_to_keep] ([a], [b], [c], [d]) VALUES (5,6,7,8);
INSERT [dbo].[one_more_table] ([a], [password], [c], [d,d]) VALUES (N'hello',N'kjhjd^&dkjh', N'aawefjkafe');
INSERT [dbo].[one_more_table] ([a], [password], [c], [d,d]) VALUES (N'hello1',N'kjhj!', 892938);
INSERT [dbo].[one_more_table] ([a], [password], [c], [d,d]) VALUES (N'hello2',N'moose!!', NULL);
INSERT [dbo].[an_ignored_table] ([col], [col2]) VALUES (N'hello',N'kjhjd^&dkjh');
INSERT [dbo].[an_ignored_table] ([col], [col2]) VALUES (N'hello1',N'kjhj!');
INSERT [dbo].[an_ignored_table] ([col], [col2]) VALUES (N'hello2',N'moose!!');
    SQL

    @ddo = MyObfuscate.new({
      :some_table => {
        :email => {:type => :email, :skip_regexes => [/^[\w\.\_]+@honk\.com$/i, /^dontmurderme@direwolf.com$/]},
        :name => {:type => :string, :length => 8, :chars => MyObfuscate::USERNAME_CHARS},
        :age => {:type => :integer, :between => 10...80},
        :bday => :keep
      },
      :another_table => :truncate,
      :some_table_to_keep => :keep,
      :one_more_table => {
        # Note: fixed strings must be pre-SQL escaped!
        :password => {:type => :fixed, :string => "monkey"},
        :c => {:type => :null}
      }
    })
    @ddo.database_type = :sql_server

    @output = StringIO.new
    @error_output = StringIO.new

    # Capture anything written to $stderr while obfuscating. The previous
    # stream is restored in an ensure block so that a failure inside
    # #obfuscate cannot leave $stderr redirected for the rest of the run.
    previous_stderr = $stderr
    $stderr = @error_output
    begin
      @ddo.obfuscate(@database_dump, @output)
    ensure
      $stderr = previous_stderr
    end

    @output_string = @output.string
  end

  it "should be able to truncate tables" do
    @output_string.should_not include("INSERT [dbo].[another_table]")
    @output_string.should include("INSERT [dbo].[one_more_table]")
  end

  it "should be able to declare tables to keep" do
    @output_string.should include("INSERT [dbo].[some_table_to_keep] ([a], [b], [c], [d]) VALUES (1,2,3,4);")
    @output_string.should include("INSERT [dbo].[some_table_to_keep] ([a], [b], [c], [d]) VALUES (5,6,7,8);")
  end

  it "should ignore tables that it doesn't know about, but should warn" do
    @output_string.should include("INSERT [dbo].[an_ignored_table] ([col], [col2]) VALUES (N'hello',N'kjhjd^&dkjh');")
    @output_string.should include("INSERT [dbo].[an_ignored_table] ([col], [col2]) VALUES (N'hello1',N'kjhj!');")
    @output_string.should include("INSERT [dbo].[an_ignored_table] ([col], [col2]) VALUES (N'hello2',N'moose!!');")
    @error_output.string.should =~ /an_ignored_table was not specified in the config/
  end

  it "should obfuscate the tables" do
    @output_string.should include("INSERT [dbo].[some_table] ([email], [name], [something], [age], [bday]) VALUES (")
    @output_string.should include("CAST(0x00009E1A00000000 AS DATETIME)")
    @output_string.should include("INSERT [dbo].[one_more_table] ([a], [password], [c], [d,d]) VALUES (")
    @output_string.should include("'some''thin,ge())lse1'")
    @output_string.should include("INSERT [dbo].[one_more_table] ([a], [password], [c], [d,d]) VALUES (N'hello',N'monkey',NULL);")
    @output_string.should include("INSERT [dbo].[one_more_table] ([a], [password], [c], [d,d]) VALUES (N'hello1',N'monkey',NULL);")
    @output_string.should include("INSERT [dbo].[one_more_table] ([a], [password], [c], [d,d]) VALUES (N'hello2',N'monkey',NULL);")
    @output_string.should_not include("INSERT [dbo].[one_more_table] ([a], [password], [c], [d,d]) VALUES (N'hello',N'kjhjd^&dkjh', N'aawefjkafe');")
    @output_string.should_not include("INSERT [dbo].[one_more_table] ([a], [password], [c], [d,d]) VALUES (N'hello1',N'kjhj!', 892938);")
    @output_string.should_not include("INSERT [dbo].[one_more_table] ([a], [password], [c], [d,d]) VALUES (N'hello2',N'moose!!', NULL);")
    # The column list must include [bday] exactly as in the dump; without it
    # these strings could never appear and the checks would pass vacuously.
    @output_string.should_not include("INSERT [dbo].[some_table] ([email], [name], [something], [age], [bday]) VALUES (N'bob@honk.com',N'bob', N'some''thin,ge())lse1', 25, CAST(0x00009E1A00000000 AS DATETIME));")
    @output_string.should_not include("INSERT [dbo].[some_table] ([email], [name], [something], [age], [bday]) VALUES (N'joe@joe.com',N'joe', N'somethingelse2', 54, CAST(0x00009E1A00000000 AS DATETIME));")
  end

  it "should leave emails matching a :skip_regexes pattern unchanged and obfuscate the others" do
    @output_string.should include("(N'bob@honk.com',")
    @output_string.should include("(N'dontmurderme@direwolf.com',")
    @output_string.should_not include("joe@joe.com")
  end
end
498
+
499
# The dump contains a [something] column that the table configuration does not
# mention; with fail_on_unspecified_columns enabled this should be an error
# unless the column is listed in globally_kept_columns.
context "when fail_on_unspecified_columns is set to true" do
  before do
    table_rules = {
      :email => {:type => :email, :skip_regexes => [/^[\w\.\_]+@honk\.com$/i, /^dontmurderme@direwolf.com$/]},
      :name => {:type => :string, :length => 8, :chars => MyObfuscate::USERNAME_CHARS},
      :age => {:type => :integer, :between => 10...80}
    }
    # Dump text is kept flush-left inside the heredoc so the statement starts at column 0.
    @database_dump = StringIO.new(<<-SQL)
INSERT INTO [dbo].[some_table] ([email], [name], [something], [age]) VALUES ('bob@honk.com','bob', 'some''thin,ge())lse1', 25);
    SQL

    @ddo = MyObfuscate.new({:some_table => table_rules})
    @ddo.database_type = :sql_server
    @ddo.fail_on_unspecified_columns = true
  end

  it "should raise an exception when an unspecified column is found" do
    run_obfuscation = lambda { @ddo.obfuscate(@database_dump, StringIO.new) }
    run_obfuscation.should raise_error(/column 'something' defined/i)
  end

  it "should accept columns defined in globally_kept_columns" do
    @ddo.globally_kept_columns = %w[something]
    run_obfuscation = lambda { @ddo.obfuscate(@database_dump, StringIO.new) }
    run_obfuscation.should_not raise_error
  end
end
529
+ end
530
+ end
531
+ end
@@ -0,0 +1,50 @@
1
+ require 'spec_helper'
2
+
3
# Specs for the MySQL dialect helper: recognising INSERT statements and
# splitting their VALUES lists into rows of raw field strings.
describe MyObfuscate::Mysql do
  describe "#parse_insert_statement" do
    it "should return nil for other SQL syntaxes (MS SQL Server)" do
      sql_server_insert = "INSERT [dbo].[TASKS] ([TaskID], [TaskName]) VALUES (61, N'Report Thing')"
      subject.parse_insert_statement(sql_server_insert).should be_nil
    end

    it "should return nil for MySQL non-insert statements" do
      create_statement = "CREATE TABLE `some_table`;"
      subject.parse_insert_statement(create_statement).should be_nil
    end

    it "should return a hash of table name, column names for MySQL insert statements" do
      insert_statement = "INSERT INTO `some_table` (`email`, `name`, `something`, `age`) VALUES ('bob@honk.com','bob', 'some\\'thin,ge())lse1', 25),('joe@joe.com','joe', 'somethingelse2', 54);"
      expected = {:table_name => :some_table, :column_names => [:email, :name, :something, :age]}

      subject.parse_insert_statement(insert_statement).should == expected
    end
  end

  describe "#rows_to_be_inserted" do
    it "should split a mysql string into fields" do
      statement = "INSERT INTO `some_table` (thing1,thing2) VALUES ('bob@bob.com','bob', 'somethingelse1', 25, '2', 10, 'hi') ; "
      expected_rows = [['bob@bob.com', 'bob', 'somethingelse1', '25', '2', '10', "hi"]]

      subject.rows_to_be_inserted(statement).should == expected_rows
    end

    it "should work ok with escaped characters" do
      statement = "INSERT INTO `some_table` (thing1,thing2) VALUES ('bob,@bob.c , om', 'bo\\', b', 'some\"thin\\gel\\\\\\'se1', 25, '2', 10, 'hi', 5) ; "
      # Backslash escapes are expected to come back exactly as written in the statement.
      expected_rows = [['bob,@bob.c , om', 'bo\\\', b', 'some"thin\\gel\\\\\\\'se1', '25', '2', '10', "hi", "5"]]

      subject.rows_to_be_inserted(statement).should == expected_rows
    end

    it "should work with multiple subinserts" do
      statement = "INSERT INTO `some_table` (thing1,thing2) VALUES (1,2,3, '((m))(oo()s,e'), ('bob,@bob.c , om', 'bo\\', b', 'some\"thin\\gel\\\\\\'se1', 25, '2', 10, 'hi', 5) ;"
      expected_rows = [["1", "2", "3", "((m))(oo()s,e"], ['bob,@bob.c , om', 'bo\\\', b', 'some"thin\\gel\\\\\\\'se1', '25', '2', '10', "hi", "5"]]

      subject.rows_to_be_inserted(statement).should == expected_rows
    end

    it "should work ok with NULL values" do
      statement = "INSERT INTO `some_table` (thing1,thing2) VALUES (NULL , 'bob@bob.com','bob', NULL, 25, '2', NULL, 'hi', NULL ); "
      # SQL NULL is expected to be returned as Ruby nil.
      expected_rows = [[nil, 'bob@bob.com', 'bob', nil, '25', '2', nil, "hi", nil]]

      subject.rows_to_be_inserted(statement).should == expected_rows
    end

    it "should work with empty strings" do
      statement = "INSERT INTO `some_table` (thing1,thing2) VALUES (NULL , '', '' , '', 25, '2','', 'hi','') ;"
      expected_rows = [[nil, '', '', '', '25', '2', '', "hi", '']]

      subject.rows_to_be_inserted(statement).should == expected_rows
    end
  end
end
@@ -0,0 +1,10 @@
1
+ require 'rubygems'
2
+ require 'bundler'
3
# Load the gems from the default and development Bundler groups.
Bundler.require(:default, :development)

# Put the gem's lib directory and the spec directory at the front of the load
# path; lib is listed first so it ends up ahead of the spec directory.
spec_dir = File.dirname(__FILE__)
lib_dir = File.join(spec_dir, '..', 'lib')
$LOAD_PATH.unshift(lib_dir, spec_dir)
require 'my_obfuscate'

RSpec.configure do |config|
  # config.mock_with :rr
end
@@ -0,0 +1,58 @@
1
+ require 'spec_helper'
2
+
3
# Specs for the SQL Server dialect helper: recognising INSERT statements,
# splitting their VALUES lists into raw field strings, and rendering values
# back into SQL.
describe MyObfuscate::SqlServer do
  describe "#parse_insert_statement" do
    it "should return a hash of table_name, column_names for SQL Server input statements" do
      insert_statement = "INSERT [dbo].[TASKS] ([TaskID], [TaskName]) VALUES (61, N'Report Thing')"
      expected = { :table_name => :TASKS, :column_names => [:TaskID, :TaskName] }

      subject.parse_insert_statement(insert_statement).should == expected
    end

    it "should return nil for SQL Server non-insert statements" do
      create_statement = "CREATE TABLE [dbo].[WORKFLOW]("
      subject.parse_insert_statement(create_statement).should be_nil
    end

    it "should return nil for non-SQL Server insert statements (MySQL)" do
      mysql_insert = "INSERT INTO `some_table` (`email`, `name`, `something`, `age`) VALUES ('bob@honk.com','bob', 'some\\'thin,ge())lse1', 25),('joe@joe.com','joe', 'somethingelse2', 54);"
      subject.parse_insert_statement(mysql_insert).should be_nil
    end
  end

  describe "#rows_to_be_inserted" do
    it "should split a SQL Server string into fields" do
      statement = "INSERT [dbo].[some_table] ([thing1],[thing2]) VALUES (N'bob@bob.com',N'bob', N'somethingelse1',25, '2', 10, 'hi', CAST(0x00009E1A00000000 AS DATETIME)) ; "
      # The N prefix and surrounding quotes are dropped; the CAST(...) call is kept as-is.
      expected_rows = [['bob@bob.com', 'bob', 'somethingelse1', '25', '2', '10', "hi", "CAST(0x00009E1A00000000 AS DATETIME)"]]

      subject.rows_to_be_inserted(statement).should == expected_rows
    end

    it "should work ok with single quote escape" do
      statement = "INSERT [dbo].[some_table] ([thing1],[thing2]) VALUES (N'bob,@bob.c , om', 'bo'', b', N'some\"thingel''se1', 25, '2', 10, 'hi', 5) ; "
      # Doubled single quotes are expected to stay doubled in the parsed fields.
      expected_rows = [['bob,@bob.c , om', "bo'', b", "some\"thingel''se1", '25', '2', '10', "hi", "5"]]

      subject.rows_to_be_inserted(statement).should == expected_rows
    end

    it "should work ok with NULL values" do
      statement = "INSERT [dbo].[some_table] ([thing1],[thing2]) VALUES (NULL , N'bob@bob.com','bob', NULL, 25, N'2', NULL, 'hi', NULL ); "
      # SQL NULL is expected to be returned as Ruby nil.
      expected_rows = [[nil, 'bob@bob.com', 'bob', nil, '25', '2', nil, "hi", nil]]

      subject.rows_to_be_inserted(statement).should == expected_rows
    end

    it "should work with empty strings" do
      statement = "INSERT [dbo].[some_table] ([thing1],[thing2]) VALUES (NULL , N'', '' , '', 25, '2','', N'hi','') ;"
      expected_rows = [[nil, '', '','', '25', '2', '', "hi", '']]

      subject.rows_to_be_inserted(statement).should == expected_rows
    end
  end

  describe "#make_valid_value_string" do
    it "should output 'NULL' when the value is nil" do
      rendered = subject.make_valid_value_string(nil)
      rendered.should == "NULL"
    end

    it "should enclose the value in quotes if it's a string" do
      rendered = subject.make_valid_value_string("something")
      rendered.should == "N'something'"
    end

    it "should not enclose the value in quotes if it is a method call" do
      cast_expression = "CAST(0x00009E1A00000000 AS DATETIME)"
      subject.make_valid_value_string(cast_expression).should == "CAST(0x00009E1A00000000 AS DATETIME)"
    end
  end
end
metadata ADDED
@@ -0,0 +1,97 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: iterationlabs-my_obfuscate
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.3.3
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Andrew Cantino
9
+ - Dave Willett
10
+ - Mike Grafton
11
+ - Mason Glaves
12
+ - Greg Bell
13
+ - Mavenlink
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+ date: 2012-04-10 00:00:00.000000000 Z
18
+ dependencies:
19
+ - !ruby/object:Gem::Dependency
20
+ name: rspec
21
+ requirement: !ruby/object:Gem::Requirement
22
+ none: false
23
+ requirements:
24
+ - - ! '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ type: :development
28
+ prerelease: false
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ none: false
31
+ requirements:
32
+ - - ! '>='
33
+ - !ruby/object:Gem::Version
34
+ version: '0'
35
+ - !ruby/object:Gem::Dependency
36
+ name: faker
37
+ requirement: !ruby/object:Gem::Requirement
38
+ none: false
39
+ requirements:
40
+ - - '='
41
+ - !ruby/object:Gem::Version
42
+ version: 0.9.5
43
+ type: :runtime
44
+ prerelease: false
45
+ version_requirements: !ruby/object:Gem::Requirement
46
+ none: false
47
+ requirements:
48
+ - - '='
49
+ - !ruby/object:Gem::Version
50
+ version: 0.9.5
51
+ description: Standalone Ruby code for the selective rewriting of MySQL dumps in order
52
+ to protect user privacy.
53
+ email: andrew@iterationlabs.com
54
+ executables: []
55
+ extensions: []
56
+ extra_rdoc_files: []
57
+ files:
58
+ - .gitignore
59
+ - Gemfile
60
+ - LICENSE
61
+ - README.rdoc
62
+ - Rakefile
63
+ - iterationlabs-my_obfuscate.gemspec
64
+ - lib/my_obfuscate.rb
65
+ - lib/my_obfuscate/mysql.rb
66
+ - lib/my_obfuscate/sql_server.rb
67
+ - lib/my_obfuscate/version.rb
68
+ - spec/my_obfuscate_spec.rb
69
+ - spec/mysql_spec.rb
70
+ - spec/spec_helper.rb
71
+ - spec/sql_server_spec.rb
72
+ homepage: http://github.com/iterationlabs/myobfuscate
73
+ licenses: []
74
+ post_install_message:
75
+ rdoc_options: []
76
+ require_paths:
77
+ - lib
78
+ required_ruby_version: !ruby/object:Gem::Requirement
79
+ none: false
80
+ requirements:
81
+ - - ! '>='
82
+ - !ruby/object:Gem::Version
83
+ version: '0'
84
+ required_rubygems_version: !ruby/object:Gem::Requirement
85
+ none: false
86
+ requirements:
87
+ - - ! '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ requirements: []
91
+ rubyforge_project:
92
+ rubygems_version: 1.8.21
93
+ signing_key:
94
+ specification_version: 3
95
+ summary: Standalone Ruby code for the selective rewriting of MySQL dumps in order
96
+ to protect user privacy.
97
+ test_files: []