my_obfuscate 0.3.0 → 0.3.7
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +4 -0
- data/Gemfile +4 -0
- data/README.rdoc +49 -14
- data/Rakefile +4 -56
- data/lib/my_obfuscate.rb +117 -82
- data/lib/my_obfuscate/mysql.rb +91 -0
- data/lib/my_obfuscate/sql_server.rb +81 -0
- data/lib/my_obfuscate/version.rb +3 -0
- data/my_obfuscate.gemspec +12 -44
- data/spec/my_obfuscate_spec.rb +442 -129
- data/spec/mysql_spec.rb +78 -0
- data/spec/spec_helper.rb +5 -5
- data/spec/sql_server_spec.rb +58 -0
- metadata +70 -51
- data/.document +0 -5
- data/VERSION +0 -1
data/.gitignore
CHANGED
data/Gemfile
ADDED
data/README.rdoc
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
= my_obfuscate
|
2
2
|
|
3
|
-
Standalone Ruby code for the selective rewriting of
|
3
|
+
Standalone Ruby code for the selective rewriting of SQL dumps in order to protect user privacy. Supports MySQL and SQL Server.
|
4
4
|
|
5
5
|
= Install
|
6
6
|
|
7
|
-
sudo gem install my_obfuscate
|
7
|
+
(sudo) gem install my_obfuscate
|
8
8
|
|
9
9
|
= Example Usage
|
10
10
|
|
@@ -17,34 +17,65 @@ Make an obfuscator.rb script:
|
|
17
17
|
obfuscator = MyObfuscate.new({
|
18
18
|
:people => {
|
19
19
|
:email => { :type => :email, :skip_regexes => [/^[\w\.\_]+@my_company\.com$/i] },
|
20
|
-
:ethnicity =>
|
20
|
+
:ethnicity => :keep,
|
21
21
|
:crypted_password => { :type => :fixed, :string => "SOME_FIXED_PASSWORD_FOR_EASE_OF_DEBUGGING" },
|
22
22
|
:salt => { :type => :fixed, :string => "SOME_THING" },
|
23
|
-
:remember_token =>
|
24
|
-
:remember_token_expires_at =>
|
25
|
-
:
|
26
|
-
:
|
27
|
-
:
|
28
|
-
:
|
23
|
+
:remember_token => :null,
|
24
|
+
:remember_token_expires_at => :null,
|
25
|
+
:age => { :type => :null, :unless => lambda { |person| person[:email] == "hello@example.com" } },
|
26
|
+
:photo_file_name => :null,
|
27
|
+
:photo_content_type => :null,
|
28
|
+
:photo_file_size => :null,
|
29
|
+
:photo_updated_at => :null,
|
29
30
|
:postal_code => { :type => :fixed, :string => "94109", :unless => lambda {|person| person[:postal_code] == "12345"} },
|
30
|
-
:name =>
|
31
|
+
:name => :name,
|
32
|
+
:full_address => :address,
|
33
|
+
:bio => { :type => :lorem, :number => 4 },
|
31
34
|
:relationship_status => { :type => :fixed, :one_of => ["Single", "Divorced", "Married", "Engaged", "In a Relationship"] },
|
32
35
|
:has_children => { :type => :integer, :between => 0..1 },
|
33
36
|
},
|
34
37
|
|
35
38
|
:invites => :truncate,
|
36
39
|
:invite_requests => :truncate,
|
40
|
+
:tags => :keep,
|
37
41
|
|
38
42
|
:relationships => {
|
39
|
-
:account_id =>
|
43
|
+
:account_id => :keep,
|
40
44
|
:code => { :type => :string, :length => 8, :chars => MyObfuscate::USERNAME_CHARS }
|
41
45
|
}
|
42
46
|
})
|
47
|
+
obfuscator.fail_on_unspecified_columns = true # if you want it to require every column in the table to be in the above definition
|
48
|
+
obfuscator.globally_kept_columns = %w[id created_at updated_at] # if you set fail_on_unspecified_columns, you may want this as well
|
43
49
|
obfuscator.obfuscate(STDIN, STDOUT)
|
44
50
|
|
45
51
|
And to get an obfuscated dump:
|
46
|
-
|
47
|
-
|
52
|
+
|
53
|
+
mysqldump -c --add-drop-table --hex-blob -u user -ppassword database | ruby obfuscator.rb > obfuscated_dump.sql
|
54
|
+
|
55
|
+
Note that the -c option on mysqldump is required to use my_obfuscate. Additionally, the default behavior of mysqldump
|
56
|
+
is to output special characters. This may cause trouble, so you can request hex-encoded blob content with --hex-blob.
|
57
|
+
If you get MySQL errors due to very long lines, try some combination of --max_allowed_packet=128M, --single-transaction, --skip-extended-insert, and --quick.
|
58
|
+
|
59
|
+
== Database Server
|
60
|
+
|
61
|
+
By default the database type is assumed to be MySQL, but you can use the
|
62
|
+
built-in SQL Server support by specifying:
|
63
|
+
|
64
|
+
obfuscator.database_type = :sql_server
|
65
|
+
|
66
|
+
== Types
|
67
|
+
|
68
|
+
Available types include: email, string, lorem, name, first_name, last_name, address, street_address, city, state,
|
69
|
+
zip_code, phone, company, ipv4, ipv6, url, integer, fixed, null, and keep.
|
70
|
+
|
71
|
+
== Changes
|
72
|
+
|
73
|
+
* Support for SQL Server
|
74
|
+
* :unless and :if now support :nil as a shorthand for a Proc that checks for nil, and :blank as a shorthand for a Proc that checks for nil or an empty string
|
75
|
+
* :name, :lorem, and :address are all now supported types. You can pass :number to :lorem to specify how many sentences to generate. The default is one.
|
76
|
+
* <tt>{ :type => :whatever }</tt> is now optional when no additional options are needed. Just use <tt>:whatever</tt>.
|
77
|
+
* Warnings are printed to stderr when an unknown column type or an unconfigured table is encountered. Use <tt>:keep</tt> in both cases.
|
78
|
+
* <tt>{ :type => :fixed, :string => Proc.new { |row| ... } }</tt> is now available; the Proc is called with the row as a hash and its result is used as the value.
|
48
79
|
|
49
80
|
== Note on Patches/Pull Requests
|
50
81
|
|
@@ -54,6 +85,10 @@ Note that the -c option on mysqldump is required to use my_obfuscator.
|
|
54
85
|
* Commit, do not mess with rakefile, version, or history. (If you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
|
55
86
|
* Send me a pull request. Bonus points for topic branches.
|
56
87
|
|
88
|
+
== Thanks
|
89
|
+
|
90
|
+
Thanks to Mavenlink and Pivotal Labs for patches and updates!
|
91
|
+
|
57
92
|
== Copyright
|
58
93
|
|
59
|
-
Copyright (c) 2009 Honk. See LICENSE for details.
|
94
|
+
Copyright (c) 2009 Honk. Now maintained by Iteration Labs, LLC. See LICENSE for details.
|
data/Rakefile
CHANGED
@@ -1,60 +1,8 @@
|
|
1
|
-
require '
|
2
|
-
require '
|
1
|
+
require 'bundler/gem_tasks'
|
2
|
+
require 'rspec/core/rake_task'
|
3
3
|
|
4
|
-
|
5
|
-
|
6
|
-
# rake version:bump:minor
|
7
|
-
# rake gemspec
|
8
|
-
# rake build
|
9
|
-
# rake rubyforge:release
|
10
|
-
# Then git checkin and commit
|
11
|
-
|
12
|
-
begin
|
13
|
-
require 'jeweler'
|
14
|
-
Jeweler::Tasks.new do |gem|
|
15
|
-
gem.name = "my_obfuscate"
|
16
|
-
gem.summary = %Q{Standalone Ruby code for the selective rewriting of MySQL dumps in order to protect user privacy.}
|
17
|
-
gem.description = %Q{Standalone Ruby code for the selective rewriting of MySQL dumps in order to protect user privacy.}
|
18
|
-
gem.email = "andrew@pivotallabs.com"
|
19
|
-
gem.homepage = "http://github.com/honkster/myobfuscate"
|
20
|
-
gem.authors = ["Andrew Cantino", "Dave Willett", "Mike Grafton", "Mason Glaves"]
|
21
|
-
gem.add_development_dependency "rspec"
|
22
|
-
gem.rubyforge_project = 'my-obfuscate'
|
23
|
-
end
|
24
|
-
|
25
|
-
Jeweler::RubyforgeTasks.new do |rubyforge|
|
26
|
-
rubyforge.doc_task = "rdoc"
|
27
|
-
end
|
28
|
-
rescue LoadError
|
29
|
-
puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
|
4
|
+
RSpec::Core::RakeTask.new(:spec) do |spec|
|
5
|
+
spec.pattern = FileList['spec/**/*_spec.rb']
|
30
6
|
end
|
31
7
|
|
32
|
-
require 'spec/rake/spectask'
|
33
|
-
Spec::Rake::SpecTask.new(:spec) do |spec|
|
34
|
-
spec.libs << 'lib' << 'spec'
|
35
|
-
spec.spec_files = FileList['spec/**/*_spec.rb']
|
36
|
-
end
|
37
|
-
|
38
|
-
Spec::Rake::SpecTask.new(:rcov) do |spec|
|
39
|
-
spec.libs << 'lib' << 'spec'
|
40
|
-
spec.pattern = 'spec/**/*_spec.rb'
|
41
|
-
spec.rcov = true
|
42
|
-
end
|
43
|
-
|
44
|
-
task :spec => :check_dependencies
|
45
|
-
|
46
8
|
task :default => :spec
|
47
|
-
|
48
|
-
require 'rake/rdoctask'
|
49
|
-
Rake::RDocTask.new do |rdoc|
|
50
|
-
if File.exist?('VERSION')
|
51
|
-
version = File.read('VERSION')
|
52
|
-
else
|
53
|
-
version = ""
|
54
|
-
end
|
55
|
-
|
56
|
-
rdoc.rdoc_dir = 'rdoc'
|
57
|
-
rdoc.title = "my_obfuscate #{version}"
|
58
|
-
rdoc.rdoc_files.include('README*')
|
59
|
-
rdoc.rdoc_files.include('lib/**/*.rb')
|
60
|
-
end
|
data/lib/my_obfuscate.rb
CHANGED
@@ -1,11 +1,11 @@
|
|
1
|
-
require 'jcode'
|
1
|
+
require 'jcode' if RUBY_VERSION < '1.9'
|
2
|
+
require 'faker'
|
2
3
|
|
3
4
|
# Class for obfuscating MySQL dumps. This can parse mysqldump outputs when using the -c option, which includes
|
4
5
|
# column names in the insert statements.
|
5
6
|
class MyObfuscate
|
6
|
-
attr_accessor :config
|
7
|
+
attr_accessor :config, :globally_kept_columns, :fail_on_unspecified_columns, :database_type
|
7
8
|
|
8
|
-
INSERT_REGEX = /^\s*INSERT INTO `(.*?)` \((.*?)\) VALUES\s*/i
|
9
9
|
NUMBER_CHARS = "1234567890"
|
10
10
|
USERNAME_CHARS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_" + NUMBER_CHARS
|
11
11
|
SENSIBLE_CHARS = USERNAME_CHARS + '+-=[{]}/?|!@#$%^&*()`~'
|
@@ -16,17 +16,35 @@ class MyObfuscate
|
|
16
16
|
@config = configuration
|
17
17
|
end
|
18
18
|
|
19
|
+
def fail_on_unspecified_columns?
|
20
|
+
@fail_on_unspecified_columns
|
21
|
+
end
|
22
|
+
|
23
|
+
def database_helper
|
24
|
+
if @database_helper.nil?
|
25
|
+
if @database_type == :sql_server
|
26
|
+
@database_helper = SqlServer.new
|
27
|
+
else
|
28
|
+
@database_helper = Mysql.new
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
@database_helper
|
33
|
+
end
|
34
|
+
|
19
35
|
# Read an input stream and dump out an obfuscated output stream. These streams could be StringIO objects, Files,
|
20
36
|
# or STDIN and STDOUT.
|
21
37
|
def obfuscate(input_io, output_io)
|
38
|
+
|
22
39
|
# We assume that every INSERT INTO line occupies one line in the file, with no internal linebreaks.
|
23
40
|
input_io.each do |line|
|
24
|
-
if
|
25
|
-
table_name =
|
26
|
-
columns =
|
41
|
+
if table_data = database_helper.parse_insert_statement(line)
|
42
|
+
table_name = table_data[:table_name]
|
43
|
+
columns = table_data[:column_names]
|
27
44
|
if config[table_name]
|
28
45
|
output_io.puts obfuscate_bulk_insert_line(line, table_name, columns)
|
29
46
|
else
|
47
|
+
$stderr.puts "Deprecated: #{table_name} was not specified in the config. A future release will cause this to be an error. Please specify the table definition or set it to :keep."
|
30
48
|
output_io.write line
|
31
49
|
end
|
32
50
|
else
|
@@ -35,93 +53,53 @@ class MyObfuscate
|
|
35
53
|
end
|
36
54
|
end
|
37
55
|
|
38
|
-
def
|
39
|
-
|
40
|
-
output = context_aware_mysql_string_split(line).map do |sub_insert|
|
56
|
+
def reassembling_each_insert(line, table_name, columns)
|
57
|
+
output = database_helper.rows_to_be_inserted(line).map do |sub_insert|
|
41
58
|
result = yield(sub_insert)
|
42
59
|
result = result.map do |i|
|
43
|
-
|
44
|
-
"NULL"
|
45
|
-
else
|
46
|
-
"'" + i + "'"
|
47
|
-
end
|
60
|
+
database_helper.make_valid_value_string(i)
|
48
61
|
end
|
49
62
|
result = result.join(",")
|
50
63
|
"(" + result + ")"
|
51
64
|
end.join(",")
|
52
|
-
|
53
|
-
end
|
54
|
-
|
55
|
-
# Be aware, strings must be quoted in single quotes!
|
56
|
-
def self.context_aware_mysql_string_split(string)
|
57
|
-
in_sub_insert = false
|
58
|
-
in_quoted_string = false
|
59
|
-
escaped = false
|
60
|
-
current_field = nil
|
61
|
-
length = string.length
|
62
|
-
index = 0
|
63
|
-
fields = []
|
64
|
-
output = []
|
65
|
-
string.each_char do |i|
|
66
|
-
if escaped
|
67
|
-
escaped = false
|
68
|
-
current_field ||= ""
|
69
|
-
current_field << i
|
70
|
-
else
|
71
|
-
if i == "\\"
|
72
|
-
escaped = true
|
73
|
-
current_field ||= ""
|
74
|
-
current_field << i
|
75
|
-
elsif i == "(" && !in_quoted_string && !in_sub_insert
|
76
|
-
in_sub_insert = true
|
77
|
-
elsif i == ")" && !in_quoted_string && in_sub_insert
|
78
|
-
fields << current_field unless current_field.nil?
|
79
|
-
output << fields unless fields.length == 0
|
80
|
-
in_sub_insert = false
|
81
|
-
fields = []
|
82
|
-
current_field = nil
|
83
|
-
elsif i == "'" && !in_quoted_string
|
84
|
-
fields << current_field unless current_field.nil?
|
85
|
-
current_field = ''
|
86
|
-
in_quoted_string = true
|
87
|
-
elsif i == "'" && in_quoted_string
|
88
|
-
fields << current_field unless current_field.nil?
|
89
|
-
current_field = nil
|
90
|
-
in_quoted_string = false
|
91
|
-
elsif i == "," && !in_quoted_string && in_sub_insert
|
92
|
-
fields << current_field unless current_field.nil?
|
93
|
-
current_field = nil
|
94
|
-
elsif i == "L" && !in_quoted_string && in_sub_insert && current_field == "NUL"
|
95
|
-
current_field = nil
|
96
|
-
fields << current_field
|
97
|
-
elsif (i == " " || i == "\t") && !in_quoted_string
|
98
|
-
# Don't add whitespace not in a string
|
99
|
-
elsif in_sub_insert
|
100
|
-
current_field ||= ""
|
101
|
-
current_field << i
|
102
|
-
end
|
103
|
-
end
|
104
|
-
index += 1
|
105
|
-
end
|
106
|
-
fields << current_field unless current_field.nil?
|
107
|
-
output << fields unless fields.length == 0
|
108
|
-
output
|
65
|
+
database_helper.make_insert_statement(table_name, columns, output)
|
109
66
|
end
|
110
67
|
|
111
68
|
def self.row_as_hash(row, columns)
|
112
69
|
columns.zip(row).inject({}) {|m, (name, value)| m[name] = value; m}
|
113
70
|
end
|
114
71
|
|
72
|
+
def self.make_conditional_method(conditional_method, index, row)
|
73
|
+
if conditional_method.is_a?(Symbol)
|
74
|
+
if conditional_method == :blank
|
75
|
+
conditional_method = lambda { |row_hash| row[index].nil? || row[index] == '' }
|
76
|
+
elsif conditional_method == :nil
|
77
|
+
conditional_method = lambda { |row_hash| row[index].nil? }
|
78
|
+
end
|
79
|
+
end
|
80
|
+
conditional_method
|
81
|
+
end
|
82
|
+
|
115
83
|
def self.apply_table_config(row, table_config, columns)
|
116
84
|
return row unless table_config.is_a?(Hash)
|
117
85
|
row_hash = row_as_hash(row, columns)
|
118
86
|
|
119
87
|
table_config.each do |column, definition|
|
120
88
|
index = columns.index(column)
|
89
|
+
|
90
|
+
definition = { :type => definition } if definition.is_a?(Symbol)
|
91
|
+
|
92
|
+
if definition.has_key?(:unless)
|
93
|
+
unless_check = make_conditional_method(definition[:unless], index, row)
|
121
94
|
|
122
|
-
|
123
|
-
|
124
|
-
|
95
|
+
next if unless_check.call(row_hash)
|
96
|
+
end
|
97
|
+
|
98
|
+
|
99
|
+
if definition.has_key?(:if)
|
100
|
+
if_check = make_conditional_method(definition[:if], index, row)
|
101
|
+
|
102
|
+
next unless if_check.call(row_hash)
|
125
103
|
end
|
126
104
|
|
127
105
|
if definition[:skip_regexes]
|
@@ -130,20 +108,51 @@ class MyObfuscate
|
|
130
108
|
|
131
109
|
row[index.to_i] = case definition[:type]
|
132
110
|
when :email
|
133
|
-
|
111
|
+
clean_quotes(Faker::Internet.email)
|
134
112
|
when :string
|
135
|
-
random_string(definition[:length], definition[:chars] || SENSIBLE_CHARS)
|
113
|
+
random_string(definition[:length] || 30, definition[:chars] || SENSIBLE_CHARS)
|
114
|
+
when :lorem
|
115
|
+
clean_bad_whitespace(clean_quotes(Faker::Lorem.sentences(definition[:number] || 1).join(". ")))
|
116
|
+
when :name
|
117
|
+
clean_quotes(Faker::Name.name)
|
118
|
+
when :first_name
|
119
|
+
clean_quotes(Faker::Name.first_name)
|
120
|
+
when :last_name
|
121
|
+
clean_quotes(Faker::Name.last_name)
|
122
|
+
when :address
|
123
|
+
clean_quotes("#{Faker::Address.street_address}\\n#{Faker::Address.city}, #{Faker::Address.state_abbr} #{Faker::Address.zip_code}")
|
124
|
+
when :street_address
|
125
|
+
clean_bad_whitespace(clean_quotes(Faker::Address.street_address))
|
126
|
+
when :city
|
127
|
+
clean_quotes(Faker::Address.city)
|
128
|
+
when :state
|
129
|
+
Faker::Address.state_abbr
|
130
|
+
when :zip_code
|
131
|
+
Faker::Address.zip_code
|
132
|
+
when :phone
|
133
|
+
Faker::PhoneNumber.phone_number
|
134
|
+
when :company
|
135
|
+
clean_bad_whitespace(clean_quotes(Faker::Company.name))
|
136
|
+
when :ipv4
|
137
|
+
Faker::Internet.ip_v4_address
|
138
|
+
when :ipv6
|
139
|
+
Faker::Internet.ip_v6_address
|
140
|
+
when :url
|
141
|
+
clean_bad_whitespace(Faker::Internet.url)
|
136
142
|
when :integer
|
137
143
|
random_integer(definition[:between] || (0..1000)).to_s
|
138
144
|
when :fixed
|
139
145
|
if definition[:one_of]
|
140
146
|
definition[:one_of][(rand * definition[:one_of].length).to_i]
|
141
147
|
else
|
142
|
-
definition[:string]
|
148
|
+
definition[:string].is_a?(Proc) ? definition[:string].call(row_hash) : definition[:string]
|
143
149
|
end
|
144
150
|
when :null
|
145
151
|
nil
|
152
|
+
when :keep
|
153
|
+
row[index]
|
146
154
|
else
|
155
|
+
$stderr.puts "Keeping a column value by providing an unknown type (#{definition[:type]}) is deprecated. Use :keep instead."
|
147
156
|
row[index]
|
148
157
|
end
|
149
158
|
end
|
@@ -162,26 +171,52 @@ class MyObfuscate
|
|
162
171
|
out
|
163
172
|
end
|
164
173
|
|
165
|
-
def
|
174
|
+
def check_for_defined_columns_not_in_table(table_name, columns)
|
166
175
|
missing_columns = config[table_name].keys - columns
|
167
176
|
unless missing_columns.length == 0
|
168
177
|
error_message = missing_columns.map do |missing_column|
|
169
178
|
"Column '#{missing_column}' could not be found in table '#{table_name}', please fix your obfuscator config."
|
170
179
|
end.join("\n")
|
171
|
-
|
180
|
+
raise RuntimeError.new(error_message)
|
172
181
|
end
|
173
182
|
end
|
174
183
|
|
175
|
-
def
|
184
|
+
def check_for_table_columns_not_in_definition(table_name, columns)
|
185
|
+
missing_columns = columns - (config[table_name].keys + (globally_kept_columns || []).map {|i| i.to_sym}).uniq
|
186
|
+
unless missing_columns.length == 0
|
187
|
+
error_message = missing_columns.map do |missing_column|
|
188
|
+
"Column '#{missing_column}' defined in table '#{table_name}', but not found in table definition, please fix your obfuscator config."
|
189
|
+
end.join("\n")
|
190
|
+
raise RuntimeError.new(error_message)
|
191
|
+
end
|
192
|
+
end
|
193
|
+
|
194
|
+
def obfuscate_bulk_insert_line(line, table_name, columns)
|
176
195
|
table_config = config[table_name]
|
177
196
|
if table_config == :truncate
|
178
197
|
""
|
198
|
+
elsif table_config == :keep
|
199
|
+
line
|
179
200
|
else
|
180
|
-
|
201
|
+
check_for_defined_columns_not_in_table(table_name, columns)
|
202
|
+
check_for_table_columns_not_in_definition(table_name, columns) if fail_on_unspecified_columns?
|
181
203
|
# Note: Remember to SQL escape strings in what you pass back.
|
182
|
-
|
204
|
+
reassembling_each_insert(line, table_name, columns) do |row|
|
183
205
|
MyObfuscate.apply_table_config(row, table_config, columns)
|
184
206
|
end
|
185
207
|
end
|
186
208
|
end
|
209
|
+
|
210
|
+
private
|
211
|
+
|
212
|
+
def self.clean_quotes(value)
|
213
|
+
value.gsub(/['"]/, '')
|
214
|
+
end
|
215
|
+
|
216
|
+
def self.clean_bad_whitespace(value)
|
217
|
+
value.gsub(/[\n\t\r]/, '')
|
218
|
+
end
|
187
219
|
end
|
220
|
+
|
221
|
+
require 'my_obfuscate/mysql'
|
222
|
+
require 'my_obfuscate/sql_server'
|