my_obfuscate 0.3.0 → 0.3.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +4 -0
- data/Gemfile +4 -0
- data/README.rdoc +49 -14
- data/Rakefile +4 -56
- data/lib/my_obfuscate.rb +117 -82
- data/lib/my_obfuscate/mysql.rb +91 -0
- data/lib/my_obfuscate/sql_server.rb +81 -0
- data/lib/my_obfuscate/version.rb +3 -0
- data/my_obfuscate.gemspec +12 -44
- data/spec/my_obfuscate_spec.rb +442 -129
- data/spec/mysql_spec.rb +78 -0
- data/spec/spec_helper.rb +5 -5
- data/spec/sql_server_spec.rb +58 -0
- metadata +70 -51
- data/.document +0 -5
- data/VERSION +0 -1
data/.gitignore
CHANGED
data/Gemfile
ADDED
data/README.rdoc
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
= my_obfuscate
|
2
2
|
|
3
|
-
Standalone Ruby code for the selective rewriting of
|
3
|
+
Standalone Ruby code for the selective rewriting of SQL dumps in order to protect user privacy. Supports MySQL and SQL Server.
|
4
4
|
|
5
5
|
= Install
|
6
6
|
|
7
|
-
sudo gem install my_obfuscate
|
7
|
+
(sudo) gem install my_obfuscate
|
8
8
|
|
9
9
|
= Example Usage
|
10
10
|
|
@@ -17,34 +17,65 @@ Make an obfuscator.rb script:
|
|
17
17
|
obfuscator = MyObfuscate.new({
|
18
18
|
:people => {
|
19
19
|
:email => { :type => :email, :skip_regexes => [/^[\w\.\_]+@my_company\.com$/i] },
|
20
|
-
:ethnicity =>
|
20
|
+
:ethnicity => :keep,
|
21
21
|
:crypted_password => { :type => :fixed, :string => "SOME_FIXED_PASSWORD_FOR_EASE_OF_DEBUGGING" },
|
22
22
|
:salt => { :type => :fixed, :string => "SOME_THING" },
|
23
|
-
:remember_token =>
|
24
|
-
:remember_token_expires_at =>
|
25
|
-
:
|
26
|
-
:
|
27
|
-
:
|
28
|
-
:
|
23
|
+
:remember_token => :null,
|
24
|
+
:remember_token_expires_at => :null,
|
25
|
+
:age => { :type => :null, :unless => lambda { |person| person[:email] == "hello@example.com" } },
|
26
|
+
:photo_file_name => :null,
|
27
|
+
:photo_content_type => :null,
|
28
|
+
:photo_file_size => :null,
|
29
|
+
:photo_updated_at => :null,
|
29
30
|
:postal_code => { :type => :fixed, :string => "94109", :unless => lambda {|person| person[:postal_code] == "12345"} },
|
30
|
-
:name =>
|
31
|
+
:name => :name,
|
32
|
+
:full_address => :address,
|
33
|
+
:bio => { :type => :lorem, :number => 4 },
|
31
34
|
:relationship_status => { :type => :fixed, :one_of => ["Single", "Divorced", "Married", "Engaged", "In a Relationship"] },
|
32
35
|
:has_children => { :type => :integer, :between => 0..1 },
|
33
36
|
},
|
34
37
|
|
35
38
|
:invites => :truncate,
|
36
39
|
:invite_requests => :truncate,
|
40
|
+
:tags => :keep,
|
37
41
|
|
38
42
|
:relationships => {
|
39
|
-
:account_id =>
|
43
|
+
:account_id => :keep,
|
40
44
|
:code => { :type => :string, :length => 8, :chars => MyObfuscate::USERNAME_CHARS }
|
41
45
|
}
|
42
46
|
})
|
47
|
+
obfuscator.fail_on_unspecified_columns = true # if you want it to require every column in the table to be in the above definition
|
48
|
+
obfuscator.globally_kept_columns = %w[id created_at updated_at] # if you set fail_on_unspecified_columns, you may want this as well
|
43
49
|
obfuscator.obfuscate(STDIN, STDOUT)
|
44
50
|
|
45
51
|
And to get an obfuscated dump:
|
46
|
-
|
47
|
-
|
52
|
+
|
53
|
+
mysqldump -c --add-drop-table --hex-blob -u user -ppassword database | ruby obfuscator.rb > obfuscated_dump.sql
|
54
|
+
|
55
|
+
Note that the -c option on mysqldump is required to use my_obfuscate. Additionally, the default behavior of mysqldump
|
56
|
+
is to output special characters. This may cause trouble, so you can request hex-encoded blob content with --hex-blob.
|
57
|
+
If you get MySQL errors due to very long lines, try some combination of --max_allowed_packet=128M, --single-transaction, --skip-extended-insert, and --quick.
|
58
|
+
|
59
|
+
== Database Server
|
60
|
+
|
61
|
+
By default the database type is assumed to be MySQL, but you can use the
|
62
|
+
builtin SQL Server support by specifying:
|
63
|
+
|
64
|
+
obfuscator.database_type = :sql_server
|
65
|
+
|
66
|
+
== Types
|
67
|
+
|
68
|
+
Available types include: email, string, lorem, name, first_name, last_name, address, street_address, city, state,
|
69
|
+
zip_code, phone, company, ipv4, ipv6, url, integer, fixed, null, and keep.
|
70
|
+
|
71
|
+
== Changes
|
72
|
+
|
73
|
+
* Support for SQL Server
|
74
|
+
* :unless and :if now support :nil as a shorthand for a Proc that checks for nil
|
75
|
+
* :name, :lorem, and :address are all now supported types. You can pass :number to :lorem to specify how many sentences to generate. The default is one.
|
76
|
+
* <tt>{ :type => :whatever }</tt> is now optional when no additional options are needed. Just use <tt>:whatever</tt>.
|
77
|
+
* Warnings are thrown when an unknown column type or table is encountered. Use <tt>:keep</tt> in both cases.
|
78
|
+
* <tt>{ :type => :fixed, :string => Proc.new { |row| ... } }</tt> is now available.
|
48
79
|
|
49
80
|
== Note on Patches/Pull Requests
|
50
81
|
|
@@ -54,6 +85,10 @@ Note that the -c option on mysqldump is required to use my_obfuscator.
|
|
54
85
|
* Commit, do not mess with rakefile, version, or history. (If you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
|
55
86
|
* Send me a pull request. Bonus points for topic branches.
|
56
87
|
|
88
|
+
== Thanks
|
89
|
+
|
90
|
+
Thanks to Mavenlink and Pivotal Labs for patches and updates!
|
91
|
+
|
57
92
|
== Copyright
|
58
93
|
|
59
|
-
Copyright (c) 2009 Honk. See LICENSE for details.
|
94
|
+
Copyright (c) 2009 Honk. Now maintained by Iteration Labs, LLC. See LICENSE for details.
|
data/Rakefile
CHANGED
@@ -1,60 +1,8 @@
|
|
1
|
-
require '
|
2
|
-
require '
|
1
|
+
require 'bundler/gem_tasks'
|
2
|
+
require 'rspec/core/rake_task'
|
3
3
|
|
4
|
-
|
5
|
-
|
6
|
-
# rake version:bump:minor
|
7
|
-
# rake gemspec
|
8
|
-
# rake build
|
9
|
-
# rake rubyforge:release
|
10
|
-
# Then git checkin and commit
|
11
|
-
|
12
|
-
begin
|
13
|
-
require 'jeweler'
|
14
|
-
Jeweler::Tasks.new do |gem|
|
15
|
-
gem.name = "my_obfuscate"
|
16
|
-
gem.summary = %Q{Standalone Ruby code for the selective rewriting of MySQL dumps in order to protect user privacy.}
|
17
|
-
gem.description = %Q{Standalone Ruby code for the selective rewriting of MySQL dumps in order to protect user privacy.}
|
18
|
-
gem.email = "andrew@pivotallabs.com"
|
19
|
-
gem.homepage = "http://github.com/honkster/myobfuscate"
|
20
|
-
gem.authors = ["Andrew Cantino", "Dave Willett", "Mike Grafton", "Mason Glaves"]
|
21
|
-
gem.add_development_dependency "rspec"
|
22
|
-
gem.rubyforge_project = 'my-obfuscate'
|
23
|
-
end
|
24
|
-
|
25
|
-
Jeweler::RubyforgeTasks.new do |rubyforge|
|
26
|
-
rubyforge.doc_task = "rdoc"
|
27
|
-
end
|
28
|
-
rescue LoadError
|
29
|
-
puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
|
4
|
+
RSpec::Core::RakeTask.new(:spec) do |spec|
|
5
|
+
spec.pattern = FileList['spec/**/*_spec.rb']
|
30
6
|
end
|
31
7
|
|
32
|
-
require 'spec/rake/spectask'
|
33
|
-
Spec::Rake::SpecTask.new(:spec) do |spec|
|
34
|
-
spec.libs << 'lib' << 'spec'
|
35
|
-
spec.spec_files = FileList['spec/**/*_spec.rb']
|
36
|
-
end
|
37
|
-
|
38
|
-
Spec::Rake::SpecTask.new(:rcov) do |spec|
|
39
|
-
spec.libs << 'lib' << 'spec'
|
40
|
-
spec.pattern = 'spec/**/*_spec.rb'
|
41
|
-
spec.rcov = true
|
42
|
-
end
|
43
|
-
|
44
|
-
task :spec => :check_dependencies
|
45
|
-
|
46
8
|
task :default => :spec
|
47
|
-
|
48
|
-
require 'rake/rdoctask'
|
49
|
-
Rake::RDocTask.new do |rdoc|
|
50
|
-
if File.exist?('VERSION')
|
51
|
-
version = File.read('VERSION')
|
52
|
-
else
|
53
|
-
version = ""
|
54
|
-
end
|
55
|
-
|
56
|
-
rdoc.rdoc_dir = 'rdoc'
|
57
|
-
rdoc.title = "my_obfuscate #{version}"
|
58
|
-
rdoc.rdoc_files.include('README*')
|
59
|
-
rdoc.rdoc_files.include('lib/**/*.rb')
|
60
|
-
end
|
data/lib/my_obfuscate.rb
CHANGED
@@ -1,11 +1,11 @@
|
|
1
|
-
require 'jcode'
|
1
|
+
require 'jcode' if RUBY_VERSION < '1.9'
|
2
|
+
require 'faker'
|
2
3
|
|
3
4
|
# Class for obfuscating MySQL dumps. This can parse mysqldump outputs when using the -c option, which includes
|
4
5
|
# column names in the insert statements.
|
5
6
|
class MyObfuscate
|
6
|
-
attr_accessor :config
|
7
|
+
attr_accessor :config, :globally_kept_columns, :fail_on_unspecified_columns, :database_type
|
7
8
|
|
8
|
-
INSERT_REGEX = /^\s*INSERT INTO `(.*?)` \((.*?)\) VALUES\s*/i
|
9
9
|
NUMBER_CHARS = "1234567890"
|
10
10
|
USERNAME_CHARS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_" + NUMBER_CHARS
|
11
11
|
SENSIBLE_CHARS = USERNAME_CHARS + '+-=[{]}/?|!@#$%^&*()`~'
|
@@ -16,17 +16,35 @@ class MyObfuscate
|
|
16
16
|
@config = configuration
|
17
17
|
end
|
18
18
|
|
19
|
+
def fail_on_unspecified_columns?
|
20
|
+
@fail_on_unspecified_columns
|
21
|
+
end
|
22
|
+
|
23
|
+
def database_helper
|
24
|
+
if @database_helper.nil?
|
25
|
+
if @database_type == :sql_server
|
26
|
+
@database_helper = SqlServer.new
|
27
|
+
else
|
28
|
+
@database_helper = Mysql.new
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
@database_helper
|
33
|
+
end
|
34
|
+
|
19
35
|
# Read an input stream and dump out an obfuscated output stream. These streams could be StringIO objects, Files,
|
20
36
|
# or STDIN and STDOUT.
|
21
37
|
def obfuscate(input_io, output_io)
|
38
|
+
|
22
39
|
# We assume that every INSERT INTO line occupies one line in the file, with no internal linebreaks.
|
23
40
|
input_io.each do |line|
|
24
|
-
if
|
25
|
-
table_name =
|
26
|
-
columns =
|
41
|
+
if table_data = database_helper.parse_insert_statement(line)
|
42
|
+
table_name = table_data[:table_name]
|
43
|
+
columns = table_data[:column_names]
|
27
44
|
if config[table_name]
|
28
45
|
output_io.puts obfuscate_bulk_insert_line(line, table_name, columns)
|
29
46
|
else
|
47
|
+
$stderr.puts "Deprecated: #{table_name} was not specified in the config. A future release will cause this to be an error. Please specify the table definition or set it to :keep."
|
30
48
|
output_io.write line
|
31
49
|
end
|
32
50
|
else
|
@@ -35,93 +53,53 @@ class MyObfuscate
|
|
35
53
|
end
|
36
54
|
end
|
37
55
|
|
38
|
-
def
|
39
|
-
|
40
|
-
output = context_aware_mysql_string_split(line).map do |sub_insert|
|
56
|
+
def reassembling_each_insert(line, table_name, columns)
|
57
|
+
output = database_helper.rows_to_be_inserted(line).map do |sub_insert|
|
41
58
|
result = yield(sub_insert)
|
42
59
|
result = result.map do |i|
|
43
|
-
|
44
|
-
"NULL"
|
45
|
-
else
|
46
|
-
"'" + i + "'"
|
47
|
-
end
|
60
|
+
database_helper.make_valid_value_string(i)
|
48
61
|
end
|
49
62
|
result = result.join(",")
|
50
63
|
"(" + result + ")"
|
51
64
|
end.join(",")
|
52
|
-
|
53
|
-
end
|
54
|
-
|
55
|
-
# Be aware, strings must be quoted in single quotes!
|
56
|
-
def self.context_aware_mysql_string_split(string)
|
57
|
-
in_sub_insert = false
|
58
|
-
in_quoted_string = false
|
59
|
-
escaped = false
|
60
|
-
current_field = nil
|
61
|
-
length = string.length
|
62
|
-
index = 0
|
63
|
-
fields = []
|
64
|
-
output = []
|
65
|
-
string.each_char do |i|
|
66
|
-
if escaped
|
67
|
-
escaped = false
|
68
|
-
current_field ||= ""
|
69
|
-
current_field << i
|
70
|
-
else
|
71
|
-
if i == "\\"
|
72
|
-
escaped = true
|
73
|
-
current_field ||= ""
|
74
|
-
current_field << i
|
75
|
-
elsif i == "(" && !in_quoted_string && !in_sub_insert
|
76
|
-
in_sub_insert = true
|
77
|
-
elsif i == ")" && !in_quoted_string && in_sub_insert
|
78
|
-
fields << current_field unless current_field.nil?
|
79
|
-
output << fields unless fields.length == 0
|
80
|
-
in_sub_insert = false
|
81
|
-
fields = []
|
82
|
-
current_field = nil
|
83
|
-
elsif i == "'" && !in_quoted_string
|
84
|
-
fields << current_field unless current_field.nil?
|
85
|
-
current_field = ''
|
86
|
-
in_quoted_string = true
|
87
|
-
elsif i == "'" && in_quoted_string
|
88
|
-
fields << current_field unless current_field.nil?
|
89
|
-
current_field = nil
|
90
|
-
in_quoted_string = false
|
91
|
-
elsif i == "," && !in_quoted_string && in_sub_insert
|
92
|
-
fields << current_field unless current_field.nil?
|
93
|
-
current_field = nil
|
94
|
-
elsif i == "L" && !in_quoted_string && in_sub_insert && current_field == "NUL"
|
95
|
-
current_field = nil
|
96
|
-
fields << current_field
|
97
|
-
elsif (i == " " || i == "\t") && !in_quoted_string
|
98
|
-
# Don't add whitespace not in a string
|
99
|
-
elsif in_sub_insert
|
100
|
-
current_field ||= ""
|
101
|
-
current_field << i
|
102
|
-
end
|
103
|
-
end
|
104
|
-
index += 1
|
105
|
-
end
|
106
|
-
fields << current_field unless current_field.nil?
|
107
|
-
output << fields unless fields.length == 0
|
108
|
-
output
|
65
|
+
database_helper.make_insert_statement(table_name, columns, output)
|
109
66
|
end
|
110
67
|
|
111
68
|
def self.row_as_hash(row, columns)
|
112
69
|
columns.zip(row).inject({}) {|m, (name, value)| m[name] = value; m}
|
113
70
|
end
|
114
71
|
|
72
|
+
def self.make_conditional_method(conditional_method, index, row)
|
73
|
+
if conditional_method.is_a?(Symbol)
|
74
|
+
if conditional_method == :blank
|
75
|
+
conditional_method = lambda { |row_hash| row[index].nil? || row[index] == '' }
|
76
|
+
elsif conditional_method == :nil
|
77
|
+
conditional_method = lambda { |row_hash| row[index].nil? }
|
78
|
+
end
|
79
|
+
end
|
80
|
+
conditional_method
|
81
|
+
end
|
82
|
+
|
115
83
|
def self.apply_table_config(row, table_config, columns)
|
116
84
|
return row unless table_config.is_a?(Hash)
|
117
85
|
row_hash = row_as_hash(row, columns)
|
118
86
|
|
119
87
|
table_config.each do |column, definition|
|
120
88
|
index = columns.index(column)
|
89
|
+
|
90
|
+
definition = { :type => definition } if definition.is_a?(Symbol)
|
91
|
+
|
92
|
+
if definition.has_key?(:unless)
|
93
|
+
unless_check = make_conditional_method(definition[:unless], index, row)
|
121
94
|
|
122
|
-
|
123
|
-
|
124
|
-
|
95
|
+
next if unless_check.call(row_hash)
|
96
|
+
end
|
97
|
+
|
98
|
+
|
99
|
+
if definition.has_key?(:if)
|
100
|
+
if_check = make_conditional_method(definition[:if], index, row)
|
101
|
+
|
102
|
+
next unless if_check.call(row_hash)
|
125
103
|
end
|
126
104
|
|
127
105
|
if definition[:skip_regexes]
|
@@ -130,20 +108,51 @@ class MyObfuscate
|
|
130
108
|
|
131
109
|
row[index.to_i] = case definition[:type]
|
132
110
|
when :email
|
133
|
-
|
111
|
+
clean_quotes(Faker::Internet.email)
|
134
112
|
when :string
|
135
|
-
random_string(definition[:length], definition[:chars] || SENSIBLE_CHARS)
|
113
|
+
random_string(definition[:length] || 30, definition[:chars] || SENSIBLE_CHARS)
|
114
|
+
when :lorem
|
115
|
+
clean_bad_whitespace(clean_quotes(Faker::Lorem.sentences(definition[:number] || 1).join(". ")))
|
116
|
+
when :name
|
117
|
+
clean_quotes(Faker::Name.name)
|
118
|
+
when :first_name
|
119
|
+
clean_quotes(Faker::Name.first_name)
|
120
|
+
when :last_name
|
121
|
+
clean_quotes(Faker::Name.last_name)
|
122
|
+
when :address
|
123
|
+
clean_quotes("#{Faker::Address.street_address}\\n#{Faker::Address.city}, #{Faker::Address.state_abbr} #{Faker::Address.zip_code}")
|
124
|
+
when :street_address
|
125
|
+
clean_bad_whitespace(clean_quotes(Faker::Address.street_address))
|
126
|
+
when :city
|
127
|
+
clean_quotes(Faker::Address.city)
|
128
|
+
when :state
|
129
|
+
Faker::Address.state_abbr
|
130
|
+
when :zip_code
|
131
|
+
Faker::Address.zip_code
|
132
|
+
when :phone
|
133
|
+
Faker::PhoneNumber.phone_number
|
134
|
+
when :company
|
135
|
+
clean_bad_whitespace(clean_quotes(Faker::Company.name))
|
136
|
+
when :ipv4
|
137
|
+
Faker::Internet.ip_v4_address
|
138
|
+
when :ipv6
|
139
|
+
Faker::Internet.ip_v6_address
|
140
|
+
when :url
|
141
|
+
clean_bad_whitespace(Faker::Internet.url)
|
136
142
|
when :integer
|
137
143
|
random_integer(definition[:between] || (0..1000)).to_s
|
138
144
|
when :fixed
|
139
145
|
if definition[:one_of]
|
140
146
|
definition[:one_of][(rand * definition[:one_of].length).to_i]
|
141
147
|
else
|
142
|
-
definition[:string]
|
148
|
+
definition[:string].is_a?(Proc) ? definition[:string].call(row_hash) : definition[:string]
|
143
149
|
end
|
144
150
|
when :null
|
145
151
|
nil
|
152
|
+
when :keep
|
153
|
+
row[index]
|
146
154
|
else
|
155
|
+
$stderr.puts "Keeping a column value by providing an unknown type (#{definition[:type]}) is deprecated. Use :keep instead."
|
147
156
|
row[index]
|
148
157
|
end
|
149
158
|
end
|
@@ -162,26 +171,52 @@ class MyObfuscate
|
|
162
171
|
out
|
163
172
|
end
|
164
173
|
|
165
|
-
def
|
174
|
+
def check_for_defined_columns_not_in_table(table_name, columns)
|
166
175
|
missing_columns = config[table_name].keys - columns
|
167
176
|
unless missing_columns.length == 0
|
168
177
|
error_message = missing_columns.map do |missing_column|
|
169
178
|
"Column '#{missing_column}' could not be found in table '#{table_name}', please fix your obfuscator config."
|
170
179
|
end.join("\n")
|
171
|
-
|
180
|
+
raise RuntimeError.new(error_message)
|
172
181
|
end
|
173
182
|
end
|
174
183
|
|
175
|
-
def
|
184
|
+
def check_for_table_columns_not_in_definition(table_name, columns)
|
185
|
+
missing_columns = columns - (config[table_name].keys + (globally_kept_columns || []).map {|i| i.to_sym}).uniq
|
186
|
+
unless missing_columns.length == 0
|
187
|
+
error_message = missing_columns.map do |missing_column|
|
188
|
+
"Column '#{missing_column}' defined in table '#{table_name}', but not found in table definition, please fix your obfuscator config."
|
189
|
+
end.join("\n")
|
190
|
+
raise RuntimeError.new(error_message)
|
191
|
+
end
|
192
|
+
end
|
193
|
+
|
194
|
+
def obfuscate_bulk_insert_line(line, table_name, columns)
|
176
195
|
table_config = config[table_name]
|
177
196
|
if table_config == :truncate
|
178
197
|
""
|
198
|
+
elsif table_config == :keep
|
199
|
+
line
|
179
200
|
else
|
180
|
-
|
201
|
+
check_for_defined_columns_not_in_table(table_name, columns)
|
202
|
+
check_for_table_columns_not_in_definition(table_name, columns) if fail_on_unspecified_columns?
|
181
203
|
# Note: Remember to SQL escape strings in what you pass back.
|
182
|
-
|
204
|
+
reassembling_each_insert(line, table_name, columns) do |row|
|
183
205
|
MyObfuscate.apply_table_config(row, table_config, columns)
|
184
206
|
end
|
185
207
|
end
|
186
208
|
end
|
209
|
+
|
210
|
+
private
|
211
|
+
|
212
|
+
def self.clean_quotes(value)
|
213
|
+
value.gsub(/['"]/, '')
|
214
|
+
end
|
215
|
+
|
216
|
+
def self.clean_bad_whitespace(value)
|
217
|
+
value.gsub(/[\n\t\r]/, '')
|
218
|
+
end
|
187
219
|
end
|
220
|
+
|
221
|
+
require 'my_obfuscate/mysql'
|
222
|
+
require 'my_obfuscate/sql_server'
|