my_obfuscate 0.3.0 → 0.3.7

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore CHANGED
@@ -4,3 +4,7 @@ coverage
4
4
  rdoc
5
5
  pkg
6
6
  .idea
7
+ .rvmrc
8
+ Gemfile.lock
9
+ *.deb
10
+ *.gem
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "http://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in my_obfuscate.gemspec
4
+ gemspec
data/README.rdoc CHANGED
@@ -1,10 +1,10 @@
1
1
  = my_obfuscate
2
2
 
3
- Standalone Ruby code for the selective rewriting of MySQL dumps in order to protect user privacy.
3
+ Standalone Ruby code for the selective rewriting of SQL dumps in order to protect user privacy. Supports MySQL and SQL Server.
4
4
 
5
5
  = Install
6
6
 
7
- sudo gem install my_obfuscate
7
+ (sudo) gem install my_obfuscate
8
8
 
9
9
  = Example Usage
10
10
 
@@ -17,34 +17,65 @@ Make an obfuscator.rb script:
17
17
  obfuscator = MyObfuscate.new({
18
18
  :people => {
19
19
  :email => { :type => :email, :skip_regexes => [/^[\w\.\_]+@my_company\.com$/i] },
20
- :ethnicity => { :type => :null },
20
+ :ethnicity => :keep,
21
21
  :crypted_password => { :type => :fixed, :string => "SOME_FIXED_PASSWORD_FOR_EASE_OF_DEBUGGING" },
22
22
  :salt => { :type => :fixed, :string => "SOME_THING" },
23
- :remember_token => { :type => :null },
24
- :remember_token_expires_at => { :type => :null },
25
- :photo_file_name => { :type => :null },
26
- :photo_content_type => { :type => :null },
27
- :photo_file_size => { :type => :null },
28
- :photo_updated_at => { :type => :null },
23
+ :remember_token => :null,
24
+ :remember_token_expires_at => :null,
25
+ :age => { :type => :null, :unless => lambda { |person| person[:email] == "hello@example.com" } },
26
+ :photo_file_name => :null,
27
+ :photo_content_type => :null,
28
+ :photo_file_size => :null,
29
+ :photo_updated_at => :null,
29
30
  :postal_code => { :type => :fixed, :string => "94109", :unless => lambda {|person| person[:postal_code] == "12345"} },
30
- :name => { :type => :fixed, :string => "Production User", :if => lambda {|person| person[:email] == "hello@example.com"} },
31
+ :name => :name,
32
+ :full_address => :address,
33
+ :bio => { :type => :lorem, :number => 4 },
31
34
  :relationship_status => { :type => :fixed, :one_of => ["Single", "Divorced", "Married", "Engaged", "In a Relationship"] },
32
35
  :has_children => { :type => :integer, :between => 0..1 },
33
36
  },
34
37
 
35
38
  :invites => :truncate,
36
39
  :invite_requests => :truncate,
40
+ :tags => :keep,
37
41
 
38
42
  :relationships => {
39
- :account_id => { :type => :string, :length => 8, :chars => MyObfuscate::NUMBER_CHARS },
43
+ :account_id => :keep,
40
44
  :code => { :type => :string, :length => 8, :chars => MyObfuscate::USERNAME_CHARS }
41
45
  }
42
46
  })
47
+ obfuscator.fail_on_unspecified_columns = true # if you want it to require every column in the table to be in the above definition
48
+ obfuscator.globally_kept_columns = %w[id created_at updated_at] # if you set fail_on_unspecified_columns, you may want this as well
43
49
  obfuscator.obfuscate(STDIN, STDOUT)
44
50
 
45
51
  And to get an obfuscated dump:
46
- mysqldump -c --add-drop-table -u user -ppassword database | ruby obfuscator.rb > obfuscated_dump.sql
47
- Note that the -c option on mysqldump is required to use my_obfuscator.
52
+
53
+ mysqldump -c --add-drop-table --hex-blob -u user -ppassword database | ruby obfuscator.rb > obfuscated_dump.sql
54
+
55
+ Note that the -c option on mysqldump is required to use my_obfuscate. Additionally, the default behavior of mysqldump
56
+ is to output special characters. This may cause trouble, so you can request hex-encoded blob content with --hex-blob.
57
+ If you get MySQL errors due to very long lines, try some combination of --max_allowed_packet=128M, --single-transaction, --skip-extended-insert, and --quick.
58
+
59
+ == Database Server
60
+
61
+ By default the database type is assumed to be MySQL, but you can use the
62
+ builtin SQL Server support by specifying:
63
+
64
+ obfuscator.database_type = :sql_server
65
+
66
+ == Types
67
+
68
+ Available types include: email, string, lorem, name, first_name, last_name, address, street_address, city, state,
69
+ zip_code, phone, company, ipv4, ipv6, url, integer, fixed, null, and keep.
70
+
71
+ == Changes
72
+
73
+ * Support for SQL Server
74
+ * :unless and :if now accept the symbols :nil and :blank as shorthand for a Proc that checks whether the column's value is nil (:nil) or is nil or an empty string (:blank)
75
+ * :name, :lorem, and :address are all now supported types. You can pass :number to :lorem to specify how many sentences to generate. The default is one.
76
+ * <tt>{ :type => :whatever }</tt> is now optional when no additional options are needed. Just use <tt>:whatever</tt>.
77
+ * Deprecation warnings are printed to stderr when an unknown column type or an unconfigured table is encountered. Use <tt>:keep</tt> in both cases.
78
+ * <tt>{ :type => :fixed, :string => lambda { |row| ... } }</tt> is now available; the Proc is called with the row as a hash and its return value is used as the column value.
48
79
 
49
80
  == Note on Patches/Pull Requests
50
81
 
@@ -54,6 +85,10 @@ Note that the -c option on mysqldump is required to use my_obfuscator.
54
85
  * Commit, do not mess with rakefile, version, or history. (If you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
55
86
  * Send me a pull request. Bonus points for topic branches.
56
87
 
88
+ == Thanks
89
+
90
+ Thanks to Mavenlink and Pivotal Labs for patches and updates!
91
+
57
92
  == Copyright
58
93
 
59
- Copyright (c) 2009 Honk. See LICENSE for details.
94
+ Copyright (c) 2009 Honk. Now maintained by Iteration Labs, LLC. See LICENSE for details.
data/Rakefile CHANGED
@@ -1,60 +1,8 @@
1
- require 'rubygems'
2
- require 'rake'
1
+ require 'bundler/gem_tasks'
2
+ require 'rspec/core/rake_task'
3
3
 
4
-
5
- # When updating:
6
- # rake version:bump:minor
7
- # rake gemspec
8
- # rake build
9
- # rake rubyforge:release
10
- # Then git checkin and commit
11
-
12
- begin
13
- require 'jeweler'
14
- Jeweler::Tasks.new do |gem|
15
- gem.name = "my_obfuscate"
16
- gem.summary = %Q{Standalone Ruby code for the selective rewriting of MySQL dumps in order to protect user privacy.}
17
- gem.description = %Q{Standalone Ruby code for the selective rewriting of MySQL dumps in order to protect user privacy.}
18
- gem.email = "andrew@pivotallabs.com"
19
- gem.homepage = "http://github.com/honkster/myobfuscate"
20
- gem.authors = ["Andrew Cantino", "Dave Willett", "Mike Grafton", "Mason Glaves"]
21
- gem.add_development_dependency "rspec"
22
- gem.rubyforge_project = 'my-obfuscate'
23
- end
24
-
25
- Jeweler::RubyforgeTasks.new do |rubyforge|
26
- rubyforge.doc_task = "rdoc"
27
- end
28
- rescue LoadError
29
- puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
4
+ RSpec::Core::RakeTask.new(:spec) do |spec|
5
+ spec.pattern = FileList['spec/**/*_spec.rb']
30
6
  end
31
7
 
32
- require 'spec/rake/spectask'
33
- Spec::Rake::SpecTask.new(:spec) do |spec|
34
- spec.libs << 'lib' << 'spec'
35
- spec.spec_files = FileList['spec/**/*_spec.rb']
36
- end
37
-
38
- Spec::Rake::SpecTask.new(:rcov) do |spec|
39
- spec.libs << 'lib' << 'spec'
40
- spec.pattern = 'spec/**/*_spec.rb'
41
- spec.rcov = true
42
- end
43
-
44
- task :spec => :check_dependencies
45
-
46
8
  task :default => :spec
47
-
48
- require 'rake/rdoctask'
49
- Rake::RDocTask.new do |rdoc|
50
- if File.exist?('VERSION')
51
- version = File.read('VERSION')
52
- else
53
- version = ""
54
- end
55
-
56
- rdoc.rdoc_dir = 'rdoc'
57
- rdoc.title = "my_obfuscate #{version}"
58
- rdoc.rdoc_files.include('README*')
59
- rdoc.rdoc_files.include('lib/**/*.rb')
60
- end
data/lib/my_obfuscate.rb CHANGED
@@ -1,11 +1,11 @@
1
- require 'jcode'
1
+ require 'jcode' if RUBY_VERSION < '1.9'
2
+ require 'faker'
2
3
 
3
4
  # Class for obfuscating MySQL dumps. This can parse mysqldump outputs when using the -c option, which includes
4
5
  # column names in the insert statements.
5
6
  class MyObfuscate
6
- attr_accessor :config
7
+ attr_accessor :config, :globally_kept_columns, :fail_on_unspecified_columns, :database_type
7
8
 
8
- INSERT_REGEX = /^\s*INSERT INTO `(.*?)` \((.*?)\) VALUES\s*/i
9
9
  NUMBER_CHARS = "1234567890"
10
10
  USERNAME_CHARS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_" + NUMBER_CHARS
11
11
  SENSIBLE_CHARS = USERNAME_CHARS + '+-=[{]}/?|!@#$%^&*()`~'
@@ -16,17 +16,35 @@ class MyObfuscate
16
16
  @config = configuration
17
17
  end
18
18
 
19
+ def fail_on_unspecified_columns?
20
+ @fail_on_unspecified_columns
21
+ end
22
+
23
+ def database_helper
24
+ if @database_helper.nil?
25
+ if @database_type == :sql_server
26
+ @database_helper = SqlServer.new
27
+ else
28
+ @database_helper = Mysql.new
29
+ end
30
+ end
31
+
32
+ @database_helper
33
+ end
34
+
19
35
  # Read an input stream and dump out an obfuscated output stream. These streams could be StringIO objects, Files,
20
36
  # or STDIN and STDOUT.
21
37
  def obfuscate(input_io, output_io)
38
+
22
39
  # We assume that every INSERT INTO line occupies one line in the file, with no internal linebreaks.
23
40
  input_io.each do |line|
24
- if regex_result = INSERT_REGEX.match(line)
25
- table_name = regex_result[1].to_sym
26
- columns = regex_result[2].split(/`\s*,\s*`/).map { |col| col.gsub('`',"").to_sym }
41
+ if table_data = database_helper.parse_insert_statement(line)
42
+ table_name = table_data[:table_name]
43
+ columns = table_data[:column_names]
27
44
  if config[table_name]
28
45
  output_io.puts obfuscate_bulk_insert_line(line, table_name, columns)
29
46
  else
47
+ $stderr.puts "Deprecated: #{table_name} was not specified in the config. A future release will cause this to be an error. Please specify the table definition or set it to :keep."
30
48
  output_io.write line
31
49
  end
32
50
  else
@@ -35,93 +53,53 @@ class MyObfuscate
35
53
  end
36
54
  end
37
55
 
38
- def self.reasembling_each_insert(line, table_name, columns)
39
- line = line.gsub(INSERT_REGEX, '').gsub(/\s*;\s*$/, '')
40
- output = context_aware_mysql_string_split(line).map do |sub_insert|
56
+ def reassembling_each_insert(line, table_name, columns)
57
+ output = database_helper.rows_to_be_inserted(line).map do |sub_insert|
41
58
  result = yield(sub_insert)
42
59
  result = result.map do |i|
43
- if i.nil?
44
- "NULL"
45
- else
46
- "'" + i + "'"
47
- end
60
+ database_helper.make_valid_value_string(i)
48
61
  end
49
62
  result = result.join(",")
50
63
  "(" + result + ")"
51
64
  end.join(",")
52
- "INSERT INTO `#{table_name}` (`#{columns.join('`, `')}`) VALUES #{output};"
53
- end
54
-
55
- # Be aware, strings must be quoted in single quotes!
56
- def self.context_aware_mysql_string_split(string)
57
- in_sub_insert = false
58
- in_quoted_string = false
59
- escaped = false
60
- current_field = nil
61
- length = string.length
62
- index = 0
63
- fields = []
64
- output = []
65
- string.each_char do |i|
66
- if escaped
67
- escaped = false
68
- current_field ||= ""
69
- current_field << i
70
- else
71
- if i == "\\"
72
- escaped = true
73
- current_field ||= ""
74
- current_field << i
75
- elsif i == "(" && !in_quoted_string && !in_sub_insert
76
- in_sub_insert = true
77
- elsif i == ")" && !in_quoted_string && in_sub_insert
78
- fields << current_field unless current_field.nil?
79
- output << fields unless fields.length == 0
80
- in_sub_insert = false
81
- fields = []
82
- current_field = nil
83
- elsif i == "'" && !in_quoted_string
84
- fields << current_field unless current_field.nil?
85
- current_field = ''
86
- in_quoted_string = true
87
- elsif i == "'" && in_quoted_string
88
- fields << current_field unless current_field.nil?
89
- current_field = nil
90
- in_quoted_string = false
91
- elsif i == "," && !in_quoted_string && in_sub_insert
92
- fields << current_field unless current_field.nil?
93
- current_field = nil
94
- elsif i == "L" && !in_quoted_string && in_sub_insert && current_field == "NUL"
95
- current_field = nil
96
- fields << current_field
97
- elsif (i == " " || i == "\t") && !in_quoted_string
98
- # Don't add whitespace not in a string
99
- elsif in_sub_insert
100
- current_field ||= ""
101
- current_field << i
102
- end
103
- end
104
- index += 1
105
- end
106
- fields << current_field unless current_field.nil?
107
- output << fields unless fields.length == 0
108
- output
65
+ database_helper.make_insert_statement(table_name, columns, output)
109
66
  end
110
67
 
111
68
  def self.row_as_hash(row, columns)
112
69
  columns.zip(row).inject({}) {|m, (name, value)| m[name] = value; m}
113
70
  end
114
71
 
72
+ def self.make_conditional_method(conditional_method, index, row)
73
+ if conditional_method.is_a?(Symbol)
74
+ if conditional_method == :blank
75
+ conditional_method = lambda { |row_hash| row[index].nil? || row[index] == '' }
76
+ elsif conditional_method == :nil
77
+ conditional_method = lambda { |row_hash| row[index].nil? }
78
+ end
79
+ end
80
+ conditional_method
81
+ end
82
+
115
83
  def self.apply_table_config(row, table_config, columns)
116
84
  return row unless table_config.is_a?(Hash)
117
85
  row_hash = row_as_hash(row, columns)
118
86
 
119
87
  table_config.each do |column, definition|
120
88
  index = columns.index(column)
89
+
90
+ definition = { :type => definition } if definition.is_a?(Symbol)
91
+
92
+ if definition.has_key?(:unless)
93
+ unless_check = make_conditional_method(definition[:unless], index, row)
121
94
 
122
- next if definition[:unless] && definition[:unless].call(row_hash)
123
- if definition[:if]
124
- next unless definition[:if].call(row_hash)
95
+ next if unless_check.call(row_hash)
96
+ end
97
+
98
+
99
+ if definition.has_key?(:if)
100
+ if_check = make_conditional_method(definition[:if], index, row)
101
+
102
+ next unless if_check.call(row_hash)
125
103
  end
126
104
 
127
105
  if definition[:skip_regexes]
@@ -130,20 +108,51 @@ class MyObfuscate
130
108
 
131
109
  row[index.to_i] = case definition[:type]
132
110
  when :email
133
- random_string(4..10, USERNAME_CHARS) + "@example.com"
111
+ clean_quotes(Faker::Internet.email)
134
112
  when :string
135
- random_string(definition[:length], definition[:chars] || SENSIBLE_CHARS)
113
+ random_string(definition[:length] || 30, definition[:chars] || SENSIBLE_CHARS)
114
+ when :lorem
115
+ clean_bad_whitespace(clean_quotes(Faker::Lorem.sentences(definition[:number] || 1).join(". ")))
116
+ when :name
117
+ clean_quotes(Faker::Name.name)
118
+ when :first_name
119
+ clean_quotes(Faker::Name.first_name)
120
+ when :last_name
121
+ clean_quotes(Faker::Name.last_name)
122
+ when :address
123
+ clean_quotes("#{Faker::Address.street_address}\\n#{Faker::Address.city}, #{Faker::Address.state_abbr} #{Faker::Address.zip_code}")
124
+ when :street_address
125
+ clean_bad_whitespace(clean_quotes(Faker::Address.street_address))
126
+ when :city
127
+ clean_quotes(Faker::Address.city)
128
+ when :state
129
+ Faker::Address.state_abbr
130
+ when :zip_code
131
+ Faker::Address.zip_code
132
+ when :phone
133
+ Faker::PhoneNumber.phone_number
134
+ when :company
135
+ clean_bad_whitespace(clean_quotes(Faker::Company.name))
136
+ when :ipv4
137
+ Faker::Internet.ip_v4_address
138
+ when :ipv6
139
+ Faker::Internet.ip_v6_address
140
+ when :url
141
+ clean_bad_whitespace(Faker::Internet.url)
136
142
  when :integer
137
143
  random_integer(definition[:between] || (0..1000)).to_s
138
144
  when :fixed
139
145
  if definition[:one_of]
140
146
  definition[:one_of][(rand * definition[:one_of].length).to_i]
141
147
  else
142
- definition[:string]
148
+ definition[:string].is_a?(Proc) ? definition[:string].call(row_hash) : definition[:string]
143
149
  end
144
150
  when :null
145
151
  nil
152
+ when :keep
153
+ row[index]
146
154
  else
155
+ $stderr.puts "Keeping a column value by providing an unknown type (#{definition[:type]}) is deprecated. Use :keep instead."
147
156
  row[index]
148
157
  end
149
158
  end
@@ -162,26 +171,52 @@ class MyObfuscate
162
171
  out
163
172
  end
164
173
 
165
- def check_for_missing_columns(table_name, columns)
174
+ def check_for_defined_columns_not_in_table(table_name, columns)
166
175
  missing_columns = config[table_name].keys - columns
167
176
  unless missing_columns.length == 0
168
177
  error_message = missing_columns.map do |missing_column|
169
178
  "Column '#{missing_column}' could not be found in table '#{table_name}', please fix your obfuscator config."
170
179
  end.join("\n")
171
- raise RuntimeError.new(error_message)
180
+ raise RuntimeError.new(error_message)
172
181
  end
173
182
  end
174
183
 
175
- def obfuscate_bulk_insert_line (line, table_name, columns)
184
+ def check_for_table_columns_not_in_definition(table_name, columns)
185
+ missing_columns = columns - (config[table_name].keys + (globally_kept_columns || []).map {|i| i.to_sym}).uniq
186
+ unless missing_columns.length == 0
187
+ error_message = missing_columns.map do |missing_column|
188
+ "Column '#{missing_column}' defined in table '#{table_name}', but not found in table definition, please fix your obfuscator config."
189
+ end.join("\n")
190
+ raise RuntimeError.new(error_message)
191
+ end
192
+ end
193
+
194
+ def obfuscate_bulk_insert_line(line, table_name, columns)
176
195
  table_config = config[table_name]
177
196
  if table_config == :truncate
178
197
  ""
198
+ elsif table_config == :keep
199
+ line
179
200
  else
180
- check_for_missing_columns(table_name, columns)
201
+ check_for_defined_columns_not_in_table(table_name, columns)
202
+ check_for_table_columns_not_in_definition(table_name, columns) if fail_on_unspecified_columns?
181
203
  # Note: Remember to SQL escape strings in what you pass back.
182
- MyObfuscate.reasembling_each_insert(line, table_name, columns) do |row|
204
+ reassembling_each_insert(line, table_name, columns) do |row|
183
205
  MyObfuscate.apply_table_config(row, table_config, columns)
184
206
  end
185
207
  end
186
208
  end
209
+
210
+ private
211
+
212
+ def self.clean_quotes(value)
213
+ value.gsub(/['"]/, '')
214
+ end
215
+
216
+ def self.clean_bad_whitespace(value)
217
+ value.gsub(/[\n\t\r]/, '')
218
+ end
187
219
  end
220
+
221
+ require 'my_obfuscate/mysql'
222
+ require 'my_obfuscate/sql_server'