my_obfuscate 0.3.0 → 0.3.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore CHANGED
@@ -4,3 +4,7 @@ coverage
4
4
  rdoc
5
5
  pkg
6
6
  .idea
7
+ .rvmrc
8
+ Gemfile.lock
9
+ *.deb
10
+ *.gem
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "http://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in my_obfuscate.gemspec
4
+ gemspec
data/README.rdoc CHANGED
@@ -1,10 +1,10 @@
1
1
  = my_obfuscate
2
2
 
3
- Standalone Ruby code for the selective rewriting of MySQL dumps in order to protect user privacy.
3
+ Standalone Ruby code for the selective rewriting of SQL dumps in order to protect user privacy. Supports MySQL and SQL Server.
4
4
 
5
5
  = Install
6
6
 
7
- sudo gem install my_obfuscate
7
+ (sudo) gem install my_obfuscate
8
8
 
9
9
  = Example Usage
10
10
 
@@ -17,34 +17,65 @@ Make an obfuscator.rb script:
17
17
  obfuscator = MyObfuscate.new({
18
18
  :people => {
19
19
  :email => { :type => :email, :skip_regexes => [/^[\w\.\_]+@my_company\.com$/i] },
20
- :ethnicity => { :type => :null },
20
+ :ethnicity => :keep,
21
21
  :crypted_password => { :type => :fixed, :string => "SOME_FIXED_PASSWORD_FOR_EASE_OF_DEBUGGING" },
22
22
  :salt => { :type => :fixed, :string => "SOME_THING" },
23
- :remember_token => { :type => :null },
24
- :remember_token_expires_at => { :type => :null },
25
- :photo_file_name => { :type => :null },
26
- :photo_content_type => { :type => :null },
27
- :photo_file_size => { :type => :null },
28
- :photo_updated_at => { :type => :null },
23
+ :remember_token => :null,
24
+ :remember_token_expires_at => :null,
25
+ :age => { :type => :null, :unless => lambda { |person| person[:email] == "hello@example.com" } },
26
+ :photo_file_name => :null,
27
+ :photo_content_type => :null,
28
+ :photo_file_size => :null,
29
+ :photo_updated_at => :null,
29
30
  :postal_code => { :type => :fixed, :string => "94109", :unless => lambda {|person| person[:postal_code] == "12345"} },
30
- :name => { :type => :fixed, :string => "Production User", :if => lambda {|person| person[:email] == "hello@example.com"} },
31
+ :name => :name,
32
+ :full_address => :address,
33
+ :bio => { :type => :lorem, :number => 4 },
31
34
  :relationship_status => { :type => :fixed, :one_of => ["Single", "Divorced", "Married", "Engaged", "In a Relationship"] },
32
35
  :has_children => { :type => :integer, :between => 0..1 },
33
36
  },
34
37
 
35
38
  :invites => :truncate,
36
39
  :invite_requests => :truncate,
40
+ :tags => :keep,
37
41
 
38
42
  :relationships => {
39
- :account_id => { :type => :string, :length => 8, :chars => MyObfuscate::NUMBER_CHARS },
43
+ :account_id => :keep,
40
44
  :code => { :type => :string, :length => 8, :chars => MyObfuscate::USERNAME_CHARS }
41
45
  }
42
46
  })
47
+ obfuscator.fail_on_unspecified_columns = true # if you want it to require every column in the table to be in the above definition
48
+ obfuscator.globally_kept_columns = %w[id created_at updated_at] # if you set fail_on_unspecified_columns, you may want this as well
43
49
  obfuscator.obfuscate(STDIN, STDOUT)
44
50
 
45
51
  And to get an obfuscated dump:
46
- mysqldump -c --add-drop-table -u user -ppassword database | ruby obfuscator.rb > obfuscated_dump.sql
47
- Note that the -c option on mysqldump is required to use my_obfuscator.
52
+
53
+ mysqldump -c --add-drop-table --hex-blob -u user -ppassword database | ruby obfuscator.rb > obfuscated_dump.sql
54
+
55
+ Note that the -c option on mysqldump is required to use my_obfuscate. Additionally, the default behavior of mysqldump
56
+ is to output special characters. This may cause trouble, so you can request hex-encoded blob content with --hex-blob.
57
+ If you get MySQL errors due to very long lines, try some combination of --max_allowed_packet=128M, --single-transaction, --skip-extended-insert, and --quick.
58
+
59
+ == Database Server
60
+
61
+ By default the database type is assumed to be MySQL, but you can use the
62
+ builtin SQL Server support by specifying:
63
+
64
+ obfuscator.database_type = :sql_server
65
+
66
+ == Types
67
+
68
+ Available types include: email, string, lorem, name, first_name, last_name, address, street_address, city, state,
69
+ zip_code, phone, company, ipv4, ipv6, url, integer, fixed, null, and keep.
70
+
71
+ == Changes
72
+
73
+ * Support for SQL Server
74
+ * :unless and :if now support :nil (the column value is nil) and :blank (the column value is nil or an empty string) as shorthand for a Proc that performs that check
75
+ * :name, :lorem, and :address are all now supported types. You can pass :number to :lorem to specify how many sentences to generate. The default is one.
76
+ * <tt>{ :type => :whatever }</tt> is now optional when no additional options are needed. Just use <tt>:whatever</tt>.
77
+ * Warnings are printed to stderr when a table that is missing from the config, or a column with an unknown type, is encountered. Use <tt>:keep</tt> in both cases.
78
+ * <tt>{ :type => :fixed, :string => Proc.new { |row| ... } }</tt> is now available. The Proc is called with the row as a hash of column names to values.
48
79
 
49
80
  == Note on Patches/Pull Requests
50
81
 
@@ -54,6 +85,10 @@ Note that the -c option on mysqldump is required to use my_obfuscator.
54
85
  * Commit, do not mess with rakefile, version, or history. (If you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
55
86
  * Send me a pull request. Bonus points for topic branches.
56
87
 
88
+ == Thanks
89
+
90
+ Thanks to Mavenlink and Pivotal Labs for patches and updates!
91
+
57
92
  == Copyright
58
93
 
59
- Copyright (c) 2009 Honk. See LICENSE for details.
94
+ Copyright (c) 2009 Honk. Now maintained by Iteration Labs, LLC. See LICENSE for details.
data/Rakefile CHANGED
@@ -1,60 +1,8 @@
1
- require 'rubygems'
2
- require 'rake'
1
+ require 'bundler/gem_tasks'
2
+ require 'rspec/core/rake_task'
3
3
 
4
-
5
- # When updating:
6
- # rake version:bump:minor
7
- # rake gemspec
8
- # rake build
9
- # rake rubyforge:release
10
- # Then git checkin and commit
11
-
12
- begin
13
- require 'jeweler'
14
- Jeweler::Tasks.new do |gem|
15
- gem.name = "my_obfuscate"
16
- gem.summary = %Q{Standalone Ruby code for the selective rewriting of MySQL dumps in order to protect user privacy.}
17
- gem.description = %Q{Standalone Ruby code for the selective rewriting of MySQL dumps in order to protect user privacy.}
18
- gem.email = "andrew@pivotallabs.com"
19
- gem.homepage = "http://github.com/honkster/myobfuscate"
20
- gem.authors = ["Andrew Cantino", "Dave Willett", "Mike Grafton", "Mason Glaves"]
21
- gem.add_development_dependency "rspec"
22
- gem.rubyforge_project = 'my-obfuscate'
23
- end
24
-
25
- Jeweler::RubyforgeTasks.new do |rubyforge|
26
- rubyforge.doc_task = "rdoc"
27
- end
28
- rescue LoadError
29
- puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
4
+ RSpec::Core::RakeTask.new(:spec) do |spec|
5
+ spec.pattern = FileList['spec/**/*_spec.rb']
30
6
  end
31
7
 
32
- require 'spec/rake/spectask'
33
- Spec::Rake::SpecTask.new(:spec) do |spec|
34
- spec.libs << 'lib' << 'spec'
35
- spec.spec_files = FileList['spec/**/*_spec.rb']
36
- end
37
-
38
- Spec::Rake::SpecTask.new(:rcov) do |spec|
39
- spec.libs << 'lib' << 'spec'
40
- spec.pattern = 'spec/**/*_spec.rb'
41
- spec.rcov = true
42
- end
43
-
44
- task :spec => :check_dependencies
45
-
46
8
  task :default => :spec
47
-
48
- require 'rake/rdoctask'
49
- Rake::RDocTask.new do |rdoc|
50
- if File.exist?('VERSION')
51
- version = File.read('VERSION')
52
- else
53
- version = ""
54
- end
55
-
56
- rdoc.rdoc_dir = 'rdoc'
57
- rdoc.title = "my_obfuscate #{version}"
58
- rdoc.rdoc_files.include('README*')
59
- rdoc.rdoc_files.include('lib/**/*.rb')
60
- end
data/lib/my_obfuscate.rb CHANGED
@@ -1,11 +1,11 @@
1
- require 'jcode'
1
+ require 'jcode' if RUBY_VERSION < '1.9'
2
+ require 'faker'
2
3
 
3
4
  # Class for obfuscating MySQL dumps. This can parse mysqldump outputs when using the -c option, which includes
4
5
  # column names in the insert statements.
5
6
  class MyObfuscate
6
- attr_accessor :config
7
+ attr_accessor :config, :globally_kept_columns, :fail_on_unspecified_columns, :database_type
7
8
 
8
- INSERT_REGEX = /^\s*INSERT INTO `(.*?)` \((.*?)\) VALUES\s*/i
9
9
  NUMBER_CHARS = "1234567890"
10
10
  USERNAME_CHARS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_" + NUMBER_CHARS
11
11
  SENSIBLE_CHARS = USERNAME_CHARS + '+-=[{]}/?|!@#$%^&*()`~'
@@ -16,17 +16,35 @@ class MyObfuscate
16
16
  @config = configuration
17
17
  end
18
18
 
19
+ def fail_on_unspecified_columns?
20
+ @fail_on_unspecified_columns
21
+ end
22
+
23
+ def database_helper
24
+ if @database_helper.nil?
25
+ if @database_type == :sql_server
26
+ @database_helper = SqlServer.new
27
+ else
28
+ @database_helper = Mysql.new
29
+ end
30
+ end
31
+
32
+ @database_helper
33
+ end
34
+
19
35
  # Read an input stream and dump out an obfuscated output stream. These streams could be StringIO objects, Files,
20
36
  # or STDIN and STDOUT.
21
37
  def obfuscate(input_io, output_io)
38
+
22
39
  # We assume that every INSERT INTO line occupies one line in the file, with no internal linebreaks.
23
40
  input_io.each do |line|
24
- if regex_result = INSERT_REGEX.match(line)
25
- table_name = regex_result[1].to_sym
26
- columns = regex_result[2].split(/`\s*,\s*`/).map { |col| col.gsub('`',"").to_sym }
41
+ if table_data = database_helper.parse_insert_statement(line)
42
+ table_name = table_data[:table_name]
43
+ columns = table_data[:column_names]
27
44
  if config[table_name]
28
45
  output_io.puts obfuscate_bulk_insert_line(line, table_name, columns)
29
46
  else
47
+ $stderr.puts "Deprecated: #{table_name} was not specified in the config. A future release will cause this to be an error. Please specify the table definition or set it to :keep."
30
48
  output_io.write line
31
49
  end
32
50
  else
@@ -35,93 +53,53 @@ class MyObfuscate
35
53
  end
36
54
  end
37
55
 
38
- def self.reasembling_each_insert(line, table_name, columns)
39
- line = line.gsub(INSERT_REGEX, '').gsub(/\s*;\s*$/, '')
40
- output = context_aware_mysql_string_split(line).map do |sub_insert|
56
+ def reassembling_each_insert(line, table_name, columns)
57
+ output = database_helper.rows_to_be_inserted(line).map do |sub_insert|
41
58
  result = yield(sub_insert)
42
59
  result = result.map do |i|
43
- if i.nil?
44
- "NULL"
45
- else
46
- "'" + i + "'"
47
- end
60
+ database_helper.make_valid_value_string(i)
48
61
  end
49
62
  result = result.join(",")
50
63
  "(" + result + ")"
51
64
  end.join(",")
52
- "INSERT INTO `#{table_name}` (`#{columns.join('`, `')}`) VALUES #{output};"
53
- end
54
-
55
- # Be aware, strings must be quoted in single quotes!
56
- def self.context_aware_mysql_string_split(string)
57
- in_sub_insert = false
58
- in_quoted_string = false
59
- escaped = false
60
- current_field = nil
61
- length = string.length
62
- index = 0
63
- fields = []
64
- output = []
65
- string.each_char do |i|
66
- if escaped
67
- escaped = false
68
- current_field ||= ""
69
- current_field << i
70
- else
71
- if i == "\\"
72
- escaped = true
73
- current_field ||= ""
74
- current_field << i
75
- elsif i == "(" && !in_quoted_string && !in_sub_insert
76
- in_sub_insert = true
77
- elsif i == ")" && !in_quoted_string && in_sub_insert
78
- fields << current_field unless current_field.nil?
79
- output << fields unless fields.length == 0
80
- in_sub_insert = false
81
- fields = []
82
- current_field = nil
83
- elsif i == "'" && !in_quoted_string
84
- fields << current_field unless current_field.nil?
85
- current_field = ''
86
- in_quoted_string = true
87
- elsif i == "'" && in_quoted_string
88
- fields << current_field unless current_field.nil?
89
- current_field = nil
90
- in_quoted_string = false
91
- elsif i == "," && !in_quoted_string && in_sub_insert
92
- fields << current_field unless current_field.nil?
93
- current_field = nil
94
- elsif i == "L" && !in_quoted_string && in_sub_insert && current_field == "NUL"
95
- current_field = nil
96
- fields << current_field
97
- elsif (i == " " || i == "\t") && !in_quoted_string
98
- # Don't add whitespace not in a string
99
- elsif in_sub_insert
100
- current_field ||= ""
101
- current_field << i
102
- end
103
- end
104
- index += 1
105
- end
106
- fields << current_field unless current_field.nil?
107
- output << fields unless fields.length == 0
108
- output
65
+ database_helper.make_insert_statement(table_name, columns, output)
109
66
  end
110
67
 
111
68
  def self.row_as_hash(row, columns)
112
69
  columns.zip(row).inject({}) {|m, (name, value)| m[name] = value; m}
113
70
  end
114
71
 
72
+ def self.make_conditional_method(conditional_method, index, row)
73
+ if conditional_method.is_a?(Symbol)
74
+ if conditional_method == :blank
75
+ conditional_method = lambda { |row_hash| row[index].nil? || row[index] == '' }
76
+ elsif conditional_method == :nil
77
+ conditional_method = lambda { |row_hash| row[index].nil? }
78
+ end
79
+ end
80
+ conditional_method
81
+ end
82
+
115
83
  def self.apply_table_config(row, table_config, columns)
116
84
  return row unless table_config.is_a?(Hash)
117
85
  row_hash = row_as_hash(row, columns)
118
86
 
119
87
  table_config.each do |column, definition|
120
88
  index = columns.index(column)
89
+
90
+ definition = { :type => definition } if definition.is_a?(Symbol)
91
+
92
+ if definition.has_key?(:unless)
93
+ unless_check = make_conditional_method(definition[:unless], index, row)
121
94
 
122
- next if definition[:unless] && definition[:unless].call(row_hash)
123
- if definition[:if]
124
- next unless definition[:if].call(row_hash)
95
+ next if unless_check.call(row_hash)
96
+ end
97
+
98
+
99
+ if definition.has_key?(:if)
100
+ if_check = make_conditional_method(definition[:if], index, row)
101
+
102
+ next unless if_check.call(row_hash)
125
103
  end
126
104
 
127
105
  if definition[:skip_regexes]
@@ -130,20 +108,51 @@ class MyObfuscate
130
108
 
131
109
  row[index.to_i] = case definition[:type]
132
110
  when :email
133
- random_string(4..10, USERNAME_CHARS) + "@example.com"
111
+ clean_quotes(Faker::Internet.email)
134
112
  when :string
135
- random_string(definition[:length], definition[:chars] || SENSIBLE_CHARS)
113
+ random_string(definition[:length] || 30, definition[:chars] || SENSIBLE_CHARS)
114
+ when :lorem
115
+ clean_bad_whitespace(clean_quotes(Faker::Lorem.sentences(definition[:number] || 1).join(". ")))
116
+ when :name
117
+ clean_quotes(Faker::Name.name)
118
+ when :first_name
119
+ clean_quotes(Faker::Name.first_name)
120
+ when :last_name
121
+ clean_quotes(Faker::Name.last_name)
122
+ when :address
123
+ clean_quotes("#{Faker::Address.street_address}\\n#{Faker::Address.city}, #{Faker::Address.state_abbr} #{Faker::Address.zip_code}")
124
+ when :street_address
125
+ clean_bad_whitespace(clean_quotes(Faker::Address.street_address))
126
+ when :city
127
+ clean_quotes(Faker::Address.city)
128
+ when :state
129
+ Faker::Address.state_abbr
130
+ when :zip_code
131
+ Faker::Address.zip_code
132
+ when :phone
133
+ Faker::PhoneNumber.phone_number
134
+ when :company
135
+ clean_bad_whitespace(clean_quotes(Faker::Company.name))
136
+ when :ipv4
137
+ Faker::Internet.ip_v4_address
138
+ when :ipv6
139
+ Faker::Internet.ip_v6_address
140
+ when :url
141
+ clean_bad_whitespace(Faker::Internet.url)
136
142
  when :integer
137
143
  random_integer(definition[:between] || (0..1000)).to_s
138
144
  when :fixed
139
145
  if definition[:one_of]
140
146
  definition[:one_of][(rand * definition[:one_of].length).to_i]
141
147
  else
142
- definition[:string]
148
+ definition[:string].is_a?(Proc) ? definition[:string].call(row_hash) : definition[:string]
143
149
  end
144
150
  when :null
145
151
  nil
152
+ when :keep
153
+ row[index]
146
154
  else
155
+ $stderr.puts "Keeping a column value by providing an unknown type (#{definition[:type]}) is deprecated. Use :keep instead."
147
156
  row[index]
148
157
  end
149
158
  end
@@ -162,26 +171,52 @@ class MyObfuscate
162
171
  out
163
172
  end
164
173
 
165
- def check_for_missing_columns(table_name, columns)
174
+ def check_for_defined_columns_not_in_table(table_name, columns)
166
175
  missing_columns = config[table_name].keys - columns
167
176
  unless missing_columns.length == 0
168
177
  error_message = missing_columns.map do |missing_column|
169
178
  "Column '#{missing_column}' could not be found in table '#{table_name}', please fix your obfuscator config."
170
179
  end.join("\n")
171
- raise RuntimeError.new(error_message)
180
+ raise RuntimeError.new(error_message)
172
181
  end
173
182
  end
174
183
 
175
- def obfuscate_bulk_insert_line (line, table_name, columns)
184
+ def check_for_table_columns_not_in_definition(table_name, columns)
185
+ missing_columns = columns - (config[table_name].keys + (globally_kept_columns || []).map {|i| i.to_sym}).uniq
186
+ unless missing_columns.length == 0
187
+ error_message = missing_columns.map do |missing_column|
188
+ "Column '#{missing_column}' defined in table '#{table_name}', but not found in table definition, please fix your obfuscator config."
189
+ end.join("\n")
190
+ raise RuntimeError.new(error_message)
191
+ end
192
+ end
193
+
194
+ def obfuscate_bulk_insert_line(line, table_name, columns)
176
195
  table_config = config[table_name]
177
196
  if table_config == :truncate
178
197
  ""
198
+ elsif table_config == :keep
199
+ line
179
200
  else
180
- check_for_missing_columns(table_name, columns)
201
+ check_for_defined_columns_not_in_table(table_name, columns)
202
+ check_for_table_columns_not_in_definition(table_name, columns) if fail_on_unspecified_columns?
181
203
  # Note: Remember to SQL escape strings in what you pass back.
182
- MyObfuscate.reasembling_each_insert(line, table_name, columns) do |row|
204
+ reassembling_each_insert(line, table_name, columns) do |row|
183
205
  MyObfuscate.apply_table_config(row, table_config, columns)
184
206
  end
185
207
  end
186
208
  end
209
+
210
+ private
211
+
212
+ def self.clean_quotes(value)
213
+ value.gsub(/['"]/, '')
214
+ end
215
+
216
+ def self.clean_bad_whitespace(value)
217
+ value.gsub(/[\n\t\r]/, '')
218
+ end
187
219
  end
220
+
221
+ require 'my_obfuscate/mysql'
222
+ require 'my_obfuscate/sql_server'