my_obfuscate 0.4.2 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGES +1 -0
- data/Gemfile +2 -0
- data/README.rdoc +6 -0
- data/lib/my_obfuscate/config_applicator.rb +146 -0
- data/lib/my_obfuscate/copy_statement_parser.rb +45 -0
- data/lib/my_obfuscate/insert_statement_parser.rb +22 -0
- data/lib/my_obfuscate/mysql.rb +76 -2
- data/lib/my_obfuscate/postgres.rb +37 -7
- data/lib/my_obfuscate/sql_server.rb +6 -1
- data/lib/my_obfuscate/version.rb +1 -1
- data/lib/my_obfuscate.rb +7 -163
- data/spec/my_obfuscate/config_applicator_spec.rb +276 -0
- data/spec/my_obfuscate/mysql_spec.rb +59 -2
- data/spec/my_obfuscate/postgres_spec.rb +30 -10
- data/spec/my_obfuscate_spec.rb +67 -253
- metadata +13 -5
- data/lib/my_obfuscate/database_helper_shared.rb +0 -76
- data/spec/my_obfuscate/database_helper_shared_examples.rb +0 -63
data/CHANGES
CHANGED
data/Gemfile
CHANGED
data/README.rdoc
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
= MyObfuscate
|
2
2
|
|
3
|
+
{<img src="https://travis-ci.org/mavenlink/my_obfuscate.png">}[https://travis-ci.org/mavenlink/my_obfuscate]
|
4
|
+
|
3
5
|
You want to develop against real production data, but you don't want to violate your users' privacy. Enter MyObfuscate: standalone Ruby code for the selective rewriting of SQL dumps in order to protect user privacy. It supports MySQL, Postgres, and SQL Server.
|
4
6
|
|
5
7
|
= Install
|
@@ -64,6 +66,10 @@ builtin SQL Server support by specifying:
|
|
64
66
|
obfuscator.database_type = :sql_server
|
65
67
|
obfuscator.database_type = :postgres
|
66
68
|
|
69
|
+
If using Postgres, use pg_dump to get a dump:
|
70
|
+
|
71
|
+
pg_dump database | ruby obfuscator.rb > obfuscated_dump.sql
|
72
|
+
|
67
73
|
== Types
|
68
74
|
|
69
75
|
Available types include: email, string, lorem, name, first_name, last_name, address, street_address, city, state,
|
@@ -0,0 +1,146 @@
|
|
1
|
+
class MyObfuscate
  # Rewrites the cells of a single row according to a table's obfuscation
  # config. All methods are class-level; the class holds no per-row state.
  class ConfigApplicator

    # Applies +table_config+ to +row+, mutating and returning it.
    #
    # row          - Array of cell values, positionally aligned with +columns+.
    # table_config - Hash of column name => definition. A definition is either
    #                a Symbol type (e.g. :email) or a Hash with a :type key,
    #                per-type options, and optional :if / :unless /
    #                :skip_regexes guards. Anything that is not a Hash leaves
    #                the row untouched.
    # columns      - Array of the table's column names.
    #
    # Returns the (mutated) row.
    def self.apply_table_config(row, table_config, columns)
      return row unless table_config.is_a?(Hash)

      # Built once, before any cell is rewritten; this is what :if / :unless
      # procs and :fixed procs receive.
      row_hash = row_as_hash(row, columns)

      table_config.each do |column, definition|
        index = columns.index(column)
        # A configured column that this table does not have has no cell to
        # rewrite. Without this guard nil.to_i would target column 0 and
        # silently clobber an unrelated value.
        next if index.nil?

        definition = { :type => definition } if definition.is_a?(Symbol)

        if definition.has_key?(:unless)
          unless_check = make_conditional_method(definition[:unless], index, row)
          next if unless_check.call(row_hash)
        end

        if definition.has_key?(:if)
          if_check = make_conditional_method(definition[:if], index, row)
          next unless if_check.call(row_hash)
        end

        if definition[:skip_regexes]
          next if definition[:skip_regexes].any? { |regex| row[index] =~ regex }
        end

        row[index] = case definition[:type]
          when :email
            md5 = Digest::MD5.hexdigest(rand.to_s)[0...5]
            clean_quotes("#{Faker::Internet.email}.#{md5}.example.com")
          when :string
            random_string(definition[:length] || 30, definition[:chars] || SENSIBLE_CHARS)
          when :lorem
            clean_bad_whitespace(clean_quotes(Faker::Lorem.sentences(definition[:number] || 1).join(". ")))
          when :like_english
            clean_quotes random_english_sentences(definition[:number] || 1)
          when :name
            clean_quotes(Faker::Name.name)
          when :first_name
            clean_quotes(Faker::Name.first_name)
          when :last_name
            clean_quotes(Faker::Name.last_name)
          when :address
            clean_quotes("#{Faker::AddressUS.street_address}\\n#{Faker::AddressUS.city}, #{Faker::AddressUS.state_abbr} #{Faker::AddressUS.zip_code}")
          when :street_address
            clean_bad_whitespace(clean_quotes(Faker::AddressUS.street_address))
          when :city
            clean_quotes(Faker::AddressUS.city)
          when :state
            clean_quotes Faker::AddressUS.state_abbr
          when :zip_code
            Faker::AddressUS.zip_code
          when :phone
            clean_quotes Faker::PhoneNumber.phone_number
          when :company
            clean_bad_whitespace(clean_quotes(Faker::Company.name))
          when :ipv4
            Faker::Internet.ip_v4_address
          when :ipv6
            # Inlined from Faker because ffaker doesn't have ipv6.
            @@ip_v6_space ||= (0..65535).to_a
            container = (1..8).map { |_| @@ip_v6_space.sample }
            container.map { |n| n.to_s(16) }.join(':')
          when :url
            clean_bad_whitespace(Faker::Internet.http_url)
          when :integer
            random_integer(definition[:between] || (0..1000)).to_s
          when :fixed
            if definition[:one_of]
              definition[:one_of][(rand * definition[:one_of].length).to_i]
            else
              definition[:string].is_a?(Proc) ? definition[:string].call(row_hash) : definition[:string]
            end
          when :null
            nil
          when :keep
            row[index]
          else
            $stderr.puts "Keeping a column value by providing an unknown type (#{definition[:type]}) is deprecated. Use :keep instead."
            row[index]
        end
      end
      row
    end

    # Pairs each column name with the row value at the same position.
    #
    # Returns a Hash of column name => value.
    def self.row_as_hash(row, columns)
      columns.zip(row).inject({}) { |m, (name, value)| m[name] = value; m }
    end

    # Turns the :blank and :nil shorthands into lambdas that inspect
    # row[index]; any other value (typically a Proc supplied in the config)
    # is returned unchanged.
    #
    # Returns the object that will be sent #call with the row hash.
    def self.make_conditional_method(conditional_method, index, row)
      if conditional_method.is_a?(Symbol)
        if conditional_method == :blank
          conditional_method = lambda { |row_hash| row[index].nil? || row[index] == '' }
        elsif conditional_method == :nil
          conditional_method = lambda { |row_hash| row[index].nil? }
        end
      end
      conditional_method
    end

    # Picks a random Integer between between.min and between.max (inclusive).
    def self.random_integer(between)
      (between.min + (between.max - between.min) * rand).round
    end

    # Builds a random string out of +chars+.
    #
    # length_or_range - an Integer for an exact length, or a Range of lengths.
    # chars           - indexable collection of characters to draw from.
    #
    # Returns the generated String.
    def self.random_string(length_or_range, chars)
      # Integer rather than Fixnum: Fixnum was removed in Ruby 3.2.
      length_or_range = (length_or_range..length_or_range) if length_or_range.is_a?(Integer)
      times = random_integer(length_or_range)
      out = ""
      times.times { out << chars[(rand * chars.length).to_i] }
      out
    end

    # Generates +num+ sentences of 3 to 7 words drawn from WalkerMethod, which
    # is seeded with the words and counts read from the bundled en_50K.txt.
    #
    # Returns the sentences joined by single spaces.
    def self.random_english_sentences(num)
      @@walker_method ||= begin
        words, counts = [], []
        # This file lives in lib/my_obfuscate/, so the data directory is a
        # sibling of __FILE__ (lib/my_obfuscate/data/en_50K.txt).
        File.read(File.expand_path(File.join(File.dirname(__FILE__), 'data', 'en_50K.txt'))).each_line do |line|
          word, count = line.split(/\s+/)
          words << word
          counts << count.to_i
        end
        WalkerMethod.new(words, counts)
      end

      sentences = []
      num.times do
        words = []
        (3 + rand * 5).to_i.times { words << @@walker_method.random }
        sentences << words.join(" ") + "."
        sentences.last[0] = sentences.last[0].upcase
      end
      sentences.join(" ")
    end

    # Strips single and double quotes so generated values cannot break out of
    # a quoted SQL literal.
    def self.clean_quotes(value)
      value.gsub(/['"]/, '')
    end

    # Strips newlines, tabs and carriage returns from +value+.
    def self.clean_bad_whitespace(value)
      value.gsub(/[\n\t\r]/, '')
    end

  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
class MyObfuscate
  module CopyStatementParser

    # Postgres dumps carry table data in COPY statements instead of INSERTs.
    # They look like:
    #
    #   COPY some_table (a, b, c, d) FROM stdin;
    #   1	2	3	4
    #   5	6	7	8
    #   \.
    #
    # This requires the parse method to persist data (table name and
    # column names) across multiple lines.
    #
    # obfuscator - object responding to obfuscate_bulk_insert_line(line,
    #              table_name, columns); called once per COPY data row.
    # config     - Hash keyed by table name; only used here to warn about
    #              tables that have no entry.
    # input_io   - line-enumerable source of the dump.
    # output_io  - destination; receives every input line, with COPY data
    #              rows replaced by their obfuscated form.
    #
    # The including class must provide parse_insert_statement(line) and
    # parse_copy_statement(line).
    #
    # Raises RuntimeError if the dump contains an INSERT statement.
    def parse(obfuscator, config, input_io, output_io)
      current_table_name = ""
      current_columns = nil
      inside_copy_statement = false

      input_io.each do |line|
        if parse_insert_statement(line)
          raise RuntimeError, "Cannot obfuscate Postgres dumps containing INSERT statements. Please use COPY statments."
        elsif table_data = parse_copy_statement(line)
          inside_copy_statement = true

          current_table_name = table_data[:table_name]
          current_columns = table_data[:column_names]

          if !config[current_table_name]
            $stderr.puts "Deprecated: #{current_table_name} was not specified in the config. A future release will cause this to be an error. Please specify the table definition or set it to :keep."
          end

          output_io.write line
        elsif line =~ /\A\\\.\r?\n?\z/
          # End-of-data marker: a backslash and a period alone on the line.
          # Matching the whole line matters: a looser pattern also matches
          # data rows that merely end in a period, which would end the COPY
          # block early and leak the remaining rows unobfuscated.
          inside_copy_statement = false

          output_io.write line
        elsif inside_copy_statement
          output_io.puts obfuscator.obfuscate_bulk_insert_line(line, current_table_name, current_columns)
        else
          output_io.write line
        end
      end
    end

  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
class MyObfuscate
  module InsertStatementParser

    # Copies +input_io+ to +output_io+ one line at a time. Lines recognised
    # by the including class's parse_insert_statement are replaced with
    # obfuscator.obfuscate_bulk_insert_line(...) when their table has an
    # entry in +config+; INSERTs for unconfigured tables trigger a
    # deprecation warning on stderr and pass through unchanged, as do all
    # non-INSERT lines.
    def parse(obfuscator, config, input_io, output_io)
      input_io.each do |line|
        table_data = parse_insert_statement(line)

        unless table_data
          output_io.write line
          next
        end

        table_name = table_data[:table_name]
        columns = table_data[:column_names]

        unless config[table_name]
          $stderr.puts "Deprecated: #{table_name} was not specified in the config. A future release will cause this to be an error. Please specify the table definition or set it to :keep."
          output_io.write line
          next
        end

        output_io.puts obfuscator.obfuscate_bulk_insert_line(line, table_name, columns)
      end
    end

  end
end
|
data/lib/my_obfuscate/mysql.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
class MyObfuscate
|
2
2
|
class Mysql
|
3
|
-
include MyObfuscate::
|
3
|
+
include MyObfuscate::InsertStatementParser
|
4
4
|
|
5
5
|
def parse_insert_statement(line)
|
6
6
|
if regex_match = insert_regex.match(line)
|
@@ -11,7 +11,11 @@ class MyObfuscate
|
|
11
11
|
end
|
12
12
|
end
|
13
13
|
|
14
|
-
def make_insert_statement(table_name, column_names,
|
14
|
+
# Builds a MySQL bulk INSERT statement.
#
# table_name   - name of the table, emitted inside backticks.
# column_names - Array of column names, each emitted inside backticks.
# values       - Array of rows, each an Array of already-formatted SQL
#                value strings.
#
# Returns the statement as a String terminated by a semicolon.
def make_insert_statement(table_name, column_names, values)
  column_list = column_names.join('`, `')
  row_tuples = values.map { |row| "(" + row.join(",") + ")" }
  values_strings = row_tuples.join(",")

  "INSERT INTO `#{table_name}` (`#{column_list}`) VALUES #{values_strings};"
end
|
17
21
|
|
@@ -19,5 +23,75 @@ class MyObfuscate
|
|
19
23
|
/^\s*INSERT INTO `(.*?)` \((.*?)\) VALUES\s*/i
|
20
24
|
end
|
21
25
|
|
26
|
+
# Extracts the rows from one bulk INSERT line: removes the text matched by
# insert_regex and the trailing semicolon (with surrounding whitespace),
# then hands the remainder to context_aware_mysql_string_split.
def rows_to_be_inserted(line)
  without_prefix = line.gsub(insert_regex, '')
  values_clause = without_prefix.gsub(/\s*;\s*$/, '')
  context_aware_mysql_string_split(values_clause)
end
|
30
|
+
|
31
|
+
# Formats one value for inclusion in a MySQL INSERT statement.
#
# value - nil or a String.
#
# Returns "NULL" for nil, the value itself when the whole string is a hex
# literal (0x...), and the value wrapped in single quotes otherwise.
def make_valid_value_string(value)
  if value.nil?
    "NULL"
  elsif value =~ /\A0x[0-9a-fA-F]+\z/
    # \A/\z anchor the entire string. With ^/$ a multi-line value that merely
    # contains a hex-looking line would be emitted unquoted.
    value
  else
    "'" + value + "'"
  end
end
|
40
|
+
|
41
|
+
# Splits the VALUES portion of a MySQL bulk INSERT, e.g.
# "('a','b'),(1,NULL)", into an Array of rows, each an Array of fields.
#
# The string is scanned one character at a time:
#   * a backslash and the character after it are both kept verbatim in the
#     current field;
#   * parentheses outside a quoted string open and close a row;
#   * single quotes open and close a string field (the quotes themselves
#     are dropped);
#   * an unquoted NULL becomes nil; other unquoted tokens stay Strings;
#   * spaces and tabs outside a quoted string are ignored.
#
# Be aware, strings must be quoted in single quotes!
#
# Returns an Array of Arrays.
def context_aware_mysql_string_split(string)
  in_sub_insert = false
  in_quoted_string = false
  escaped = false
  current_field = nil
  fields = []
  output = []

  string.each_char do |char|
    if escaped
      # Character following a backslash: keep it, whatever it is.
      escaped = false
      current_field ||= ""
      current_field << char
    elsif char == "\\"
      escaped = true
      current_field ||= ""
      current_field << char
    elsif char == "(" && !in_quoted_string && !in_sub_insert
      in_sub_insert = true
    elsif char == ")" && !in_quoted_string && in_sub_insert
      # End of a row: flush the pending field and the row itself.
      fields << current_field unless current_field.nil?
      output << fields unless fields.length == 0
      in_sub_insert = false
      fields = []
      current_field = nil
    elsif char == "'" && !in_quoted_string
      fields << current_field unless current_field.nil?
      current_field = ''
      in_quoted_string = true
    elsif char == "'" && in_quoted_string
      fields << current_field unless current_field.nil?
      current_field = nil
      in_quoted_string = false
    elsif char == "," && !in_quoted_string && in_sub_insert
      fields << current_field unless current_field.nil?
      current_field = nil
    elsif char == "L" && !in_quoted_string && in_sub_insert && current_field == "NUL"
      # The final "L" of an unquoted NULL: record the field as nil.
      current_field = nil
      fields << current_field
    elsif (char == " " || char == "\t") && !in_quoted_string
      # Don't add whitespace not in a string
    elsif in_sub_insert
      current_field ||= ""
      current_field << char
    end
  end

  # Flush anything left over when the input ends without a closing paren.
  fields << current_field unless current_field.nil?
  output << fields unless fields.length == 0
  output
end
|
22
96
|
end
|
23
97
|
end
|
@@ -1,9 +1,31 @@
|
|
1
1
|
class MyObfuscate
|
2
2
|
class Postgres
|
3
|
-
include MyObfuscate::
|
3
|
+
include MyObfuscate::CopyStatementParser
|
4
4
|
|
5
|
-
|
6
|
-
|
5
|
+
# Copy statements contain the column values tab separated like so:
#   blah	blah	blah	blah
# which we want to turn into:
#   [['blah','blah','blah','blah']]
#
# We wrap it in an array to keep it consistent with MySql bulk
# obfuscation (multiple rows per insert statement)
#
# line - one COPY data line; it is not modified.
#
# Returns a one-element Array holding the row, with \N fields as nil.
def rows_to_be_inserted(line)
  # sub (not gsub!) so the caller's string is left alone and frozen input
  # works; the -1 limit keeps trailing empty columns, which a plain split
  # would silently drop and thereby shorten the row.
  row = line.sub(/\n\z/, "").split(/\t/, -1)

  row.map! { |value| value == "\\N" ? nil : value }

  [row]
end
|
26
|
+
|
27
|
+
def parse_copy_statement(line)
|
28
|
+
if regex_match = /^\s*COPY (.*?) \((.*?)\) FROM\s*/i.match(line)
|
7
29
|
{
|
8
30
|
:table_name => regex_match[1].to_sym,
|
9
31
|
:column_names => regex_match[2].split(/\s*,\s*/).map(&:to_sym)
|
@@ -11,12 +33,20 @@ class MyObfuscate
|
|
11
33
|
end
|
12
34
|
end
|
13
35
|
|
14
|
-
def make_insert_statement(table_name, column_names,
|
15
|
-
|
36
|
+
# Rebuilds a COPY data line: every value in +values+ (an Array of rows) is
# joined with tabs. +table_name+ and +column_names+ are accepted so the
# signature matches the other database helpers, but are not used.
def make_insert_statement(table_name, column_names, values)
  tab = "\t"
  values.flatten.join(tab)
end
|
17
39
|
|
18
|
-
def
|
19
|
-
|
40
|
+
# Formats one value for a COPY data line: nil becomes the \N marker,
# anything else is returned as-is.
def make_valid_value_string(value)
  return "\\N" if value.nil?

  value
end
|
47
|
+
|
48
|
+
# Detects a line that begins (after optional whitespace) with INSERT INTO,
# case-insensitively.
#
# Returns the MatchData when it does, nil otherwise.
def parse_insert_statement(line)
  insert_prefix = /^\s*INSERT INTO/i
  insert_prefix.match(line)
end
|
21
51
|
|
22
52
|
end
|
@@ -1,5 +1,6 @@
|
|
1
1
|
class MyObfuscate
|
2
2
|
class SqlServer
|
3
|
+
include MyObfuscate::InsertStatementParser
|
3
4
|
|
4
5
|
def parse_insert_statement(line)
|
5
6
|
if regex_match = insert_regex.match(line)
|
@@ -25,7 +26,11 @@ class MyObfuscate
|
|
25
26
|
end
|
26
27
|
end
|
27
28
|
|
28
|
-
def make_insert_statement(table_name, column_names,
|
29
|
+
# Builds a SQL Server bulk INSERT statement against the dbo schema.
#
# table_name   - name of the table, emitted inside square brackets.
# column_names - Array of column names, each emitted inside square brackets.
# values       - Array of rows, each an Array of already-formatted SQL
#                value strings.
#
# Returns the statement as a String terminated by a semicolon.
def make_insert_statement(table_name, column_names, values)
  column_list = column_names.join("], [")
  row_tuples = values.map { |row| "(" + row.join(",") + ")" }
  values_strings = row_tuples.join(",")

  "INSERT [dbo].[#{table_name}] ([#{column_list}]) VALUES #{values_strings};"
end
|
31
36
|
|
data/lib/my_obfuscate/version.rb
CHANGED
data/lib/my_obfuscate.rb
CHANGED
@@ -39,22 +39,7 @@ class MyObfuscate
|
|
39
39
|
# Read an input stream and dump out an obfuscated output stream. These streams could be StringIO objects, Files,
|
40
40
|
# or STDIN and STDOUT.
|
41
41
|
def obfuscate(input_io, output_io)
|
42
|
-
|
43
|
-
# We assume that every INSERT INTO line occupies one line in the file, with no internal linebreaks.
|
44
|
-
input_io.each do |line|
|
45
|
-
if table_data = database_helper.parse_insert_statement(line)
|
46
|
-
table_name = table_data[:table_name]
|
47
|
-
columns = table_data[:column_names]
|
48
|
-
if config[table_name]
|
49
|
-
output_io.puts obfuscate_bulk_insert_line(line, table_name, columns)
|
50
|
-
else
|
51
|
-
$stderr.puts "Deprecated: #{table_name} was not specified in the config. A future release will cause this to be an error. Please specify the table definition or set it to :keep."
|
52
|
-
output_io.write line
|
53
|
-
end
|
54
|
-
else
|
55
|
-
output_io.write line
|
56
|
-
end
|
57
|
-
end
|
42
|
+
database_helper.parse(self, config, input_io, output_io)
|
58
43
|
end
|
59
44
|
|
60
45
|
def reassembling_each_insert(line, table_name, columns)
|
@@ -63,146 +48,12 @@ class MyObfuscate
|
|
63
48
|
result = result.map do |i|
|
64
49
|
database_helper.make_valid_value_string(i)
|
65
50
|
end
|
66
|
-
result = result.join(",")
|
67
|
-
"(" + result + ")"
|
68
|
-
end.join(",")
|
69
|
-
database_helper.make_insert_statement(table_name, columns, output)
|
70
|
-
end
|
71
|
-
|
72
|
-
def self.row_as_hash(row, columns)
|
73
|
-
columns.zip(row).inject({}) {|m, (name, value)| m[name] = value; m}
|
74
|
-
end
|
75
|
-
|
76
|
-
def self.make_conditional_method(conditional_method, index, row)
|
77
|
-
if conditional_method.is_a?(Symbol)
|
78
|
-
if conditional_method == :blank
|
79
|
-
conditional_method = lambda { |row_hash| row[index].nil? || row[index] == '' }
|
80
|
-
elsif conditional_method == :nil
|
81
|
-
conditional_method = lambda { |row_hash| row[index].nil? }
|
82
|
-
end
|
83
51
|
end
|
84
|
-
|
85
|
-
end
|
86
|
-
|
87
|
-
def self.apply_table_config(row, table_config, columns)
|
88
|
-
return row unless table_config.is_a?(Hash)
|
89
|
-
row_hash = row_as_hash(row, columns)
|
90
|
-
|
91
|
-
table_config.each do |column, definition|
|
92
|
-
index = columns.index(column)
|
93
|
-
|
94
|
-
definition = { :type => definition } if definition.is_a?(Symbol)
|
95
|
-
|
96
|
-
if definition.has_key?(:unless)
|
97
|
-
unless_check = make_conditional_method(definition[:unless], index, row)
|
98
|
-
|
99
|
-
next if unless_check.call(row_hash)
|
100
|
-
end
|
101
|
-
|
102
|
-
|
103
|
-
if definition.has_key?(:if)
|
104
|
-
if_check = make_conditional_method(definition[:if], index, row)
|
105
|
-
|
106
|
-
next unless if_check.call(row_hash)
|
107
|
-
end
|
108
|
-
|
109
|
-
if definition[:skip_regexes]
|
110
|
-
next if definition[:skip_regexes].any? {|regex| row[index] =~ regex}
|
111
|
-
end
|
112
|
-
|
113
|
-
row[index.to_i] = case definition[:type]
|
114
|
-
when :email
|
115
|
-
md5 = Digest::MD5.hexdigest(rand.to_s)[0...5]
|
116
|
-
clean_quotes("#{Faker::Internet.email}.#{md5}.example.com")
|
117
|
-
when :string
|
118
|
-
random_string(definition[:length] || 30, definition[:chars] || SENSIBLE_CHARS)
|
119
|
-
when :lorem
|
120
|
-
clean_bad_whitespace(clean_quotes(Faker::Lorem.sentences(definition[:number] || 1).join(". ")))
|
121
|
-
when :like_english
|
122
|
-
clean_quotes random_english_sentences(definition[:number] || 1)
|
123
|
-
when :name
|
124
|
-
clean_quotes(Faker::Name.name)
|
125
|
-
when :first_name
|
126
|
-
clean_quotes(Faker::Name.first_name)
|
127
|
-
when :last_name
|
128
|
-
clean_quotes(Faker::Name.last_name)
|
129
|
-
when :address
|
130
|
-
clean_quotes("#{Faker::AddressUS.street_address}\\n#{Faker::AddressUS.city}, #{Faker::AddressUS.state_abbr} #{Faker::AddressUS.zip_code}")
|
131
|
-
when :street_address
|
132
|
-
clean_bad_whitespace(clean_quotes(Faker::AddressUS.street_address))
|
133
|
-
when :city
|
134
|
-
clean_quotes(Faker::AddressUS.city)
|
135
|
-
when :state
|
136
|
-
clean_quotes Faker::AddressUS.state_abbr
|
137
|
-
when :zip_code
|
138
|
-
Faker::AddressUS.zip_code
|
139
|
-
when :phone
|
140
|
-
clean_quotes Faker::PhoneNumber.phone_number
|
141
|
-
when :company
|
142
|
-
clean_bad_whitespace(clean_quotes(Faker::Company.name))
|
143
|
-
when :ipv4
|
144
|
-
Faker::Internet.ip_v4_address
|
145
|
-
when :ipv6
|
146
|
-
# Inlined from Faker because ffaker doesn't have ipv6.
|
147
|
-
@@ip_v6_space ||= (0..65535).to_a
|
148
|
-
container = (1..8).map{ |_| @@ip_v6_space.sample }
|
149
|
-
container.map{ |n| n.to_s(16) }.join(':')
|
150
|
-
when :url
|
151
|
-
clean_bad_whitespace(Faker::Internet.http_url)
|
152
|
-
when :integer
|
153
|
-
random_integer(definition[:between] || (0..1000)).to_s
|
154
|
-
when :fixed
|
155
|
-
if definition[:one_of]
|
156
|
-
definition[:one_of][(rand * definition[:one_of].length).to_i]
|
157
|
-
else
|
158
|
-
definition[:string].is_a?(Proc) ? definition[:string].call(row_hash) : definition[:string]
|
159
|
-
end
|
160
|
-
when :null
|
161
|
-
nil
|
162
|
-
when :keep
|
163
|
-
row[index]
|
164
|
-
else
|
165
|
-
$stderr.puts "Keeping a column value by providing an unknown type (#{definition[:type]}) is deprecated. Use :keep instead."
|
166
|
-
row[index]
|
167
|
-
end
|
168
|
-
end
|
169
|
-
row
|
170
|
-
end
|
171
|
-
|
172
|
-
def self.random_integer(between)
|
173
|
-
(between.min + (between.max - between.min) * rand).round
|
174
|
-
end
|
175
|
-
|
176
|
-
def self.random_string(length_or_range, chars)
|
177
|
-
length_or_range = (length_or_range..length_or_range) if length_or_range.is_a?(Fixnum)
|
178
|
-
times = random_integer(length_or_range)
|
179
|
-
out = ""
|
180
|
-
times.times { out << chars[rand * chars.length] }
|
181
|
-
out
|
182
|
-
end
|
183
|
-
|
184
|
-
def self.random_english_sentences(num)
|
185
|
-
@@walker_method ||= begin
|
186
|
-
words, counts = [], []
|
187
|
-
File.read(File.expand_path(File.join(File.dirname(__FILE__), 'my_obfuscate', 'data', 'en_50K.txt'))).each_line do |line|
|
188
|
-
word, count = line.split(/\s+/)
|
189
|
-
words << word
|
190
|
-
counts << count.to_i
|
191
|
-
end
|
192
|
-
WalkerMethod.new(words, counts)
|
193
|
-
end
|
194
|
-
|
195
|
-
sentences = []
|
196
|
-
num.times do
|
197
|
-
words = []
|
198
|
-
(5 + rand * 6).to_i.times { words << @@walker_method.random }
|
199
|
-
sentences << words.join(" ") + "."
|
200
|
-
sentences.last[0] = sentences.last[0].upcase
|
201
|
-
end
|
202
|
-
sentences.join(" ")
|
52
|
+
database_helper.make_insert_statement(table_name, columns, output)
|
203
53
|
end
|
204
54
|
|
205
55
|
def check_for_defined_columns_not_in_table(table_name, columns)
|
56
|
+
return unless config[table_name]
|
206
57
|
missing_columns = config[table_name].keys - columns
|
207
58
|
unless missing_columns.length == 0
|
208
59
|
error_message = missing_columns.map do |missing_column|
|
@@ -233,23 +84,16 @@ class MyObfuscate
|
|
233
84
|
check_for_table_columns_not_in_definition(table_name, columns) if fail_on_unspecified_columns?
|
234
85
|
# Note: Remember to SQL escape strings in what you pass back.
|
235
86
|
reassembling_each_insert(line, table_name, columns) do |row|
|
236
|
-
|
87
|
+
ConfigApplicator.apply_table_config(row, table_config, columns)
|
237
88
|
end
|
238
89
|
end
|
239
90
|
end
|
240
91
|
|
241
|
-
private
|
242
|
-
|
243
|
-
def self.clean_quotes(value)
|
244
|
-
value.gsub(/['"]/, '')
|
245
|
-
end
|
246
|
-
|
247
|
-
def self.clean_bad_whitespace(value)
|
248
|
-
value.gsub(/[\n\t\r]/, '')
|
249
|
-
end
|
250
92
|
end
|
251
93
|
|
252
|
-
require 'my_obfuscate/
|
94
|
+
require 'my_obfuscate/copy_statement_parser'
|
95
|
+
require 'my_obfuscate/insert_statement_parser'
|
253
96
|
require 'my_obfuscate/mysql'
|
254
97
|
require 'my_obfuscate/sql_server'
|
255
98
|
require 'my_obfuscate/postgres'
|
99
|
+
require 'my_obfuscate/config_applicator'
|