my_obfuscate 0.5.3 → 0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.ruby-version +1 -1
- data/README.rdoc +27 -1
- data/Rakefile +1 -0
- data/lib/my_obfuscate/config_applicator.rb +11 -12
- data/lib/my_obfuscate/config_scaffold_generator.rb +85 -0
- data/lib/my_obfuscate/copy_statement_parser.rb +3 -2
- data/lib/my_obfuscate/insert_statement_parser.rb +4 -1
- data/lib/my_obfuscate/mysql.rb +67 -66
- data/lib/my_obfuscate/postgres.rb +3 -2
- data/lib/my_obfuscate/sql_server.rb +2 -1
- data/lib/my_obfuscate/version.rb +1 -1
- data/lib/my_obfuscate.rb +26 -9
- data/mise.toml +2 -0
- data/my_obfuscate.gemspec +6 -6
- data/spec/my_obfuscate/config_applicator_spec.rb +2 -1
- data/spec/my_obfuscate/mysql_spec.rb +8 -1
- data/spec/my_obfuscate_spec.rb +375 -1
- data/spec/spec_helper.rb +1 -1
- metadata +11 -19
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
|
-
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: bf957dc89a1e6833eb09616f54f2da012d24e9bec472a72cd861e646a4e85abf
|
|
4
|
+
data.tar.gz: cc7ce4fe94578d843bf383dbe685aefd5ea3fcba3d026631bf1ab5be231fbfad
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 8a3040cda7709223f02815f67c431664d4ae2564c33f3f8c6d00124fe22af3b6cb89edac68fdf1100b930d093d4fe60d5471dd8bb753448467063913d4dd8190
|
|
7
|
+
data.tar.gz: 10440115a506cf1265db0cbc76fb19b11292578ac58f2c55e976ef895a7d4be24a44946e870f388e93d889a9f7023c73e9e861a894b595a9ba48ae21b718e6dc
|
data/.ruby-version
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
ruby-2.
|
|
1
|
+
ruby-2.4.0
|
data/README.rdoc
CHANGED
|
@@ -48,6 +48,7 @@ Make an obfuscator.rb script:
|
|
|
48
48
|
})
|
|
49
49
|
obfuscator.fail_on_unspecified_columns = true # if you want it to require every column in the table to be in the above definition
|
|
50
50
|
obfuscator.globally_kept_columns = %w[id created_at updated_at] # if you set fail_on_unspecified_columns, you may want this as well
|
|
51
|
+
# If you'd like to also validate against your schema.rb file to make sure all fields and tables are present, see https://gist.github.com/cantino/5376e73b0ad806dc4da4
|
|
51
52
|
obfuscator.obfuscate(STDIN, STDOUT)
|
|
52
53
|
|
|
53
54
|
And to get an obfuscated dump:
|
|
@@ -72,9 +73,34 @@ If using Postgres, use pg_dump to get a dump:
|
|
|
72
73
|
|
|
73
74
|
== Types
|
|
74
75
|
|
|
75
|
-
Available types include: email, string, lorem, name, first_name, last_name, address, street_address, city, state,
|
|
76
|
+
Available types include: email, string, lorem, name, first_name, last_name, address, street_address, secondary_address, city, state,
|
|
76
77
|
zip_code, phone, company, ipv4, ipv6, url, integer, fixed, null, and keep.
|
|
77
78
|
|
|
79
|
+
== Helping with creation of the "obfuscator.rb" script
|
|
80
|
+
|
|
81
|
+
If you don't want to type all those table names and column names into your obfuscator.rb script,
|
|
82
|
+
you can use my_obfuscate to do some of that work for you. It can consume your database dump file and create a "scaffold" for the script.
|
|
83
|
+
To run my_obfuscate in this mode, start with an "empty" scaffolder.rb script as follows:
|
|
84
|
+
|
|
85
|
+
#!/usr/bin/env ruby
|
|
86
|
+
require "rubygems"
|
|
87
|
+
require "my_obfuscate"
|
|
88
|
+
|
|
89
|
+
obfuscator = MyObfuscate.new({})
|
|
90
|
+
obfuscator.scaffold(STDIN, STDOUT)
|
|
91
|
+
|
|
92
|
+
Then feed in your database dump:
|
|
93
|
+
mysqldump -c --hex-blob -u user -ppassword database | ruby scaffolder.rb > obfuscator_scaffold.rb_snippet
|
|
94
|
+
pg_dump database | ruby scaffolder.rb > obfuscator_scaffold.rb_snippet
|
|
95
|
+
|
|
96
|
+
The output will be a series of configuration statements of the form:
|
|
97
|
+
:table_name => {
|
|
98
|
+
:column1_name => :keep # scaffold
|
|
99
|
+
:column2_name => :keep # scaffold
|
|
100
|
+
... etc.
|
|
101
|
+
|
|
102
|
+
Scaffolding also works if you have a partial configuration. If your configuration is missing some tables or some columns, a call to 'scaffold' will pass through the configuration that exists and augment it with scaffolding for the missing tables or columns.
|
|
103
|
+
|
|
78
104
|
== Changes
|
|
79
105
|
|
|
80
106
|
* Support for Postgres. Thanks @samuelreh!
|
data/Rakefile
CHANGED
|
@@ -34,7 +34,7 @@ class MyObfuscate
|
|
|
34
34
|
when :string
|
|
35
35
|
random_string(definition[:length] || 30, definition[:chars] || SENSIBLE_CHARS)
|
|
36
36
|
when :lorem
|
|
37
|
-
clean_bad_whitespace(clean_quotes(Faker::Lorem.sentences(definition[:number] || 1).join("
|
|
37
|
+
clean_bad_whitespace(clean_quotes(Faker::Lorem.sentences(number: definition[:number] || 1).join(" ")))
|
|
38
38
|
when :like_english
|
|
39
39
|
clean_quotes random_english_sentences(definition[:number] || 1)
|
|
40
40
|
when :name
|
|
@@ -44,15 +44,17 @@ class MyObfuscate
|
|
|
44
44
|
when :last_name
|
|
45
45
|
clean_quotes(Faker::Name.last_name)
|
|
46
46
|
when :address
|
|
47
|
-
clean_quotes(
|
|
47
|
+
clean_quotes(Faker::Address.full_address)
|
|
48
48
|
when :street_address
|
|
49
|
-
clean_bad_whitespace(clean_quotes(Faker::
|
|
49
|
+
clean_bad_whitespace(clean_quotes(Faker::Address.street_address))
|
|
50
|
+
when :secondary_address
|
|
51
|
+
clean_bad_whitespace(clean_quotes(Faker::Address.secondary_address))
|
|
50
52
|
when :city
|
|
51
|
-
clean_quotes(Faker::
|
|
53
|
+
clean_quotes(Faker::Address.city)
|
|
52
54
|
when :state
|
|
53
|
-
clean_quotes Faker::
|
|
55
|
+
clean_quotes Faker::Address.state_abbr
|
|
54
56
|
when :zip_code
|
|
55
|
-
Faker::
|
|
57
|
+
Faker::Address.zip_code
|
|
56
58
|
when :phone
|
|
57
59
|
clean_quotes Faker::PhoneNumber.phone_number
|
|
58
60
|
when :company
|
|
@@ -60,12 +62,9 @@ class MyObfuscate
|
|
|
60
62
|
when :ipv4
|
|
61
63
|
Faker::Internet.ip_v4_address
|
|
62
64
|
when :ipv6
|
|
63
|
-
|
|
64
|
-
@@ip_v6_space ||= (0..65535).to_a
|
|
65
|
-
container = (1..8).map{ |_| @@ip_v6_space.sample }
|
|
66
|
-
container.map{ |n| n.to_s(16) }.join(':')
|
|
65
|
+
Faker::Internet.ip_v6_address
|
|
67
66
|
when :url
|
|
68
|
-
clean_bad_whitespace(Faker::Internet.
|
|
67
|
+
clean_bad_whitespace(Faker::Internet.url)
|
|
69
68
|
when :integer
|
|
70
69
|
random_integer(definition[:between] || (0..1000)).to_s
|
|
71
70
|
when :fixed
|
|
@@ -106,7 +105,7 @@ class MyObfuscate
|
|
|
106
105
|
end
|
|
107
106
|
|
|
108
107
|
def self.random_string(length_or_range, chars)
|
|
109
|
-
length_or_range = (length_or_range..length_or_range) if length_or_range.is_a?(
|
|
108
|
+
length_or_range = (length_or_range..length_or_range) if length_or_range.is_a?(Integer)
|
|
110
109
|
times = random_integer(length_or_range)
|
|
111
110
|
out = ""
|
|
112
111
|
times.times { out << chars[rand * chars.length] }
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
class MyObfuscate
|
|
2
|
+
module ConfigScaffoldGenerator
|
|
3
|
+
|
|
4
|
+
def generate_config(obfuscator, config, input_io, output_io)
|
|
5
|
+
input_io.each do |line|
|
|
6
|
+
if obfuscator.database_type == :postgres
|
|
7
|
+
table_data = parse_copy_statement(line)
|
|
8
|
+
else
|
|
9
|
+
table_data = parse_insert_statement(line)
|
|
10
|
+
end
|
|
11
|
+
next unless table_data
|
|
12
|
+
|
|
13
|
+
table_name = table_data[:table_name]
|
|
14
|
+
next if obfuscator.scaffolded_tables[table_name] # only process each table_name once
|
|
15
|
+
|
|
16
|
+
columns = table_data[:column_names]
|
|
17
|
+
table_config = config[table_name]
|
|
18
|
+
next if table_config == :truncate || table_config == :keep
|
|
19
|
+
|
|
20
|
+
missing_columns = obfuscator.missing_column_list(table_name, columns)
|
|
21
|
+
extra_columns = obfuscator.extra_column_list(table_name, columns)
|
|
22
|
+
|
|
23
|
+
if missing_columns.count == 0 && extra_columns.count == 0
|
|
24
|
+
# all columns are accounted for
|
|
25
|
+
output_io.puts "\n# All columns in the config for #{table_name.upcase} are present and accounted for."
|
|
26
|
+
else
|
|
27
|
+
# there are columns missing (or perhaps the whole table is missing); show a scaffold
|
|
28
|
+
emit_scaffold(table_name, table_config, extra_columns, missing_columns, output_io)
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
# Now that this table_name has been processed, remember it so we don't scaffold it again
|
|
32
|
+
obfuscator.scaffolded_tables[table_name] = 1
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
def config_table_open(table_name)
|
|
37
|
+
"\n :#{table_name} => {"
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
def config_table_close(table_name)
|
|
41
|
+
" },"
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
def emit_scaffold(table_name, existing_config, extra_columns, columns_to_scaffold, output_io)
|
|
45
|
+
|
|
46
|
+
# header block: contains table name and any existing config
|
|
47
|
+
if existing_config
|
|
48
|
+
output_io.puts config_table_open(table_name)
|
|
49
|
+
existing_config.each do |column, definition|
|
|
50
|
+
unless extra_columns.include?(column)
|
|
51
|
+
output_io.puts formatted_line(column, definition)
|
|
52
|
+
end
|
|
53
|
+
end
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
extra_columns.each do |column|
|
|
57
|
+
output_string = formatted_line(column, existing_config[column], "# unreferenced config")
|
|
58
|
+
output_io.puts "# #{output_string}"
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
# scaffold block: contains any config that's not already present
|
|
62
|
+
output_io.puts config_table_open(table_name) unless existing_config
|
|
63
|
+
|
|
64
|
+
scaffold = columns_to_scaffold.map do |column|
|
|
65
|
+
formatted_line(column, "keep", "# scaffold")
|
|
66
|
+
end.join("\n").chomp(',')
|
|
67
|
+
output_io.puts scaffold
|
|
68
|
+
output_io.puts config_table_close(table_name)
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
def formatted_line(column, definition, comment = nil)
|
|
72
|
+
colon_string = if (definition.to_s[0]=="{" || definition.to_s[0]==":") then definition.to_s else ":#{definition}" end
|
|
73
|
+
|
|
74
|
+
if column.length < 40
|
|
75
|
+
" :#{'%-40.40s' % column} => #{colon_string}, #{comment}"
|
|
76
|
+
else
|
|
77
|
+
" :#{column} => #{definition}, #{comment}"
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
end
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
|
|
@@ -29,12 +29,13 @@ class MyObfuscate
|
|
|
29
29
|
end
|
|
30
30
|
|
|
31
31
|
output_io.write line
|
|
32
|
-
elsif line.match
|
|
32
|
+
elsif line.match /^\\\.$/
|
|
33
33
|
inside_copy_statement = false
|
|
34
34
|
|
|
35
35
|
output_io.write line
|
|
36
36
|
elsif inside_copy_statement
|
|
37
|
-
|
|
37
|
+
obfuscated_line = obfuscator.obfuscate_bulk_insert_line(line, current_table_name, current_columns)
|
|
38
|
+
output_io.puts obfuscated_line unless obfuscated_line.empty?
|
|
38
39
|
else
|
|
39
40
|
output_io.write line
|
|
40
41
|
end
|
|
@@ -6,8 +6,9 @@ class MyObfuscate
|
|
|
6
6
|
if table_data = parse_insert_statement(line)
|
|
7
7
|
table_name = table_data[:table_name]
|
|
8
8
|
columns = table_data[:column_names]
|
|
9
|
+
ignore = table_data[:ignore]
|
|
9
10
|
if config[table_name]
|
|
10
|
-
output_io.puts obfuscator.obfuscate_bulk_insert_line(line, table_name, columns)
|
|
11
|
+
output_io.puts obfuscator.obfuscate_bulk_insert_line(line, table_name, columns, ignore)
|
|
11
12
|
else
|
|
12
13
|
$stderr.puts "Deprecated: #{table_name} was not specified in the config. A future release will cause this to be an error. Please specify the table definition or set it to :keep."
|
|
13
14
|
output_io.write line
|
|
@@ -20,3 +21,5 @@ class MyObfuscate
|
|
|
20
21
|
|
|
21
22
|
end
|
|
22
23
|
end
|
|
24
|
+
|
|
25
|
+
|
data/lib/my_obfuscate/mysql.rb
CHANGED
|
@@ -1,31 +1,88 @@
|
|
|
1
|
+
#encoding: UTF-8
|
|
2
|
+
require 'stringio'
|
|
3
|
+
require 'strscan'
|
|
4
|
+
|
|
1
5
|
class MyObfuscate
|
|
2
6
|
class Mysql
|
|
3
7
|
include MyObfuscate::InsertStatementParser
|
|
8
|
+
include MyObfuscate::ConfigScaffoldGenerator
|
|
9
|
+
|
|
10
|
+
LPAREN = /\(/
|
|
11
|
+
RPAREN = /\)/
|
|
12
|
+
NULL_LITERAL = /NULL/
|
|
13
|
+
STRING_LITERAL = /'(\\\\|\\'|.)*?'/ # Matching "\\" followed by "\'" followed by . ensures proper escape handling
|
|
14
|
+
OTHER_LITERAL = /[^,\)]+/ # All other literals are terminated by separator or right paren
|
|
15
|
+
WHITESPACE = /[\s,;]+/ # We treat the "," separator and ";" terminator as whitespace
|
|
4
16
|
|
|
5
17
|
def parse_insert_statement(line)
|
|
6
18
|
if regex_match = insert_regex.match(line)
|
|
7
19
|
{
|
|
8
|
-
:
|
|
9
|
-
:
|
|
20
|
+
:ignore => !regex_match[1].nil?,
|
|
21
|
+
:table_name => regex_match[2].to_sym,
|
|
22
|
+
:column_names => regex_match[3].split(/`\s*,\s*`/).map { |col| col.gsub('`', "").to_sym }
|
|
10
23
|
}
|
|
11
24
|
end
|
|
12
25
|
end
|
|
13
26
|
|
|
14
|
-
def make_insert_statement(table_name, column_names,
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
27
|
+
def make_insert_statement(table_name, column_names, rows, ignore = nil)
|
|
28
|
+
buffer = StringIO.new
|
|
29
|
+
buffer.write "INSERT #{ignore ? 'IGNORE ' : '' }INTO `#{table_name}` (`#{column_names.join('`, `')}`) VALUES "
|
|
30
|
+
write_rows(buffer, rows)
|
|
31
|
+
buffer.write ";"
|
|
32
|
+
buffer.string
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
def write_rows(buffer, rows)
|
|
36
|
+
rows.each_with_index do |row_values, i|
|
|
37
|
+
buffer.write("(")
|
|
38
|
+
write_row_values(buffer, row_values)
|
|
39
|
+
buffer.write(")")
|
|
40
|
+
buffer.write(",") if i < rows.length - 1
|
|
41
|
+
end
|
|
42
|
+
end
|
|
18
43
|
|
|
19
|
-
|
|
44
|
+
def write_row_values(buffer, row_values)
|
|
45
|
+
row_values.each_with_index do |value, j|
|
|
46
|
+
buffer.write value
|
|
47
|
+
buffer.write(",") if j < row_values.length - 1
|
|
48
|
+
end
|
|
20
49
|
end
|
|
21
50
|
|
|
22
51
|
def insert_regex
|
|
23
|
-
/^\s*INSERT INTO `(.*?)` \((.*?)\) VALUES\s*/i
|
|
52
|
+
/^\s*INSERT\s*(IGNORE )?\s*INTO `(.*?)` \((.*?)\) VALUES\s*/i
|
|
24
53
|
end
|
|
25
54
|
|
|
26
55
|
def rows_to_be_inserted(line)
|
|
27
|
-
|
|
28
|
-
|
|
56
|
+
scanner = StringScanner.new line
|
|
57
|
+
scanner.scan insert_regex
|
|
58
|
+
|
|
59
|
+
rows = []
|
|
60
|
+
row_values = []
|
|
61
|
+
until scanner.eos?
|
|
62
|
+
if scanner.scan(LPAREN)
|
|
63
|
+
# Left paren indicates the start of a row of (val1, val2, ..., valn)
|
|
64
|
+
row_values = []
|
|
65
|
+
elsif scanner.scan(RPAREN)
|
|
66
|
+
# Right paren indicates the end of a row of (val1, val2, ..., valn)
|
|
67
|
+
rows << row_values
|
|
68
|
+
elsif scanner.scan(NULL_LITERAL)
|
|
69
|
+
row_values << nil
|
|
70
|
+
elsif match = scanner.scan(STRING_LITERAL)
|
|
71
|
+
# We drop the leading and trailing quotes to extract the string
|
|
72
|
+
row_values << match.slice(1, match.length - 2)
|
|
73
|
+
elsif match = scanner.scan(OTHER_LITERAL)
|
|
74
|
+
# All other literals. We match these up to the "," separator or ")" closing paren.
|
|
75
|
+
# Hence we rstrip to drop any whitespace between the literal and the "," or ")".
|
|
76
|
+
row_values << match.rstrip
|
|
77
|
+
else
|
|
78
|
+
# This is minimal validation. We're assuming valid input generated by mysqldump.
|
|
79
|
+
raise "Parse error: unexpected token begginning at #{scanner.peek 80}"
|
|
80
|
+
end
|
|
81
|
+
# Ignore whitespace/separator after any token
|
|
82
|
+
scanner.scan(WHITESPACE)
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
rows
|
|
29
86
|
end
|
|
30
87
|
|
|
31
88
|
def make_valid_value_string(value)
|
|
@@ -37,61 +94,5 @@ class MyObfuscate
|
|
|
37
94
|
"'" + value + "'"
|
|
38
95
|
end
|
|
39
96
|
end
|
|
40
|
-
|
|
41
|
-
# Be aware, strings must be quoted in single quotes!
|
|
42
|
-
def context_aware_mysql_string_split(string)
|
|
43
|
-
in_sub_insert = false
|
|
44
|
-
in_quoted_string = false
|
|
45
|
-
escaped = false
|
|
46
|
-
current_field = nil
|
|
47
|
-
length = string.length
|
|
48
|
-
fields = []
|
|
49
|
-
output = []
|
|
50
|
-
|
|
51
|
-
string.each_char do |i|
|
|
52
|
-
if escaped
|
|
53
|
-
escaped = false
|
|
54
|
-
current_field ||= ""
|
|
55
|
-
current_field << i
|
|
56
|
-
else
|
|
57
|
-
if i == "\\"
|
|
58
|
-
escaped = true
|
|
59
|
-
current_field ||= ""
|
|
60
|
-
current_field << i
|
|
61
|
-
elsif i == "(" && !in_quoted_string && !in_sub_insert
|
|
62
|
-
in_sub_insert = true
|
|
63
|
-
elsif i == ")" && !in_quoted_string && in_sub_insert
|
|
64
|
-
fields << current_field unless current_field.nil?
|
|
65
|
-
output << fields unless fields.length == 0
|
|
66
|
-
in_sub_insert = false
|
|
67
|
-
fields = []
|
|
68
|
-
current_field = nil
|
|
69
|
-
elsif i == "'" && !in_quoted_string
|
|
70
|
-
fields << current_field unless current_field.nil?
|
|
71
|
-
current_field = ''
|
|
72
|
-
in_quoted_string = true
|
|
73
|
-
elsif i == "'" && in_quoted_string
|
|
74
|
-
fields << current_field unless current_field.nil?
|
|
75
|
-
current_field = nil
|
|
76
|
-
in_quoted_string = false
|
|
77
|
-
elsif i == "," && !in_quoted_string && in_sub_insert
|
|
78
|
-
fields << current_field unless current_field.nil?
|
|
79
|
-
current_field = nil
|
|
80
|
-
elsif i == "L" && !in_quoted_string && in_sub_insert && current_field == "NUL"
|
|
81
|
-
current_field = nil
|
|
82
|
-
fields << current_field
|
|
83
|
-
elsif (i == " " || i == "\t") && !in_quoted_string
|
|
84
|
-
# Don't add whitespace not in a string
|
|
85
|
-
elsif in_sub_insert
|
|
86
|
-
current_field ||= ""
|
|
87
|
-
current_field << i
|
|
88
|
-
end
|
|
89
|
-
end
|
|
90
|
-
end
|
|
91
|
-
|
|
92
|
-
fields << current_field unless current_field.nil?
|
|
93
|
-
output << fields unless fields.length == 0
|
|
94
|
-
output
|
|
95
|
-
end
|
|
96
97
|
end
|
|
97
98
|
end
|
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
class MyObfuscate
|
|
2
2
|
class Postgres
|
|
3
3
|
include MyObfuscate::CopyStatementParser
|
|
4
|
+
include MyObfuscate::ConfigScaffoldGenerator
|
|
4
5
|
|
|
5
|
-
# Copy statements contain the column values tab
|
|
6
|
+
# Copy statements contain the column values tab separated like so:
|
|
6
7
|
# blah blah blah blah
|
|
7
8
|
# which we want to turn into:
|
|
8
9
|
# [['blah','blah','blah','blah']]
|
|
@@ -33,7 +34,7 @@ class MyObfuscate
|
|
|
33
34
|
end
|
|
34
35
|
end
|
|
35
36
|
|
|
36
|
-
def make_insert_statement(table_name, column_names, values)
|
|
37
|
+
def make_insert_statement(table_name, column_names, values, ignore = nil)
|
|
37
38
|
values.join("\t")
|
|
38
39
|
end
|
|
39
40
|
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
class MyObfuscate
|
|
2
2
|
class SqlServer
|
|
3
3
|
include MyObfuscate::InsertStatementParser
|
|
4
|
+
include MyObfuscate::ConfigScaffoldGenerator
|
|
4
5
|
|
|
5
6
|
def parse_insert_statement(line)
|
|
6
7
|
if regex_match = insert_regex.match(line)
|
|
@@ -26,7 +27,7 @@ class MyObfuscate
|
|
|
26
27
|
end
|
|
27
28
|
end
|
|
28
29
|
|
|
29
|
-
def make_insert_statement(table_name, column_names, values)
|
|
30
|
+
def make_insert_statement(table_name, column_names, values, ignore = nil)
|
|
30
31
|
values_strings = values.collect do |values|
|
|
31
32
|
"(" + values.join(",") + ")"
|
|
32
33
|
end.join(",")
|
data/lib/my_obfuscate/version.rb
CHANGED
data/lib/my_obfuscate.rb
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
require 'jcode' if RUBY_VERSION < '1.9'
|
|
2
2
|
require 'digest/md5'
|
|
3
|
-
require '
|
|
3
|
+
require 'faker'
|
|
4
4
|
require 'walker_method'
|
|
5
5
|
|
|
6
6
|
# Class for obfuscating MySQL dumps. This can parse mysqldump outputs when using the -c option, which includes
|
|
7
7
|
# column names in the insert statements.
|
|
8
8
|
class MyObfuscate
|
|
9
|
-
attr_accessor :config, :globally_kept_columns, :fail_on_unspecified_columns, :database_type
|
|
9
|
+
attr_accessor :config, :globally_kept_columns, :fail_on_unspecified_columns, :database_type, :scaffolded_tables
|
|
10
10
|
|
|
11
11
|
NUMBER_CHARS = "1234567890"
|
|
12
12
|
USERNAME_CHARS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_" + NUMBER_CHARS
|
|
@@ -16,6 +16,7 @@ class MyObfuscate
|
|
|
16
16
|
# performed. See the README.rdoc file for more information.
|
|
17
17
|
def initialize(configuration = {})
|
|
18
18
|
@config = configuration
|
|
19
|
+
@scaffolded_tables = {}
|
|
19
20
|
end
|
|
20
21
|
|
|
21
22
|
def fail_on_unspecified_columns?
|
|
@@ -42,19 +43,29 @@ class MyObfuscate
|
|
|
42
43
|
database_helper.parse(self, config, input_io, output_io)
|
|
43
44
|
end
|
|
44
45
|
|
|
45
|
-
|
|
46
|
+
# Read an input stream and dump out a config file scaffold. These streams could be StringIO objects, Files,
|
|
47
|
+
# or STDIN and STDOUT.
|
|
48
|
+
def scaffold(input_io, output_io)
|
|
49
|
+
database_helper.generate_config(self, config, input_io, output_io)
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
def reassembling_each_insert(line, table_name, columns, ignore = nil)
|
|
46
53
|
output = database_helper.rows_to_be_inserted(line).map do |sub_insert|
|
|
47
54
|
result = yield(sub_insert)
|
|
48
55
|
result = result.map do |i|
|
|
49
56
|
database_helper.make_valid_value_string(i)
|
|
50
57
|
end
|
|
51
58
|
end
|
|
52
|
-
database_helper.make_insert_statement(table_name, columns, output)
|
|
59
|
+
database_helper.make_insert_statement(table_name, columns, output, ignore)
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
def extra_column_list(table_name, columns)
|
|
63
|
+
config_columns = (config[table_name] || {}).keys
|
|
64
|
+
config_columns - columns
|
|
53
65
|
end
|
|
54
66
|
|
|
55
67
|
def check_for_defined_columns_not_in_table(table_name, columns)
|
|
56
|
-
|
|
57
|
-
missing_columns = config[table_name].keys - columns
|
|
68
|
+
missing_columns = extra_column_list(table_name, columns)
|
|
58
69
|
unless missing_columns.length == 0
|
|
59
70
|
error_message = missing_columns.map do |missing_column|
|
|
60
71
|
"Column '#{missing_column}' could not be found in table '#{table_name}', please fix your obfuscator config."
|
|
@@ -63,8 +74,13 @@ class MyObfuscate
|
|
|
63
74
|
end
|
|
64
75
|
end
|
|
65
76
|
|
|
77
|
+
def missing_column_list(table_name, columns)
|
|
78
|
+
config_columns = (config[table_name] || {}).keys
|
|
79
|
+
columns - (config_columns + (globally_kept_columns || []).map {|i| i.to_sym}).uniq
|
|
80
|
+
end
|
|
81
|
+
|
|
66
82
|
def check_for_table_columns_not_in_definition(table_name, columns)
|
|
67
|
-
missing_columns =
|
|
83
|
+
missing_columns = missing_column_list(table_name, columns)
|
|
68
84
|
unless missing_columns.length == 0
|
|
69
85
|
error_message = missing_columns.map do |missing_column|
|
|
70
86
|
"Column '#{missing_column}' defined in table '#{table_name}', but not found in table definition, please fix your obfuscator config."
|
|
@@ -73,7 +89,7 @@ class MyObfuscate
|
|
|
73
89
|
end
|
|
74
90
|
end
|
|
75
91
|
|
|
76
|
-
def obfuscate_bulk_insert_line(line, table_name, columns)
|
|
92
|
+
def obfuscate_bulk_insert_line(line, table_name, columns, ignore = nil)
|
|
77
93
|
table_config = config[table_name]
|
|
78
94
|
if table_config == :truncate
|
|
79
95
|
""
|
|
@@ -83,7 +99,7 @@ class MyObfuscate
|
|
|
83
99
|
check_for_defined_columns_not_in_table(table_name, columns)
|
|
84
100
|
check_for_table_columns_not_in_definition(table_name, columns) if fail_on_unspecified_columns?
|
|
85
101
|
# Note: Remember to SQL escape strings in what you pass back.
|
|
86
|
-
reassembling_each_insert(line, table_name, columns) do |row|
|
|
102
|
+
reassembling_each_insert(line, table_name, columns, ignore) do |row|
|
|
87
103
|
ConfigApplicator.apply_table_config(row, table_config, columns)
|
|
88
104
|
end
|
|
89
105
|
end
|
|
@@ -93,6 +109,7 @@ end
|
|
|
93
109
|
|
|
94
110
|
require 'my_obfuscate/copy_statement_parser'
|
|
95
111
|
require 'my_obfuscate/insert_statement_parser'
|
|
112
|
+
require 'my_obfuscate/config_scaffold_generator'
|
|
96
113
|
require 'my_obfuscate/mysql'
|
|
97
114
|
require 'my_obfuscate/sql_server'
|
|
98
115
|
require 'my_obfuscate/postgres'
|
data/mise.toml
ADDED
data/my_obfuscate.gemspec
CHANGED
|
@@ -5,19 +5,19 @@ require "my_obfuscate/version"
|
|
|
5
5
|
Gem::Specification.new do |s|
|
|
6
6
|
s.name = %q{my_obfuscate}
|
|
7
7
|
s.version = MyObfuscate::VERSION
|
|
8
|
+
s.required_ruby_version = ">= 3"
|
|
9
|
+
s.license = "MIT"
|
|
8
10
|
|
|
9
11
|
s.authors = ["Andrew Cantino", "Dave Willett", "Mike Grafton", "Mason Glaves", "Greg Bell", "Mavenlink"]
|
|
10
12
|
s.description = %q{Standalone Ruby code for the selective rewriting of MySQL dumps in order to protect user privacy.}
|
|
11
|
-
s.
|
|
12
|
-
s.homepage = %q{http://github.com/mavenlink/my_obfuscate}
|
|
13
|
+
s.homepage = %q{https://github.com/mavenlink/my_obfuscate}
|
|
13
14
|
s.summary = %q{Standalone Ruby code for the selective rewriting of MySQL dumps in order to protect user privacy.}
|
|
14
15
|
|
|
15
|
-
s.add_dependency "
|
|
16
|
+
s.add_dependency "faker"
|
|
16
17
|
s.add_dependency "walker_method"
|
|
17
18
|
s.add_development_dependency "rspec"
|
|
18
19
|
|
|
19
|
-
s.files
|
|
20
|
-
s.
|
|
21
|
-
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
|
20
|
+
s.files = `git ls-files -z`.split("\x0")
|
|
21
|
+
s.executables = `git ls-files -z -- bin/*`.split("\x0").map { |f| File.basename(f) }
|
|
22
22
|
s.require_paths = ["lib"]
|
|
23
23
|
end
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
require 'spec_helper'
|
|
2
|
+
require 'uri'
|
|
2
3
|
|
|
3
4
|
describe MyObfuscate::ConfigApplicator do
|
|
4
5
|
|
|
@@ -7,7 +8,7 @@ describe MyObfuscate::ConfigApplicator do
|
|
|
7
8
|
100.times do
|
|
8
9
|
new_row = MyObfuscate::ConfigApplicator.apply_table_config(["blah", "something_else"], {:a => {:type => :email}}, [:a, :b])
|
|
9
10
|
expect(new_row.length).to eq(2)
|
|
10
|
-
expect(new_row.first).to match(
|
|
11
|
+
expect(new_row.first).to match(URI::MailTo::EMAIL_REGEXP)
|
|
11
12
|
end
|
|
12
13
|
end
|
|
13
14
|
|
|
@@ -73,7 +73,14 @@ describe MyObfuscate::Mysql do
|
|
|
73
73
|
|
|
74
74
|
it "should return a hash of table name, column names for MySQL insert statements" do
|
|
75
75
|
hash = subject.parse_insert_statement("INSERT INTO `some_table` (`email`, `name`, `something`, `age`) VALUES ('bob@honk.com','bob', 'some\\'thin,ge())lse1', 25),('joe@joe.com','joe', 'somethingelse2', 54);")
|
|
76
|
-
expect(hash).to eq({:table_name => :some_table, :column_names => [:email, :name, :something, :age]})
|
|
76
|
+
expect(hash).to eq({:ignore => false, :table_name => :some_table, :column_names => [:email, :name, :something, :age]})
|
|
77
|
+
end
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
describe "#parse_insert_ignore_statement" do
|
|
81
|
+
it "should return a hash of IGNORE, table name, column names for MySQL insert statements" do
|
|
82
|
+
hash = subject.parse_insert_statement("INSERT IGNORE INTO `some_table` (`email`, `name`, `something`, `age`) VALUES ('bob@honk.com','bob', 'some\\'thin,ge())lse1', 25),('joe@joe.com','joe', 'somethingelse2', 54);")
|
|
83
|
+
expect(hash).to eq({:ignore => true, :table_name => :some_table, :column_names => [:email, :name, :something, :age]})
|
|
77
84
|
end
|
|
78
85
|
end
|
|
79
86
|
|
data/spec/my_obfuscate_spec.rb
CHANGED
|
@@ -73,6 +73,31 @@ COPY some_table_to_keep (a, b) FROM stdin;
|
|
|
73
73
|
output.read
|
|
74
74
|
end
|
|
75
75
|
|
|
76
|
+
let(:scaffolder) do
|
|
77
|
+
MyObfuscate.new({
|
|
78
|
+
:some_other_table => {
|
|
79
|
+
:email => {:type => :email, :skip_regexes => [/^[\w\.\_]+@honk\.com$/i, /^dontmurderme@direwolf.com$/]},
|
|
80
|
+
:name => {:type => :string, :length => 8, :chars => MyObfuscate::USERNAME_CHARS},
|
|
81
|
+
:age => {:type => :integer, :between => 10...80, :unless => :nil },
|
|
82
|
+
},
|
|
83
|
+
:single_column_table => {
|
|
84
|
+
:id => {:type => :integer, :between => 2..9, :unless => :nil}
|
|
85
|
+
},
|
|
86
|
+
:another_table => :truncate,
|
|
87
|
+
:some_table_to_keep => :keep
|
|
88
|
+
}).tap do |scaffolder|
|
|
89
|
+
scaffolder.database_type = :postgres
|
|
90
|
+
scaffolder.globally_kept_columns = %w[age]
|
|
91
|
+
end
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
let(:scaffold_output_string) do
|
|
95
|
+
output = StringIO.new
|
|
96
|
+
scaffolder.scaffold(dump, output)
|
|
97
|
+
output.rewind
|
|
98
|
+
output.read
|
|
99
|
+
end
|
|
100
|
+
|
|
76
101
|
it "is able to obfuscate single column tables" do
|
|
77
102
|
expect(output_string).not_to include("1\n2\n")
|
|
78
103
|
expect(output_string).to match(/\d\n\d\n/)
|
|
@@ -106,7 +131,30 @@ COPY some_table_to_keep (a, b) FROM stdin;
|
|
|
106
131
|
expect { output_string }.to raise_error RuntimeError
|
|
107
132
|
end
|
|
108
133
|
end
|
|
134
|
+
|
|
135
|
+
it "when there is no existing config, should scaffold all the columns that are not globally kept" do
|
|
136
|
+
expect(scaffold_output_string).to match(/:email\s+=>\s+:keep.+scaffold/)
|
|
137
|
+
expect(scaffold_output_string).to match(/:name\s+=>\s+:keep.+scaffold/)
|
|
138
|
+
end
|
|
139
|
+
|
|
140
|
+
it "should not scaffold a columns that is globally kept" do
|
|
141
|
+
expect(scaffold_output_string).not_to match(/:age\s+=>\s+:keep.+scaffold/)
|
|
142
|
+
end
|
|
143
|
+
|
|
144
|
+
context "when dump contains a '.' at the end of the line" do
|
|
145
|
+
let(:dump) do
|
|
146
|
+
StringIO.new(<<-SQL)
|
|
147
|
+
COPY another_table (a, b, c, d) FROM stdin;
|
|
148
|
+
1 2 3 4
|
|
149
|
+
1 2 3 .
|
|
150
|
+
\.
|
|
151
|
+
SQL
|
|
152
|
+
end
|
|
153
|
+
it "should not fail if a insert statement ends in a '.''" do
|
|
154
|
+
expect(output_string).not_to match(/1\t2\t3\t\./)
|
|
155
|
+
end
|
|
109
156
|
end
|
|
157
|
+
end
|
|
110
158
|
|
|
111
159
|
describe "when using MySQL" do
|
|
112
160
|
context "when there is nothing to obfuscate" do
|
|
@@ -143,6 +191,7 @@ COPY some_table_to_keep (a, b) FROM stdin;
|
|
|
143
191
|
end
|
|
144
192
|
end
|
|
145
193
|
|
|
194
|
+
|
|
146
195
|
context "when there is something to obfuscate" do
|
|
147
196
|
before do
|
|
148
197
|
@database_dump = StringIO.new(<<-SQL)
|
|
@@ -164,7 +213,7 @@ COPY some_table_to_keep (a, b) FROM stdin;
|
|
|
164
213
|
:one_more_table => {
|
|
165
214
|
# Note: fixed strings must be pre-SQL escaped!
|
|
166
215
|
:password => {:type => :fixed, :string => "monkey"},
|
|
167
|
-
:c => {:type => :null}
|
|
216
|
+
:c => {:type => :null},
|
|
168
217
|
}
|
|
169
218
|
})
|
|
170
219
|
@output = StringIO.new
|
|
@@ -237,6 +286,189 @@ COPY some_table_to_keep (a, b) FROM stdin;
|
|
|
237
286
|
}.not_to raise_error
|
|
238
287
|
end
|
|
239
288
|
end
|
|
289
|
+
|
|
290
|
+
context "when there is an existing config to scaffold" do
|
|
291
|
+
before do
|
|
292
|
+
@database_dump = StringIO.new(<<-SQL)
|
|
293
|
+
INSERT IGNORE INTO `some_table` (`email`, `name`, `something`, `age`) VALUES ('bob@honk.com','bob', 'some\\'thin,ge())lse1', 25),('joe@joe.com','joe', 'somethingelse2', 54);
|
|
294
|
+
SQL
|
|
295
|
+
@ddo = MyObfuscate.new({
|
|
296
|
+
:some_table => {
|
|
297
|
+
:email => {:type => :email, :honk_email_skip => true},
|
|
298
|
+
:name => {:type => :string, :length => 8, :chars => MyObfuscate::USERNAME_CHARS}
|
|
299
|
+
},
|
|
300
|
+
:another_table => :truncate
|
|
301
|
+
})
|
|
302
|
+
@ddo.globally_kept_columns = %w[something]
|
|
303
|
+
@output = StringIO.new
|
|
304
|
+
@ddo.scaffold(@database_dump, @output)
|
|
305
|
+
@output.rewind
|
|
306
|
+
@output_string = @output.read
|
|
307
|
+
end
|
|
308
|
+
|
|
309
|
+
it "should scaffold missing columns" do
|
|
310
|
+
expect(@output_string).to match(/:age\s+=>\s+:keep.+scaffold/)
|
|
311
|
+
end
|
|
312
|
+
|
|
313
|
+
it "should not scaffold globally_kept_columns" do
|
|
314
|
+
expect(@output_string).not_to match(/:something\s+=>\s+:keep.+scaffold/)
|
|
315
|
+
end
|
|
316
|
+
|
|
317
|
+
it "should pass through correct columns" do
|
|
318
|
+
expect(@output_string).not_to match(/:email\s+=>\s+:keep.+scaffold/)
|
|
319
|
+
expect(@output_string).to match(/:email\s+=>/)
|
|
320
|
+
expect(@output_string).not_to match(/\#\s*:email/)
|
|
321
|
+
end
|
|
322
|
+
end
|
|
323
|
+
|
|
324
|
+
context "when using :secondary_address" do
|
|
325
|
+
before do
|
|
326
|
+
@database_dump = StringIO.new(<<-SQL)
|
|
327
|
+
INSERT INTO `some_table` (`email`, `name`, `something`, `age`, `address1`, `address2`) VALUES ('bob@honk.com','bob', 'some\\'thin,ge())lse1', 25, '221B Baker St', 'Suite 100'),('joe@joe.com','joe', 'somethingelse2', 54, '1300 Pennsylvania Ave', '2nd floor');
|
|
328
|
+
SQL
|
|
329
|
+
@ddo = MyObfuscate.new({
|
|
330
|
+
:some_table => {
|
|
331
|
+
:email => {:type => :email, :honk_email_skip => true},
|
|
332
|
+
:name => {:type => :string, :length => 8, :chars => MyObfuscate::USERNAME_CHARS},
|
|
333
|
+
:something => :keep,
|
|
334
|
+
:age => :keep,
|
|
335
|
+
:address1 => :street_address,
|
|
336
|
+
:address2 => :secondary_address
|
|
337
|
+
}})
|
|
338
|
+
@output = StringIO.new
|
|
339
|
+
@ddo.obfuscate(@database_dump, @output)
|
|
340
|
+
@output.rewind
|
|
341
|
+
@output_string = @output.read
|
|
342
|
+
end
|
|
343
|
+
|
|
344
|
+
it "should obfuscate address1" do
|
|
345
|
+
expect(@output_string).to include("address1")
|
|
346
|
+
expect(@output_string).not_to include("Baker St")
|
|
347
|
+
end
|
|
348
|
+
|
|
349
|
+
it "should obfuscate address2" do
|
|
350
|
+
expect(@output_string).to include("address2")
|
|
351
|
+
expect(@output_string).not_to include("Suite 100")
|
|
352
|
+
end
|
|
353
|
+
end
|
|
354
|
+
|
|
355
|
+
context "when there is an existing config to scaffold" do
|
|
356
|
+
before do
|
|
357
|
+
@database_dump = StringIO.new(<<-SQL)
|
|
358
|
+
INSERT INTO `some_table` (`email`, `name`, `something`, `age`, `address1`, `address2`) VALUES ('bob@honk.com','bob', 'some\\'thin,ge())lse1', 25, '221B Baker St', 'Suite 100'),('joe@joe.com','joe', 'somethingelse2', 54, '1300 Pennsylvania Ave', '2nd floor');
|
|
359
|
+
SQL
|
|
360
|
+
@ddo = MyObfuscate.new({
|
|
361
|
+
:some_table => {
|
|
362
|
+
:email => {:type => :email, :honk_email_skip => true},
|
|
363
|
+
:name => {:type => :string, :length => 8, :chars => MyObfuscate::USERNAME_CHARS},
|
|
364
|
+
:something => :keep,
|
|
365
|
+
:age => :keep,
|
|
366
|
+
:gender => {:type => :fixed, :string => "m"},
|
|
367
|
+
:address1 => :street_address,
|
|
368
|
+
:address2 => :secondary_address
|
|
369
|
+
}})
|
|
370
|
+
@output = StringIO.new
|
|
371
|
+
@ddo.scaffold(@database_dump, @output)
|
|
372
|
+
@output.rewind
|
|
373
|
+
@output_string = @output.read
|
|
374
|
+
end
|
|
375
|
+
|
|
376
|
+
it "should enumerate extra columns" do
|
|
377
|
+
expect(@output_string).to match(/\#\s*:gender\s+=>\s+\{type:\s+:fixed,\s*string:.*#\s*unreferenced/)
|
|
378
|
+
end
|
|
379
|
+
|
|
380
|
+
it "should pass through existing columns" do
|
|
381
|
+
expect(@output_string).to match(/:age\s+=>\s+:keep\s*,/)
|
|
382
|
+
expect(@output_string).to match(/:address2\s+=>\s*:secondary_address/)
|
|
383
|
+
end
|
|
384
|
+
end
|
|
385
|
+
|
|
386
|
+
context "when there is an existing config to scaffold with both missing and extra columns" do
|
|
387
|
+
before do
|
|
388
|
+
@database_dump = StringIO.new(<<-SQL)
|
|
389
|
+
INSERT IGNORE INTO `some_table` (`email`, `name`, `something`, `age`) VALUES ('bob@honk.com','bob', 'some\\'thin,ge())lse1', 25),('joe@joe.com','joe', 'somethingelse2', 54);
|
|
390
|
+
SQL
|
|
391
|
+
@ddo = MyObfuscate.new({
|
|
392
|
+
:some_table => {
|
|
393
|
+
:email => {:type => :email, :honk_email_skip => true},
|
|
394
|
+
:name => {:type => :string, :length => 8, :chars => MyObfuscate::USERNAME_CHARS},
|
|
395
|
+
:gender => {:type => :fixed, :string => "m"}
|
|
396
|
+
}})
|
|
397
|
+
@output = StringIO.new
|
|
398
|
+
@ddo.scaffold(@database_dump, @output)
|
|
399
|
+
@output.rewind
|
|
400
|
+
@output_string = @output.read
|
|
401
|
+
end
|
|
402
|
+
|
|
403
|
+
it "should scaffold missing columns" do
|
|
404
|
+
expect(@output_string).to match(/:age\s+=>\s+:keep.+scaffold/)
|
|
405
|
+
expect(@output_string).to match(/:something\s+=>\s+:keep.+scaffold/)
|
|
406
|
+
end
|
|
407
|
+
|
|
408
|
+
it "should enumerate extra columns" do
|
|
409
|
+
expect(@output_string).to match(/\#\s*:gender/)
|
|
410
|
+
end
|
|
411
|
+
end
|
|
412
|
+
|
|
413
|
+
context "when there is an existing config to scaffold and it is just right" do
|
|
414
|
+
before do
|
|
415
|
+
@database_dump = StringIO.new(<<-SQL)
|
|
416
|
+
INSERT INTO `some_table` (`email`, `name`, `something`, `age`) VALUES ('bob@honk.com','bob', 'some\\'thin,ge())lse1', 25),('joe@joe.com','joe', 'somethingelse2', 54);
|
|
417
|
+
SQL
|
|
418
|
+
@ddo = MyObfuscate.new({
|
|
419
|
+
:some_table => {
|
|
420
|
+
:email => {:type => :email, :honk_email_skip => true},
|
|
421
|
+
:name => {:type => :string, :length => 8, :chars => MyObfuscate::USERNAME_CHARS},
|
|
422
|
+
:something => :keep,
|
|
423
|
+
:age => :keep
|
|
424
|
+
}})
|
|
425
|
+
@output = StringIO.new
|
|
426
|
+
@ddo.scaffold(@database_dump, @output)
|
|
427
|
+
@output.rewind
|
|
428
|
+
@output_string = @output.read
|
|
429
|
+
end
|
|
430
|
+
|
|
431
|
+
it "should say that everything is present and accounted for" do
|
|
432
|
+
expect(@output_string).to match(/^\s*\#.*account/)
|
|
433
|
+
expect(@output_string).not_to include("scaffold")
|
|
434
|
+
expect(@output_string).not_to include(":some_table")
|
|
435
|
+
end
|
|
436
|
+
end
|
|
437
|
+
|
|
438
|
+
context "when scaffolding a table with no existing config" do
|
|
439
|
+
before do
|
|
440
|
+
@database_dump = StringIO.new(<<-SQL)
|
|
441
|
+
INSERT INTO `some_table` (`email`, `name`, `something`, `age_of_the_individual_who_is_specified_by_this_row_of_the_table`) VALUES ('bob@honk.com','bob', 'some\\'thin,ge())lse1', 25),('joe@joe.com','joe', 'somethingelse2', 54);
|
|
442
|
+
SQL
|
|
443
|
+
@ddo = MyObfuscate.new({
|
|
444
|
+
:some_other_table => {
|
|
445
|
+
:email => {:type => :email, :honk_email_skip => true},
|
|
446
|
+
:name => {:type => :string, :length => 8, :chars => MyObfuscate::USERNAME_CHARS},
|
|
447
|
+
:something => :keep,
|
|
448
|
+
:age_of_the_individual_who_is_specified_by_this_row_of_the_table => :keep
|
|
449
|
+
}})
|
|
450
|
+
@ddo.globally_kept_columns = %w[name]
|
|
451
|
+
|
|
452
|
+
@output = StringIO.new
|
|
453
|
+
@ddo.scaffold(@database_dump, @output)
|
|
454
|
+
@output.rewind
|
|
455
|
+
@output_string = @output.read
|
|
456
|
+
end
|
|
457
|
+
|
|
458
|
+
it "should scaffold all the columns that are not globally kept" do
|
|
459
|
+
expect(@output_string).to match(/:email\s+=>\s+:keep.+scaffold/)
|
|
460
|
+
expect(@output_string).to match(/:something\s+=>\s+:keep.+scaffold/)
|
|
461
|
+
end
|
|
462
|
+
|
|
463
|
+
it "should not scaffold globally kept columns" do
|
|
464
|
+
expect(@output_string).not_to match(/:name\s+=>\s+:keep.+scaffold/)
|
|
465
|
+
end
|
|
466
|
+
|
|
467
|
+
it "should preserve long column names" do
|
|
468
|
+
expect(@output_string).to match(/:age_of_the_individual_who_is_specified_by_this_row_of_the_table/)
|
|
469
|
+
end
|
|
470
|
+
|
|
471
|
+
end
|
|
240
472
|
end
|
|
241
473
|
|
|
242
474
|
describe "when using MS SQL Server" do
|
|
@@ -389,6 +621,148 @@ COPY some_table_to_keep (a, b) FROM stdin;
|
|
|
389
621
|
}.not_to raise_error
|
|
390
622
|
end
|
|
391
623
|
end
|
|
624
|
+
|
|
625
|
+
context "when there is an existing config to scaffold and it is missing columns" do
|
|
626
|
+
before do
|
|
627
|
+
@database_dump = StringIO.new(<<-SQL)
|
|
628
|
+
INSERT [dbo].[some_table] ([email], [name], [something], [age]) VALUES ('bob@honk.com','bob', 'some''thin,ge())lse1', 25);
|
|
629
|
+
SQL
|
|
630
|
+
@ddo = MyObfuscate.new({
|
|
631
|
+
:some_table => {
|
|
632
|
+
:email => {:type => :email, :honk_email_skip => true},
|
|
633
|
+
:name => {:type => :string, :length => 8, :chars => MyObfuscate::USERNAME_CHARS}
|
|
634
|
+
}})
|
|
635
|
+
@ddo.database_type = :sql_server
|
|
636
|
+
@ddo.globally_kept_columns = %w[something]
|
|
637
|
+
@output = StringIO.new
|
|
638
|
+
@ddo.scaffold(@database_dump, @output)
|
|
639
|
+
@output.rewind
|
|
640
|
+
@output_string = @output.read
|
|
641
|
+
end
|
|
642
|
+
|
|
643
|
+
it "should scaffold columns that can't be found" do
|
|
644
|
+
expect(@output_string).to match(/:age\s+=>\s+:keep.+scaffold/)
|
|
645
|
+
end
|
|
646
|
+
|
|
647
|
+
it "should not scaffold globally_kept_columns" do
|
|
648
|
+
expect(@output_string).not_to match(/:something\s+=>\s+:keep.+scaffold/)
|
|
649
|
+
end
|
|
650
|
+
end
|
|
651
|
+
|
|
652
|
+
context "when there is an existing config to scaffold and it has extra columns" do
|
|
653
|
+
before do
|
|
654
|
+
@database_dump = StringIO.new(<<-SQL)
|
|
655
|
+
INSERT [dbo].[some_table] ([email], [name], [something], [age]) VALUES ('bob@honk.com','bob', 'some''thin,ge())lse1', 25);
|
|
656
|
+
SQL
|
|
657
|
+
@ddo = MyObfuscate.new({
|
|
658
|
+
:some_table => {
|
|
659
|
+
:email => {:type => :email, :honk_email_skip => true},
|
|
660
|
+
:name => {:type => :string, :length => 8, :chars => MyObfuscate::USERNAME_CHARS},
|
|
661
|
+
:something => :keep,
|
|
662
|
+
:age => :keep,
|
|
663
|
+
:gender => {:type => :fixed, :string => "m"}
|
|
664
|
+
}})
|
|
665
|
+
@ddo.database_type = :sql_server
|
|
666
|
+
|
|
667
|
+
@output = StringIO.new
|
|
668
|
+
@ddo.scaffold(@database_dump, @output)
|
|
669
|
+
@output.rewind
|
|
670
|
+
@output_string = @output.read
|
|
671
|
+
end
|
|
672
|
+
|
|
673
|
+
it "should enumerate extra columns" do
|
|
674
|
+
expect(@output_string).to match(/\#\s*:gender/)
|
|
675
|
+
end
|
|
676
|
+
end
|
|
677
|
+
|
|
678
|
+
context "when there is an existing config to scaffold and it has both missing and extra columns" do
|
|
679
|
+
before do
|
|
680
|
+
@database_dump = StringIO.new(<<-SQL)
|
|
681
|
+
INSERT [dbo].[some_table] ([email], [name], [something], [age]) VALUES ('bob@honk.com','bob', 'some''thin,ge())lse1', 25);
|
|
682
|
+
SQL
|
|
683
|
+
@ddo = MyObfuscate.new({
|
|
684
|
+
:some_table => {
|
|
685
|
+
:email => {:type => :email, :honk_email_skip => true},
|
|
686
|
+
:name => {:type => :string, :length => 8, :chars => MyObfuscate::USERNAME_CHARS},
|
|
687
|
+
:gender => {:type => :fixed, :string => "m"}
|
|
688
|
+
}})
|
|
689
|
+
@ddo.database_type = :sql_server
|
|
690
|
+
|
|
691
|
+
@output = StringIO.new
|
|
692
|
+
@ddo.scaffold(@database_dump, @output)
|
|
693
|
+
@output.rewind
|
|
694
|
+
@output_string = @output.read
|
|
695
|
+
end
|
|
696
|
+
|
|
697
|
+
it "should scaffold columns that can't be found" do
|
|
698
|
+
expect(@output_string).to match(/:age\s+=>\s+:keep.+scaffold/)
|
|
699
|
+
expect(@output_string).to match(/:something\s+=>\s+:keep.+scaffold/)
|
|
700
|
+
end
|
|
701
|
+
|
|
702
|
+
it "should enumerate extra columns" do
|
|
703
|
+
expect(@output_string).to match(/\#\s*:gender/)
|
|
704
|
+
end
|
|
705
|
+
end
|
|
706
|
+
|
|
707
|
+
context "when there is an existing config to scaffold and it is just right" do
|
|
708
|
+
before do
|
|
709
|
+
@database_dump = StringIO.new(<<-SQL)
|
|
710
|
+
INSERT [dbo].[some_table] ([email], [name], [something], [age]) VALUES ('bob@honk.com','bob', 'some''thin,ge())lse1', 25);
|
|
711
|
+
SQL
|
|
712
|
+
@ddo = MyObfuscate.new({
|
|
713
|
+
:some_table => {
|
|
714
|
+
:email => {:type => :email, :honk_email_skip => true},
|
|
715
|
+
:name => {:type => :string, :length => 8, :chars => MyObfuscate::USERNAME_CHARS},
|
|
716
|
+
:something => :keep,
|
|
717
|
+
:age => :keep
|
|
718
|
+
}})
|
|
719
|
+
@ddo.database_type = :sql_server
|
|
720
|
+
|
|
721
|
+
@output = StringIO.new
|
|
722
|
+
@ddo.scaffold(@database_dump, @output)
|
|
723
|
+
@output.rewind
|
|
724
|
+
@output_string = @output.read
|
|
725
|
+
end
|
|
726
|
+
|
|
727
|
+
it "should say that everything is present and accounted for" do
|
|
728
|
+
expect(@output_string).to match(/^\s*\#.*account/)
|
|
729
|
+
expect(@output_string).not_to include("scaffold")
|
|
730
|
+
expect(@output_string).not_to include(":some_table")
|
|
731
|
+
end
|
|
732
|
+
end
|
|
733
|
+
|
|
734
|
+
context "when scaffolding a table with no existing config" do
|
|
735
|
+
before do
|
|
736
|
+
@database_dump = StringIO.new(<<-SQL)
|
|
737
|
+
INSERT [dbo].[some_table] ([email], [name], [something], [age]) VALUES ('bob@honk.com','bob', 'some''thin,ge())lse1', 25);
|
|
738
|
+
SQL
|
|
739
|
+
@ddo = MyObfuscate.new({
|
|
740
|
+
:some_other_table => {
|
|
741
|
+
:email => {:type => :email, :honk_email_skip => true},
|
|
742
|
+
:name => {:type => :string, :length => 8, :chars => MyObfuscate::USERNAME_CHARS},
|
|
743
|
+
:something => :keep,
|
|
744
|
+
:age => :keep
|
|
745
|
+
}})
|
|
746
|
+
@ddo.database_type = :sql_server
|
|
747
|
+
@ddo.globally_kept_columns = %w[age]
|
|
748
|
+
|
|
749
|
+
@output = StringIO.new
|
|
750
|
+
@ddo.scaffold(@database_dump, @output)
|
|
751
|
+
@output.rewind
|
|
752
|
+
@output_string = @output.read
|
|
753
|
+
end
|
|
754
|
+
|
|
755
|
+
it "should scaffold all the columns that are not globally kept" do
|
|
756
|
+
expect(@output_string).to match(/:email\s+=>\s+:keep.+scaffold/)
|
|
757
|
+
expect(@output_string).to match(/:name\s+=>\s+:keep.+scaffold/)
|
|
758
|
+
expect(@output_string).to match(/:something\s+=>\s+:keep.+scaffold/)
|
|
759
|
+
end
|
|
760
|
+
|
|
761
|
+
it "should not scaffold globally kept columns" do
|
|
762
|
+
expect(@output_string).not_to match(/:age\s+=>\s+:keep.+scaffold/)
|
|
763
|
+
end
|
|
764
|
+
end
|
|
765
|
+
|
|
392
766
|
end
|
|
393
767
|
end
|
|
394
768
|
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: my_obfuscate
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.5.3
|
|
4
|
+
version: '0.6'
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Andrew Cantino
|
|
@@ -10,13 +10,12 @@ authors:
|
|
|
10
10
|
- Mason Glaves
|
|
11
11
|
- Greg Bell
|
|
12
12
|
- Mavenlink
|
|
13
|
-
autorequire:
|
|
14
13
|
bindir: bin
|
|
15
14
|
cert_chain: []
|
|
16
|
-
date:
|
|
15
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
17
16
|
dependencies:
|
|
18
17
|
- !ruby/object:Gem::Dependency
|
|
19
|
-
name:
|
|
18
|
+
name: faker
|
|
20
19
|
requirement: !ruby/object:Gem::Requirement
|
|
21
20
|
requirements:
|
|
22
21
|
- - ">="
|
|
@@ -59,7 +58,6 @@ dependencies:
|
|
|
59
58
|
version: '0'
|
|
60
59
|
description: Standalone Ruby code for the selective rewriting of MySQL dumps in order
|
|
61
60
|
to protect user privacy.
|
|
62
|
-
email: andrew@mavenlink.com
|
|
63
61
|
executables: []
|
|
64
62
|
extensions: []
|
|
65
63
|
extra_rdoc_files: []
|
|
@@ -75,6 +73,7 @@ files:
|
|
|
75
73
|
- Rakefile
|
|
76
74
|
- lib/my_obfuscate.rb
|
|
77
75
|
- lib/my_obfuscate/config_applicator.rb
|
|
76
|
+
- lib/my_obfuscate/config_scaffold_generator.rb
|
|
78
77
|
- lib/my_obfuscate/copy_statement_parser.rb
|
|
79
78
|
- lib/my_obfuscate/data/en_50K.txt
|
|
80
79
|
- lib/my_obfuscate/insert_statement_parser.rb
|
|
@@ -82,6 +81,7 @@ files:
|
|
|
82
81
|
- lib/my_obfuscate/postgres.rb
|
|
83
82
|
- lib/my_obfuscate/sql_server.rb
|
|
84
83
|
- lib/my_obfuscate/version.rb
|
|
84
|
+
- mise.toml
|
|
85
85
|
- my_obfuscate.gemspec
|
|
86
86
|
- spec/my_obfuscate/config_applicator_spec.rb
|
|
87
87
|
- spec/my_obfuscate/mysql_spec.rb
|
|
@@ -89,10 +89,10 @@ files:
|
|
|
89
89
|
- spec/my_obfuscate/sql_server_spec.rb
|
|
90
90
|
- spec/my_obfuscate_spec.rb
|
|
91
91
|
- spec/spec_helper.rb
|
|
92
|
-
homepage:
|
|
93
|
-
licenses:
|
|
92
|
+
homepage: https://github.com/mavenlink/my_obfuscate
|
|
93
|
+
licenses:
|
|
94
|
+
- MIT
|
|
94
95
|
metadata: {}
|
|
95
|
-
post_install_message:
|
|
96
96
|
rdoc_options: []
|
|
97
97
|
require_paths:
|
|
98
98
|
- lib
|
|
@@ -100,23 +100,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
100
100
|
requirements:
|
|
101
101
|
- - ">="
|
|
102
102
|
- !ruby/object:Gem::Version
|
|
103
|
-
version: '
|
|
103
|
+
version: '3'
|
|
104
104
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
105
105
|
requirements:
|
|
106
106
|
- - ">="
|
|
107
107
|
- !ruby/object:Gem::Version
|
|
108
108
|
version: '0'
|
|
109
109
|
requirements: []
|
|
110
|
-
|
|
111
|
-
rubygems_version: 2.2.2
|
|
112
|
-
signing_key:
|
|
110
|
+
rubygems_version: 3.6.9
|
|
113
111
|
specification_version: 4
|
|
114
112
|
summary: Standalone Ruby code for the selective rewriting of MySQL dumps in order
|
|
115
113
|
to protect user privacy.
|
|
116
|
-
test_files:
|
|
117
|
-
- spec/my_obfuscate/config_applicator_spec.rb
|
|
118
|
-
- spec/my_obfuscate/mysql_spec.rb
|
|
119
|
-
- spec/my_obfuscate/postgres_spec.rb
|
|
120
|
-
- spec/my_obfuscate/sql_server_spec.rb
|
|
121
|
-
- spec/my_obfuscate_spec.rb
|
|
122
|
-
- spec/spec_helper.rb
|
|
114
|
+
test_files: []
|