masking 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.codeclimate.yml +14 -0
- data/.gitignore +16 -0
- data/.mdlrc +1 -0
- data/.rubocop.yml +18 -0
- data/.ruby-version +1 -0
- data/.travis.yml +9 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +8 -0
- data/Gemfile.lock +119 -0
- data/LICENSE.txt +21 -0
- data/README.md +166 -0
- data/Rakefile +11 -0
- data/bin/console +11 -0
- data/bin/masking_profile +58 -0
- data/bin/setup +10 -0
- data/config/.keep +0 -0
- data/exe/masking +7 -0
- data/lib/masking.rb +31 -0
- data/lib/masking/cli.rb +42 -0
- data/lib/masking/cli/error_message.rb +36 -0
- data/lib/masking/cli/error_messages.yml +6 -0
- data/lib/masking/config.rb +33 -0
- data/lib/masking/config/target_columns.rb +52 -0
- data/lib/masking/config/target_columns/column.rb +32 -0
- data/lib/masking/config/target_columns/method.rb +41 -0
- data/lib/masking/config/target_columns/method/binary.rb +23 -0
- data/lib/masking/config/target_columns/method/boolean.rb +29 -0
- data/lib/masking/config/target_columns/method/date.rb +30 -0
- data/lib/masking/config/target_columns/method/float.rb +23 -0
- data/lib/masking/config/target_columns/method/integer.rb +23 -0
- data/lib/masking/config/target_columns/method/null.rb +17 -0
- data/lib/masking/config/target_columns/method/string.rb +33 -0
- data/lib/masking/config/target_columns/method/string_binary_distinctor.rb +31 -0
- data/lib/masking/config/target_columns/method/time.rb +28 -0
- data/lib/masking/config/target_columns/table.rb +24 -0
- data/lib/masking/data_mask_processor.rb +44 -0
- data/lib/masking/errors.rb +9 -0
- data/lib/masking/insert_statement.rb +74 -0
- data/lib/masking/insert_statement/sql_builder.rb +34 -0
- data/lib/masking/insert_statement/value.rb +30 -0
- data/lib/masking/sql_dump_line.rb +24 -0
- data/lib/masking/version.rb +5 -0
- data/masking.gemspec +46 -0
- data/masking.yml.sample +17 -0
- metadata +259 -0
@@ -0,0 +1,30 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'date'
|
4
|
+
|
5
|
+
module Masking
  class Config
    class TargetColumns
      class Method
        # Mask method that renders a date-like value as a quoted SQL date literal.
        class Date
          # strftime pattern used for the rendered literal (year-month-day).
          FORMAT = '%Y-%m-%d'

          # @param value [#strftime] the replacement date taken from the mask configuration
          def initialize(value)
            @value = value
          end

          # @return [String] the date formatted with FORMAT and wrapped in single quotes
          def call
            formatted = @value.strftime(FORMAT)
            "'#{formatted}'"
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Masking
  class Config
    class TargetColumns
      class Method
        # Mask method that renders a float value as an unquoted SQL literal.
        class Float
          # @param value [#to_s] the replacement value taken from the mask configuration
          def initialize(value)
            @value = value
          end

          # @return [String] the string form of the configured value, without quotes
          def call
            @value.to_s
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Masking
  class Config
    class TargetColumns
      class Method
        # Mask method that renders an integer value as an unquoted SQL literal.
        class Integer
          # @param value [#to_s] the replacement value taken from the mask configuration
          def initialize(value)
            @value = value
          end

          # @return [String] the string form of the configured value, without quotes
          def call
            @value.to_s
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Masking
  class Config
    class TargetColumns
      class Method
        # Mask method that renders a string value as a quoted SQL literal.
        #
        # The configured string may contain the placeholder `%{n}`; every call
        # substitutes the next number of a per-instance sequence (1, 2, 3, ...).
        class String
          SEQUENTIAL_NUMBER_PLACEHOLDER = /%{n}/.freeze

          # @param value [::String] the replacement text, optionally containing `%{n}`
          def initialize(value)
            @string = value
            @sequence = 0
          end

          # Advances the sequence by one and renders the value.
          #
          # NOTE(review): the configured text is not SQL-escaped here; a value
          # containing a single quote has to be escaped in the configuration.
          #
          # @return [::String] the single-quoted value as a binary (ASCII-8BIT) string
          def call
            "'#{output}'".b
          end

          private

          attr_reader :string

          # Replaces *every* placeholder with the same sequence number. `sub` would
          # only replace the first one and leak a literal `%{n}` into the output.
          def output
            string.gsub(SEQUENTIAL_NUMBER_PLACEHOLDER, sequence.to_s)
          end

          # Increments and returns the sequence counter; called once per #call.
          def sequence
            @sequence += 1
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'masking/config/target_columns/method/binary'
|
4
|
+
require 'masking/config/target_columns/method/string'
|
5
|
+
|
6
|
+
module Masking
  class Config
    class TargetColumns
      class Method
        # Factory that picks between the Binary and String mask methods
        # based on the encoding of the configured value.
        module StringBinaryDistinctor
          # @param value [::String] the configured mask value
          # @return [Binary, String] a Binary for ASCII-8BIT values, otherwise a String
          def self.new(value)
            mask_class = binary?(value) ? Binary : String
            mask_class.new(value)
          end

          # NOTE: this is referenced code from standard library
          # ruby/psych: Rely on encoding tags to determine if string should be dumped as binary
          # https://github.com/ruby/psych/commit/8949a47b8cee31e03e21608406ba116adcf74054
          # https://github.com/ruby/psych/issues/278
          # https://github.com/ruby/psych/blob/e01839af57df559b26f74e906062be6c692c89c8/lib/psych/visitors/yaml_tree.rb#L419-L421
          def self.binary?(string)
            string.encoding == Encoding::ASCII_8BIT
          end
          private_class_method :binary?
        end
      end
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Masking
  class Config
    class TargetColumns
      class Method
        # Mask method that renders a time-like value as a quoted SQL datetime literal.
        class Time
          # strftime pattern used for the rendered literal (date, then 24-hour time).
          FORMAT = '%Y-%m-%d %H:%M:%S'

          # @param value [#strftime] the replacement time taken from the mask configuration
          def initialize(value)
            @value = value
          end

          # @return [String] the time formatted with FORMAT and wrapped in single quotes
          def call
            formatted = @value.strftime(FORMAT)
            "'#{formatted}'"
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'masking/config/target_columns/column'
|
4
|
+
|
5
|
+
module Masking
  class Config
    class TargetColumns
      # A table named in the mask configuration together with its target columns.
      class Table
        attr_reader :name, :columns

        # @param name [#to_sym] table name; stored as a Symbol
        # @param columns [#map] pairs of column name and mask method value
        #   (presumably the Hash parsed from the configuration file — verify against caller)
        def initialize(name, columns:)
          @name = name.to_sym
          @columns = columns.map do |column, method_value|
            # self.name is the symbolized name, not the raw `name` argument
            Masking::Config::TargetColumns::Column.new(column, table_name: self.name, method_value: method_value)
          end
        end

        # Tables are equal when their names are equal; columns are not compared.
        # Objects that do not expose #name (e.g. nil) are simply not equal,
        # instead of raising NoMethodError.
        #
        # @param other [Object]
        # @return [Boolean]
        def ==(other)
          other.respond_to?(:name) && name == other.name
        end
      end
    end
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'masking/config/target_columns'
|
4
|
+
require 'masking/insert_statement'
|
5
|
+
|
6
|
+
module Masking
  # Applies the configured masks to a single INSERT statement line.
  #
  # TODO: find better naming/modeling of DataMaskProcessor
  class DataMaskProcessor
    class << self
      # @param insert_statement_line [String] one raw INSERT statement line
      # @param target_columns [#contains?, #columns] mask configuration;
      #   defaults to the globally configured target columns
      # @return [String] the original line when its table is not a mask target,
      #   otherwise the rebuilt statement with masked values
      def process(insert_statement_line, target_columns: ::Masking.config.target_columns)
        new(insert_statement_line, target_columns: target_columns).send(:process)
      end
    end

    private

    attr_reader :raw_line, :target_columns, :insert_statement

    def initialize(insert_statement_line, target_columns:)
      @raw_line = insert_statement_line
      @target_columns = target_columns
      @insert_statement = InsertStatement.new(insert_statement_line)
    end

    # TODO: define insert_statement.mask_values(column, mask_method) method & refactoring
    def process
      return raw_line unless target_table?

      target_columns.columns(table_name: insert_statement.table).each do |target_column|
        mask_column(target_column)
      end

      insert_statement.sql
    end

    # Overwrites the given column in every row that has it. `masked_value` is
    # requested once per row on purpose, so each row gets its own value.
    def mask_column(target_column)
      insert_statement.values.each do |value|
        next unless value.column?(target_column.name)

        value[target_column.name] = target_column.masked_value
      end
    end

    def target_table?
      target_columns.contains?(table_name: insert_statement.table)
    end
  end
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'masking/insert_statement/value'
|
4
|
+
require 'masking/insert_statement/sql_builder'
|
5
|
+
|
6
|
+
module Masking
  # Parsed form of one mysqldump `INSERT INTO `table` (`col`, ...) VALUES (...),(...);` line.
  #
  # Construction raises NoMethodError when the line does not match PARSE_REGEXP
  # (for example a statement without an explicit column list).
  class InsertStatement
    VALUE_ROW_SPLITTER = '),('
    # The table and columns captures are lazy so they stop at the first "` (" and
    # the first ") VALUES ". A greedy match would run on to the last ") VALUES "
    # on the line, which can sit inside a string value and corrupts the parse.
    PARSE_REGEXP = /INSERT INTO `(?<table>.+?)` \((?<columns_section>.+?)\) VALUES (?<values_section>.+);/.freeze
    COLUMNS_REGEXP = /`(.*?)`/.freeze

    # NOTE: in mysqldump,
    #   integer/float/NULL type has dumped without single quote. e.g. -123 / 2.4 / NULL
    #   string/time type has dumped with single quote. e.g. 'string' / '2018-08-22 13:27:34'
    #   binary/blob type has dumped with _binary prefix. e.g. _binary 'binarydata'
    #   if there is single quote inside of value, it will dumped with escape. e.g. 'chikahiro\'s item'
    #   in number, there could be include Scientific notation e.g. 1.2E3 / -1.2E-3 / 1e+030 / 9.71726e-17
    #     refs: https://dev.mysql.com/doc/refman/5.7/en/precision-math-numbers.html
    NUMBER_REGEXP = '[+eE0-9.-]+'
    NULL_REGEXP = 'NULL'
    STRING_TIME_REGEXP = "'.*?'"
    BINARY_REGEXP = "_binary '.*?'"

    VALUE_REGEXP = "(#{NUMBER_REGEXP}|#{NULL_REGEXP}|#{STRING_TIME_REGEXP}|#{BINARY_REGEXP})"

    attr_reader :raw_statement, :table

    # @param raw_statement [String] one INSERT statement line of a dump
    def initialize(raw_statement)
      @raw_statement = raw_statement

      PARSE_REGEXP.match(raw_statement).tap do |match_data|
        @table = match_data[:table]
        @columns_section = match_data[:columns_section]
        @values_section = match_data[:values_section]
      end
    end

    # @return [Array<Symbol>] column names in statement order
    def columns
      # NOTE: define and extract to ColumnSet class?
      @columns ||= columns_section.scan(COLUMNS_REGEXP).flatten.map(&:to_sym)
    end

    # @return [Array<Value>] one Value per row of the VALUES section
    def values
      # NOTE: define and extract to ValueSet class?
      @values ||= values_section.split(VALUE_ROW_SPLITTER)
                                .tap { |rows| rows.each_with_index { |_, i| recursive_pattern_value_concat(rows, i) } }
                                .map { |row| row.scan(values_regexp).flatten }
                                .map { |data| Value.new(columns: columns, data: data) }
    end

    # @return [String] the statement rebuilt from the (possibly modified) values
    def sql
      SQLBuilder.build(table: table, columns: columns, values: values)
    end

    private

    attr_reader :columns_section, :values_section

    # One capture group per column; the surrounding parentheses are optional
    # because splitting on "),(" strips them from the inner rows.
    def values_regexp
      @values_regexp ||= /^\(?#{([VALUE_REGEXP] * columns.count).join(',')}\)?$/
    end

    # Check single quote count on each value, and just continue if it's even number.
    # if it's odd, concat with next row (it means a value contains "),(" pattern)
    # e.g. INSERT ... VALUES (123,'string ),( abc'),(456,'ab');
    # refs: implementation of parsing CSV on ruby standard library FasterCSV (ja): https://www.clear-code.com/blog/2018/12/25.html
    def recursive_pattern_value_concat(value_rows, index)
      # escaped backslashes and escaped quotes are dropped before counting
      return if value_rows[index].gsub(/\\\\/, '').gsub(/\\'/, '').count("'").even?

      value_rows[index] += VALUE_ROW_SPLITTER + value_rows.delete_at(index + 1)
      recursive_pattern_value_concat(value_rows, index)
    end
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Masking
  class InsertStatement
    # Assembles an INSERT statement line from a table name, column names and row values.
    class SQLBuilder
      # @param table [#to_s] table name
      # @param columns [Array<#to_s>] column names, in order
      # @param values [Array<#phrase>] rows; each must answer #phrase with its "(...)" text
      # @return [String] the statement, terminated by ";" and a newline
      def self.build(table:, columns:, values:)
        new(table: table, columns: columns, values: values).send(:build)
      end

      def initialize(table:, columns:, values:)
        @table = table
        @columns = columns
        @values = values
      end

      private

      attr_reader :table, :columns, :values

      def build
        "INSERT INTO `#{table}` #{columns_section} VALUES #{values_section};\n"
      end

      # Backtick-quoted column names, comma+space separated, in parentheses.
      def columns_section
        quoted_names = columns.map { |column| "`#{column}`" }
        "(#{quoted_names.join(', ')})"
      end

      # Row phrases joined by a bare comma.
      def values_section
        values.map(&:phrase).join(',')
      end
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'delegate'
|
4
|
+
|
5
|
+
module Masking
  class InsertStatement
    # One row of an INSERT statement. Delegates to a Struct whose members are the
    # column names, so cells can be read and written with `value[:column]`.
    class Value < ::SimpleDelegator
      class << self
        # Returns the Struct class for a column list, creating it on first use.
        # Every row of a statement shares the same columns, so the class is cached
        # instead of defining a new anonymous Struct for each row.
        #
        # @api private
        # @param columns [Array<Symbol>]
        # @return [Class] a Struct subclass with `columns` as members
        def struct_class_for(columns)
          @struct_classes ||= {}
          @struct_classes.fetch(columns) do
            # the key is a frozen copy so later mutation of `columns` cannot corrupt the cache
            @struct_classes[columns.dup.freeze] = Struct.new(*columns)
          end
        end
      end

      # @param columns [Array<Symbol>] column names, in statement order
      # @param data [Array<String>] raw SQL text of each cell, in the same order
      def initialize(columns:, data:)
        @columns = columns
        @data = Value.struct_class_for(columns).new(*data)
        # NOTE: is it better to get rid of SimpleDelegator, store data in instance variable and define accessor for it?
        super(@data)
      end

      # @return [String] the row as it appears in the VALUES section, e.g. "(1,'a')"
      def phrase
        "(#{to_a.join(',')})"
      end

      # Rows are equal when their column => value mappings are equal.
      def ==(other)
        to_h == other.to_h
      end

      # @param column_name [#to_sym]
      # @return [Boolean] whether this row has the given column
      def column?(column_name)
        @columns.include?(column_name.to_sym)
      end
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'masking/data_mask_processor'
|
4
|
+
|
5
|
+
module Masking
  # A single line of an SQL dump. INSERT lines are handed to DataMaskProcessor;
  # every other line is returned unchanged.
  class SQLDumpLine
    INSERT_STATEMENT_REGEXP = /^INSERT/.freeze

    # @param line [String] one line read from the dump
    def initialize(line)
      @line = line
    end

    # @return [String] the processed line for INSERT statements, otherwise the line itself
    def output
      return line unless insert_statement?

      DataMaskProcessor.process(line)
    end

    private

    attr_reader :line

    def insert_statement?
      line.match?(INSERT_STATEMENT_REGEXP)
    end
  end
end
|