masking 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. checksums.yaml +7 -0
  2. data/.codeclimate.yml +14 -0
  3. data/.gitignore +16 -0
  4. data/.mdlrc +1 -0
  5. data/.rubocop.yml +18 -0
  6. data/.ruby-version +1 -0
  7. data/.travis.yml +9 -0
  8. data/CODE_OF_CONDUCT.md +74 -0
  9. data/Gemfile +8 -0
  10. data/Gemfile.lock +119 -0
  11. data/LICENSE.txt +21 -0
  12. data/README.md +166 -0
  13. data/Rakefile +11 -0
  14. data/bin/console +11 -0
  15. data/bin/masking_profile +58 -0
  16. data/bin/setup +10 -0
  17. data/config/.keep +0 -0
  18. data/exe/masking +7 -0
  19. data/lib/masking.rb +31 -0
  20. data/lib/masking/cli.rb +42 -0
  21. data/lib/masking/cli/error_message.rb +36 -0
  22. data/lib/masking/cli/error_messages.yml +6 -0
  23. data/lib/masking/config.rb +33 -0
  24. data/lib/masking/config/target_columns.rb +52 -0
  25. data/lib/masking/config/target_columns/column.rb +32 -0
  26. data/lib/masking/config/target_columns/method.rb +41 -0
  27. data/lib/masking/config/target_columns/method/binary.rb +23 -0
  28. data/lib/masking/config/target_columns/method/boolean.rb +29 -0
  29. data/lib/masking/config/target_columns/method/date.rb +30 -0
  30. data/lib/masking/config/target_columns/method/float.rb +23 -0
  31. data/lib/masking/config/target_columns/method/integer.rb +23 -0
  32. data/lib/masking/config/target_columns/method/null.rb +17 -0
  33. data/lib/masking/config/target_columns/method/string.rb +33 -0
  34. data/lib/masking/config/target_columns/method/string_binary_distinctor.rb +31 -0
  35. data/lib/masking/config/target_columns/method/time.rb +28 -0
  36. data/lib/masking/config/target_columns/table.rb +24 -0
  37. data/lib/masking/data_mask_processor.rb +44 -0
  38. data/lib/masking/errors.rb +9 -0
  39. data/lib/masking/insert_statement.rb +74 -0
  40. data/lib/masking/insert_statement/sql_builder.rb +34 -0
  41. data/lib/masking/insert_statement/value.rb +30 -0
  42. data/lib/masking/sql_dump_line.rb +24 -0
  43. data/lib/masking/version.rb +5 -0
  44. data/masking.gemspec +46 -0
  45. data/masking.yml.sample +17 -0
  46. metadata +259 -0
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'date'
4
+
5
module Masking
  class Config
    class TargetColumns
      class Method
        # Masking method that renders a configured date as a quoted SQL literal.
        class Date
          # strftime pattern used for the output, e.g. 2018-08-22.
          FORMAT = '%Y-%m-%d'

          # @param value [#strftime] the date to emit as the masked value
          def initialize(value)
            @date = value
          end

          # @return [String] the date formatted with FORMAT and wrapped in single quotes
          def call
            formatted = date.strftime(FORMAT)
            "'#{formatted}'"
          end

          private

          attr_reader :date
        end
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
module Masking
  class Config
    class TargetColumns
      class Method
        # Masking method that emits a configured float as an unquoted SQL literal.
        class Float
          # @param value [#to_s] the number to emit as the masked value
          def initialize(value)
            @float = value
          end

          # @return [String] the string form of the configured value, without quotes
          def call
            @float.to_s
          end
        end
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
module Masking
  class Config
    class TargetColumns
      class Method
        # Masking method that emits a configured integer as an unquoted SQL literal.
        class Integer
          # @param value [#to_s] the number to emit as the masked value
          def initialize(value)
            @integer = value
          end

          # @return [String] the string form of the configured value, without quotes
          def call
            @integer.to_s
          end
        end
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
module Masking
  class Config
    class TargetColumns
      class Method
        # Masking method that always emits the SQL NULL keyword.
        class Null
          # Accepts and discards any positional arguments, so it can be built
          # the same way as the masking methods that take a value.
          def initialize(*_ignored); end

          # @return [String] the literal text NULL (unquoted)
          def call
            'NULL'
          end
        end
      end
    end
  end
end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
module Masking
  class Config
    class TargetColumns
      class Method
        # Masking method that emits a configured string as a quoted SQL literal,
        # optionally numbering each generated value.
        class String
          # Placeholder that is swapped for a per-instance running number.
          SEQUENTIAL_NUMBER_PLACEHOLDER = /%{n}/.freeze

          # @param value [String] template for the masked value; may contain %{n}
          def initialize(value)
            @string = value
            @sequence = 0
          end

          # Builds the next masked value. The counter advances on every call,
          # so successive calls yield 1, 2, 3, ... in place of the placeholder.
          #
          # @return [String] the quoted value, as a binary (ASCII-8BIT) copy
          def call
            quoted = "'#{output}'"
            quoted.b
          end

          private

          attr_reader :string

          # Only the first placeholder occurrence is substituted (String#sub).
          def output
            string.sub(SEQUENTIAL_NUMBER_PLACEHOLDER, next_sequence.to_s)
          end

          # Advances the counter and returns its new value.
          def next_sequence
            @sequence += 1
          end
        end
      end
    end
  end
end
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'masking/config/target_columns/method/binary'
4
+ require 'masking/config/target_columns/method/string'
5
+
6
module Masking
  class Config
    class TargetColumns
      class Method
        # Factory that picks the Binary or String masking method for a value,
        # based on the value's encoding.
        module StringBinaryDistinctor
          class << self
            # @param value [#encoding] the configured masking value
            # @return [Binary, String] Binary for ASCII-8BIT input, String otherwise
            def new(value)
              masking_class = binary?(value) ? Binary : String
              masking_class.new(value)
            end

            private

            # NOTE: the encoding-tag check mirrors the approach taken in the
            # standard library (ruby/psych), which relies on encoding tags to
            # decide whether a string should be dumped as binary:
            # https://github.com/ruby/psych/commit/8949a47b8cee31e03e21608406ba116adcf74054
            # https://github.com/ruby/psych/issues/278
            # https://github.com/ruby/psych/blob/e01839af57df559b26f74e906062be6c692c89c8/lib/psych/visitors/yaml_tree.rb#L419-L421
            def binary?(string)
              string.encoding == Encoding::ASCII_8BIT
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
module Masking
  class Config
    class TargetColumns
      class Method
        # Masking method that renders a configured time as a quoted SQL literal.
        class Time
          # strftime pattern used for the output, e.g. 2018-08-22 13:27:34.
          FORMAT = '%Y-%m-%d %H:%M:%S'

          # @param value [#strftime] the time to emit as the masked value
          def initialize(value)
            @time = value
          end

          # @return [String] the time formatted with FORMAT and wrapped in single quotes
          def call
            formatted = time.strftime(FORMAT)
            "'#{formatted}'"
          end

          private

          attr_reader :time
        end
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'masking/config/target_columns/column'
4
+
5
module Masking
  class Config
    class TargetColumns
      # A table listed in the masking configuration together with the columns
      # that should be masked in it.
      class Table
        attr_reader :name, :columns

        # @param name [#to_sym] table name; stored as a Symbol
        # @param columns [#map] pairs of column name and masking method value
        #   (e.g. a Hash); each pair becomes a Column
        def initialize(name, columns:)
          @name = name.to_sym
          # `self.name` is required here: the bare `name` is the raw parameter,
          # while Column should receive the symbolized table name.
          @columns = columns.map do |column, method_value|
            Masking::Config::TargetColumns::Column.new(column, table_name: self.name, method_value: method_value)
          end
        end

        # Tables are equal when their names match; columns are not compared.
        # Objects that do not expose a `name` (nil, strings, ...) are simply
        # not equal, instead of raising NoMethodError.
        #
        # @param other [Object]
        # @return [Boolean]
        def ==(other)
          other.respond_to?(:name) && name == other.name
        end
      end
    end
  end
end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'masking/config/target_columns'
4
+ require 'masking/insert_statement'
5
+
6
module Masking
  # Applies the configured column masks to a single INSERT statement line.
  # TODO: find better naming/modeling of DataMaskProcessor
  class DataMaskProcessor
    class << self
      # @param insert_statement_line [String] one INSERT statement from the dump
      # @param target_columns [#contains?, #columns] masking configuration;
      #   defaults to the globally configured target columns
      # @return [String] the original line when its table is not a masking
      #   target, otherwise the rebuilt statement with masked values
      def process(insert_statement_line, target_columns: ::Masking.config.target_columns)
        new(insert_statement_line, target_columns: target_columns).send(:process)
      end
    end

    private

    attr_reader :raw_line, :target_columns, :insert_statement

    def initialize(insert_statement_line, target_columns:)
      @raw_line = insert_statement_line
      @target_columns = target_columns
      @insert_statement = InsertStatement.new(insert_statement_line)
    end

    # TODO: define insert_statement.mask_values(column, mask_method) method & refactoring
    def process
      return raw_line unless target_table?

      target_columns.columns(table_name: insert_statement.table).each do |target_column|
        # Rows are mutated in place, so iterate with `each`; nothing is collected.
        insert_statement.values.each do |value|
          next unless value.column?(target_column.name)

          # masked_value is requested once per row rather than hoisted out of
          # the loop: the masking method may return a different value on each
          # call (NOTE(review): Column is not visible here - confirm).
          value[target_column.name] = target_column.masked_value
        end
      end

      insert_statement.sql
    end

    def target_table?
      target_columns.contains?(table_name: insert_statement.table)
    end
  end
end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
module Masking
  # Base class for the errors defined by this gem.
  class Error < StandardError
    # Configuration-file failures, one class per condition named by the class.
    # They add no behavior of their own.
    class ConfigFileDoesNotExist < Error
    end

    class ConfigFileIsNotFile < Error
    end

    class ConfigFileIsNotValidYaml < Error
    end
  end
end
@@ -0,0 +1,74 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'masking/insert_statement/value'
4
+ require 'masking/insert_statement/sql_builder'
5
+
6
module Masking
  # Parses one `INSERT INTO `table` (columns) VALUES (...),(...);` line of a
  # SQL dump into its table name, column names and value rows, and can
  # rebuild the statement from them.
  class InsertStatement
    # Separator between two value rows inside the VALUES section.
    VALUE_ROW_SPLITTER = '),('
    # Table and column sections are matched lazily so that text inside a value
    # which looks like "` (...) VALUES " cannot be mistaken for the statement
    # header; the values section stays greedy to reach the final semicolon.
    PARSE_REGEXP = /INSERT INTO `(?<table>.+?)` \((?<columns_section>.+?)\) VALUES (?<values_section>.+);/.freeze
    COLUMNS_REGEXP = /`(.*?)`/.freeze

    # NOTE: in mysqldump,
    #   integer/float/NULL types are dumped without single quotes. e.g. -123 / 2.4 / NULL
    #   string/time types are dumped with single quotes. e.g. 'string' / '2018-08-22 13:27:34'
    #   binary/blob types are dumped with a _binary prefix. e.g. _binary 'binarydata'
    #   a single quote inside a value is dumped escaped. e.g. 'chikahiro\'s item'
    #   numbers may use scientific notation. e.g. 1.2E3 / -1.2E-3 / 1e+030 / 9.71726e-17
    #     refs: https://dev.mysql.com/doc/refman/5.7/en/precision-math-numbers.html
    NUMBER_REGEXP = '[+eE0-9.-]+'
    NULL_REGEXP = 'NULL'
    # First alternative: a quoted literal in which a backslash always escapes
    # the following character, so `\'` never terminates the value (a value
    # such as 'x\',' is kept in one piece). Second alternative: the original
    # lazy pattern, kept as a fallback for input the strict form cannot match.
    STRING_TIME_REGEXP = "(?:'(?:[^'\\\\]|\\\\.)*'|'.*?')"
    BINARY_REGEXP = "_binary #{STRING_TIME_REGEXP}"

    VALUE_REGEXP = "(#{NUMBER_REGEXP}|#{NULL_REGEXP}|#{STRING_TIME_REGEXP}|#{BINARY_REGEXP})"

    attr_reader :raw_statement, :table

    # @param raw_statement [String] a complete INSERT statement line
    # @raise [ArgumentError] when the line does not have the
    #   `INSERT INTO `table` (columns) VALUES ...;` shape
    def initialize(raw_statement)
      @raw_statement = raw_statement

      match_data = PARSE_REGEXP.match(raw_statement)
      unless match_data
        raise ArgumentError,
              'unsupported INSERT statement: expected "INSERT INTO `table` (`column`, ...) VALUES (...);" ' \
              'with an explicit column list'
      end

      @table = match_data[:table]
      @columns_section = match_data[:columns_section]
      @values_section = match_data[:values_section]
    end

    # @return [Array<Symbol>] column names in statement order
    def columns
      # NOTE: define and extract to ColumnSet class?
      @columns ||= columns_section.scan(COLUMNS_REGEXP).flatten.map(&:to_sym)
    end

    # @return [Array<Value>] one Value per row of the VALUES section
    def values
      # NOTE: define and extract to ValueSet class?
      @values ||= value_rows.map do |row|
        Value.new(columns: columns, data: row.scan(values_regexp).flatten)
      end
    end

    # @return [String] the statement rebuilt from table, columns and values
    def sql
      SQLBuilder.build(table: table, columns: columns, values: values)
    end

    private

    attr_reader :columns_section, :values_section

    # Splits the VALUES section into row strings, re-joining pieces that were
    # split inside a quoted value.
    def value_rows
      rows = values_section.split(VALUE_ROW_SPLITTER)
      rows.each_with_index { |_, index| recursive_pattern_value_concat(rows, index) }
      rows
    end

    # One capture group per column, anchored to a whole row; the surrounding
    # parentheses are optional because splitting removes them from inner rows.
    def values_regexp
      @values_regexp ||= /^\(?#{([VALUE_REGEXP] * columns.count).join(',')}\)?$/
    end

    # Counts the unescaped single quotes of a row and leaves the row alone when
    # the count is even. When it is odd the row was split inside a value that
    # contains the "),(" pattern, so the next row is appended until it balances.
    #   e.g. INSERT ... VALUES (123,'string ),( abc'),(456,'ab');
    # refs: CSV parsing in the ruby standard library (ja): https://www.clear-code.com/blog/2018/12/25.html
    def recursive_pattern_value_concat(value_rows, index)
      until balanced_quotes?(value_rows[index])
        value_rows[index] += VALUE_ROW_SPLITTER + value_rows.delete_at(index + 1)
      end
    end

    # Escaped backslashes are removed first so that `\\'` is read as an escaped
    # backslash followed by a real quote, not as an escaped quote.
    def balanced_quotes?(row)
      row.gsub(/\\\\/, '').gsub(/\\'/, '').count("'").even?
    end
  end
end
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
module Masking
  class InsertStatement
    # Assembles an INSERT statement line from a table name, column names and
    # value rows.
    class SQLBuilder
      # @param table [#to_s] table name
      # @param columns [Array<#to_s>] column names, in order
      # @param values [Array<#phrase>] rows; each supplies its own "(...)" text
      # @return [String] the statement, terminated by ";" and a newline
      def self.build(table:, columns:, values:)
        new(table: table, columns: columns, values: values).send(:build)
      end

      private

      attr_reader :table, :columns, :values

      def initialize(table:, columns:, values:)
        @table = table
        @columns = columns
        @values = values
      end

      def build
        "INSERT INTO `#{table}` #{columns_section} VALUES #{values_section};\n"
      end

      # Backtick-quoted column names, separated by ", " and wrapped in parentheses.
      def columns_section
        quoted = columns.map { |column| "`#{column}`" }
        "(#{quoted.join(', ')})"
      end

      # Row phrases joined by a bare comma.
      def values_section
        values.map(&:phrase).join(',')
      end
    end
  end
end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'delegate'
4
+
5
module Masking
  class InsertStatement
    # One row of an INSERT statement's VALUES section. Wraps a Struct whose
    # members are the column names, so cells can be read and written by column.
    class Value < ::SimpleDelegator
      # @param columns [Array<Symbol>] column names, used as struct members
      # @param data [Array] cell values, in column order
      def initialize(columns:, data:)
        @columns = columns
        row_class = Struct.new(*columns)
        @data = row_class.new(*data)
        # NOTE: would it be better to drop SimpleDelegator, keep the data in an
        #       instance variable and define accessors for it?
        super(@data)
      end

      # @return [String] the cells joined by commas and wrapped in parentheses
      def phrase
        "(#{to_a.join(',')})"
      end

      # Overridden to make rows comparable by content.
      # NOTE: the original #== also compares the struct subclass, and every
      #       row is built from its own anonymous Struct class.
      def ==(other)
        to_h == other.to_h
      end

      # @param column_name [#to_sym]
      # @return [Boolean] whether this row has the given column
      def column?(column_name)
        wanted = column_name.to_sym
        @columns.include?(wanted)
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'masking/data_mask_processor'
4
+
5
module Masking
  # A single line of a SQL dump; INSERT statements are routed through the
  # masking processor, every other line is passed through untouched.
  class SQLDumpLine
    # Matches when a line starts with INSERT.
    INSERT_STATEMENT_REGEXP = /^INSERT/.freeze

    # @param line [String] one line of the dump
    def initialize(line)
      @line = line
    end

    # @return [String] the processed line for INSERT statements, otherwise the
    #   line exactly as given
    def output
      return line unless insert_statement?

      DataMaskProcessor.process(line)
    end

    private

    attr_reader :line

    def insert_statement?
      line.match?(INSERT_STATEMENT_REGEXP)
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module Masking
  # Version string of this gem release.
  VERSION = '0.0.1'
end