masking 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. checksums.yaml +7 -0
  2. data/.codeclimate.yml +14 -0
  3. data/.gitignore +16 -0
  4. data/.mdlrc +1 -0
  5. data/.rubocop.yml +18 -0
  6. data/.ruby-version +1 -0
  7. data/.travis.yml +9 -0
  8. data/CODE_OF_CONDUCT.md +74 -0
  9. data/Gemfile +8 -0
  10. data/Gemfile.lock +119 -0
  11. data/LICENSE.txt +21 -0
  12. data/README.md +166 -0
  13. data/Rakefile +11 -0
  14. data/bin/console +11 -0
  15. data/bin/masking_profile +58 -0
  16. data/bin/setup +10 -0
  17. data/config/.keep +0 -0
  18. data/exe/masking +7 -0
  19. data/lib/masking.rb +31 -0
  20. data/lib/masking/cli.rb +42 -0
  21. data/lib/masking/cli/error_message.rb +36 -0
  22. data/lib/masking/cli/error_messages.yml +6 -0
  23. data/lib/masking/config.rb +33 -0
  24. data/lib/masking/config/target_columns.rb +52 -0
  25. data/lib/masking/config/target_columns/column.rb +32 -0
  26. data/lib/masking/config/target_columns/method.rb +41 -0
  27. data/lib/masking/config/target_columns/method/binary.rb +23 -0
  28. data/lib/masking/config/target_columns/method/boolean.rb +29 -0
  29. data/lib/masking/config/target_columns/method/date.rb +30 -0
  30. data/lib/masking/config/target_columns/method/float.rb +23 -0
  31. data/lib/masking/config/target_columns/method/integer.rb +23 -0
  32. data/lib/masking/config/target_columns/method/null.rb +17 -0
  33. data/lib/masking/config/target_columns/method/string.rb +33 -0
  34. data/lib/masking/config/target_columns/method/string_binary_distinctor.rb +31 -0
  35. data/lib/masking/config/target_columns/method/time.rb +28 -0
  36. data/lib/masking/config/target_columns/table.rb +24 -0
  37. data/lib/masking/data_mask_processor.rb +44 -0
  38. data/lib/masking/errors.rb +9 -0
  39. data/lib/masking/insert_statement.rb +74 -0
  40. data/lib/masking/insert_statement/sql_builder.rb +34 -0
  41. data/lib/masking/insert_statement/value.rb +30 -0
  42. data/lib/masking/sql_dump_line.rb +24 -0
  43. data/lib/masking/version.rb +5 -0
  44. data/masking.gemspec +46 -0
  45. data/masking.yml.sample +17 -0
  46. metadata +259 -0
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'date'
4
+
5
module Masking
  class Config
    class TargetColumns
      class Method
        # Masking method that renders a date-like value as a single-quoted
        # literal in year-month-day form.
        class Date
          # strftime pattern applied to the wrapped value.
          FORMAT = '%Y-%m-%d'

          # @param value [#strftime] the value to render
          def initialize(value)
            @date = value
          end

          # @return [String] the value formatted with FORMAT, wrapped in single quotes
          def call
            formatted = date.strftime(FORMAT)
            "'#{formatted}'"
          end

          private

          attr_reader :date
        end
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
module Masking
  class Config
    class TargetColumns
      class Method
        # Masking method that renders its value through #to_s, unquoted.
        class Float
          # @param value [#to_s] the value to render
          def initialize(value)
            @float = value
          end

          # @return [String] the string form of the wrapped value
          def call
            float.to_s
          end

          private

          attr_reader :float
        end
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
module Masking
  class Config
    class TargetColumns
      class Method
        # Masking method that renders its value through #to_s, unquoted.
        class Integer
          # @param value [#to_s] the value to render
          def initialize(value)
            @integer = value
          end

          # @return [String] the string form of the wrapped value
          def call
            integer.to_s
          end

          private

          attr_reader :integer
        end
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
module Masking
  class Config
    class TargetColumns
      class Method
        # Masking method that always renders the bare keyword NULL.
        class Null
          # Accepts any positional arguments and ignores them.
          def initialize(*_ignored); end

          # @return [String] the literal text 'NULL'
          def call
            'NULL'
          end
        end
      end
    end
  end
end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
module Masking
  class Config
    class TargetColumns
      class Method
        # Masking method that renders a string template as a single-quoted,
        # binary-encoded literal. The first `%{n}` placeholder in the template
        # is replaced by a counter that grows with every #call.
        class String
          # Placeholder that is swapped for the running counter.
          SEQUENTIAL_NUMBER_PLACEHOLDER = /%{n}/.freeze

          # @param value [::String] the template text
          def initialize(value)
            @string = value
            @sequence = 0
          end

          # @return [::String] the quoted text, re-tagged as ASCII-8BIT via String#b
          def call
            quoted = "'#{output}'"
            quoted.b
          end

          private

          attr_reader :string

          # Substitutes the first placeholder occurrence only (String#sub).
          # The counter advances on every invocation, whether or not the
          # template contains a placeholder.
          def output
            replacement = sequence.to_s
            string.sub(SEQUENTIAL_NUMBER_PLACEHOLDER, replacement)
          end

          # Advances the counter and returns its new value.
          def sequence
            @sequence += 1
          end
        end
      end
    end
  end
end
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'masking/config/target_columns/method/binary'
4
+ require 'masking/config/target_columns/method/string'
5
+
6
module Masking
  class Config
    class TargetColumns
      class Method
        # Factory that picks between the Binary and String masking methods
        # based on the encoding tag of the given value.
        module StringBinaryDistinctor
          class << self
            # @param value [::String] the configured mask value
            # @return [Binary, String] Binary when the value is tagged ASCII-8BIT, String otherwise
            def new(value)
              return Binary.new(value) if binary?(value)

              String.new(value)
            end

            private

            # NOTE: the approach is borrowed from the standard library (ruby/psych):
            #   "Rely on encoding tags to determine if string should be dumped as binary"
            #   https://github.com/ruby/psych/commit/8949a47b8cee31e03e21608406ba116adcf74054
            #   https://github.com/ruby/psych/issues/278
            #   https://github.com/ruby/psych/blob/e01839af57df559b26f74e906062be6c692c89c8/lib/psych/visitors/yaml_tree.rb#L419-L421
            def binary?(string)
              string.encoding == Encoding::ASCII_8BIT
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
module Masking
  class Config
    class TargetColumns
      class Method
        # Masking method that renders a time-like value as a single-quoted
        # "year-month-day hour:minute:second" literal.
        class Time
          # strftime pattern applied to the wrapped value.
          FORMAT = '%Y-%m-%d %H:%M:%S'

          # @param value [#strftime] the value to render
          def initialize(value)
            @time = value
          end

          # @return [String] the value formatted with FORMAT, wrapped in single quotes
          def call
            formatted = time.strftime(FORMAT)
            "'#{formatted}'"
          end

          private

          attr_reader :time
        end
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'masking/config/target_columns/column'
4
+
5
module Masking
  class Config
    class TargetColumns
      # A table named in the masking configuration, together with the
      # Column objects built from its column => mask-value pairs.
      class Table
        attr_reader :name, :columns

        # @param name [#to_sym] table name; stored as a Symbol
        # @param columns [#map] enumerable yielding (column, method_value) pairs,
        #   each of which is turned into a Column bound to this table's name
        def initialize(name, columns:)
          @name = name.to_sym
          @columns = columns.map do |column, method_value|
            Masking::Config::TargetColumns::Column.new(column, table_name: self.name, method_value: method_value)
          end
        end

        # Tables are equal when their names are equal; columns are not compared.
        # Operands that do not expose #name (nil, numbers, ...) are simply
        # unequal instead of raising NoMethodError.
        #
        # @param other [Object]
        # @return [Boolean]
        def ==(other)
          other.respond_to?(:name) && name == other.name
        end
      end
    end
  end
end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'masking/config/target_columns'
4
+ require 'masking/insert_statement'
5
+
6
module Masking
  # Applies the configured column masks to one INSERT statement line.
  #
  # TODO: find better naming/modeling of DataMaskProcessor
  class DataMaskProcessor
    class << self
      # Masks a single INSERT statement line.
      #
      # @param insert_statement_line [String] raw INSERT statement text
      # @param target_columns [#contains?, #columns] masking targets; defaults to
      #   the globally configured ones
      # @return [String] the untouched input when its table is not a masking
      #   target, otherwise the SQL rebuilt from the masked values
      def process(insert_statement_line, target_columns: ::Masking.config.target_columns)
        new(insert_statement_line, target_columns: target_columns).send(:process)
      end
    end

    private

    attr_reader :raw_line, :target_columns, :insert_statement

    def initialize(insert_statement_line, target_columns:)
      @raw_line = insert_statement_line
      @target_columns = target_columns
      @insert_statement = InsertStatement.new(insert_statement_line)
    end

    # TODO: define insert_statement.mask_values(column, mask_method) method & refactoring
    def process
      return raw_line unless target_table?

      target_columns.columns(table_name: insert_statement.table).each do |target_column|
        mask_column(target_column)
      end

      insert_statement.sql
    end

    # Overwrites the given target column in every value row that has it.
    # masked_value is requested once per row, not once per column.
    def mask_column(target_column)
      insert_statement.values.each do |value|
        next unless value.column?(target_column.name)

        value[target_column.name] = target_column.masked_value
      end
    end

    def target_table?
      target_columns.contains?(table_name: insert_statement.table)
    end
  end
end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
module Masking
  # Root of this library's error hierarchy; rescue it to catch any of the
  # specific errors nested below.
  class Error < StandardError
    # Named for a config file path that does not exist (raise site not visible here).
    class ConfigFileDoesNotExist < Error
    end

    # Named for a config path that is not a regular file (raise site not visible here).
    class ConfigFileIsNotFile < Error
    end

    # Named for a config file that is not valid YAML (raise site not visible here).
    class ConfigFileIsNotValidYaml < Error
    end
  end
end
@@ -0,0 +1,74 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'masking/insert_statement/value'
4
+ require 'masking/insert_statement/sql_builder'
5
+
6
module Masking
  # Parsed form of one `INSERT INTO `table` (`col`, ...) VALUES (...),(...);`
  # line. Exposes the table name, the column names, and one Value object per
  # row, and can rebuild the statement text through SQLBuilder.
  class InsertStatement
    # Separator between two value rows inside the VALUES section.
    VALUE_ROW_SPLITTER = '),('
    PARSE_REGEXP = /INSERT INTO `(?<table>.+)` \((?<columns_section>.+)\) VALUES (?<values_section>.+);/.freeze
    COLUMNS_REGEXP = /`(.*?)`/.freeze

    # NOTE: in mysqldump output,
    #   integer/float/NULL values are dumped without single quotes. e.g. -123 / 2.4 / NULL
    #   string/time values are dumped with single quotes. e.g. 'string' / '2018-08-22 13:27:34'
    #   binary/blob values are dumped with a _binary prefix. e.g. _binary 'binarydata'
    #   a single quote inside a value is dumped escaped. e.g. 'chikahiro\'s item'
    #   numbers may use scientific notation. e.g. 1.2E3 / -1.2E-3 / 1e+030 / 9.71726e-17
    #     refs: https://dev.mysql.com/doc/refman/5.7/en/precision-math-numbers.html
    NUMBER_REGEXP = '[+eE0-9.-]+'
    NULL_REGEXP = 'NULL'
    STRING_TIME_REGEXP = "'.*?'"
    BINARY_REGEXP = "_binary '.*?'"

    VALUE_REGEXP = "(#{NUMBER_REGEXP}|#{NULL_REGEXP}|#{STRING_TIME_REGEXP}|#{BINARY_REGEXP})"

    attr_reader :raw_statement, :table

    # @param raw_statement [String] one INSERT statement line
    # @raise [ArgumentError] when the line does not match PARSE_REGEXP
    #   (for example an INSERT without an explicit column list)
    def initialize(raw_statement)
      @raw_statement = raw_statement

      match_data = PARSE_REGEXP.match(raw_statement)
      unless match_data
        raise ArgumentError, "unparsable INSERT statement: #{raw_statement.to_s[0, 80].inspect}"
      end

      @table = match_data[:table]
      @columns_section = match_data[:columns_section]
      @values_section = match_data[:values_section]
    end

    # @return [Array<Symbol>] column names in statement order (memoized)
    def columns
      # NOTE: define and extract to ColumnSet class?
      @columns ||= columns_section.scan(COLUMNS_REGEXP).flatten.map(&:to_sym)
    end

    # @return [Array<Value>] one Value per row of the VALUES section (memoized)
    def values
      # NOTE: define and extract to ValueSet class?
      @values ||= value_rows.map do |row|
        Value.new(columns: columns, data: row.scan(values_regexp).flatten)
      end
    end

    # @return [String] the statement rebuilt from the current table, columns and values
    def sql
      SQLBuilder.build(table: table, columns: columns, values: values)
    end

    private

    attr_reader :columns_section, :values_section

    # Splits the VALUES section into raw row strings, re-joining pieces that
    # were split inside a quoted value containing the "),(" pattern.
    def value_rows
      rows = values_section.split(VALUE_ROW_SPLITTER)
      # The array shrinks while it is walked: each_with_index re-checks the
      # length on every step, so merged-away rows are never visited.
      rows.each_with_index { |_, index| recursive_pattern_value_concat(rows, index) }
      rows
    end

    # One capture group per column; the parentheses are optional because the
    # splitter has already consumed them on all but the outermost rows.
    def values_regexp
      @values_regexp ||= /^\(?#{([VALUE_REGEXP] * columns.count).join(',')}\)?$/
    end

    # Check the single quote count of a row and just continue if it is even.
    # If it is odd, concat with the next row (a value contains the "),(" pattern).
    #   e.g. INSERT ... VALUES (123,'string ),( abc'),(456,'ab');
    # Escaped backslashes and escaped quotes are dropped before counting.
    # refs: implementation of parsing CSV on ruby standard library FasterCSV (ja): https://www.clear-code.com/blog/2018/12/25.html
    #
    # @raise [ArgumentError] when quotes are still unbalanced and no row is left to merge
    def recursive_pattern_value_concat(value_rows, index)
      return if value_rows[index].gsub(/\\\\/, '').gsub(/\\'/, '').scan(/'/).count.even?

      next_row = value_rows.delete_at(index + 1)
      raise ArgumentError, "unbalanced single quotes in VALUES section of `#{table}` INSERT statement" unless next_row

      value_rows[index] += VALUE_ROW_SPLITTER + next_row
      recursive_pattern_value_concat(value_rows, index)
    end
  end
end
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
module Masking
  class InsertStatement
    # Renders an INSERT statement line from a table name, column names and
    # value rows.
    class SQLBuilder
      # @param table [#to_s] table name
      # @param columns [Array<#to_s>] column names, each emitted inside backticks
      # @param values [Array<#phrase>] rows; each contributes its #phrase text
      # @return [String] the statement, terminated by ";" and a newline
      def self.build(table:, columns:, values:)
        new(table: table, columns: columns, values: values).send(:build)
      end

      private

      attr_reader :table, :columns, :values

      def initialize(table:, columns:, values:)
        @table = table
        @columns = columns
        @values = values
      end

      def build
        "INSERT INTO `#{table}` #{columns_section} VALUES #{values_section};\n"
      end

      # Backtick-quoted column names, comma+space separated, in parentheses.
      def columns_section
        quoted = columns.map { |column| "`#{column}`" }
        "(#{quoted.join(', ')})"
      end

      # Row phrases joined by a bare comma.
      def values_section
        values.map(&:phrase).join(',')
      end
    end
  end
end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'delegate'
4
+
5
module Masking
  class InsertStatement
    # One row of an INSERT statement. Wraps an anonymous Struct whose members
    # are the column names, so Struct methods (#[], #[]=, #to_a, #to_h, ...)
    # are available through delegation.
    class Value < ::SimpleDelegator
      # @param columns [Array<Symbol>] column names, used as Struct members
      # @param data [Array] raw values in column order
      def initialize(columns:, data:)
        @columns = columns
        @data = Struct.new(*columns).new(*data)
        # NOTE: is it better to get rid of SimpleDelegator, store data in instance variable and define accessor for it?
        super(@data)
      end

      # @return [String] the row rendered as "(v1,v2,...)"
      def phrase
        "(#{to_a.join(',')})"
      end

      # Overridden to make rows comparable by content.
      # NOTE: the original #== method compares the struct subclass, and every
      # row gets its own anonymous Struct class.
      # Operands that cannot be converted with #to_h are unequal rather than
      # raising NoMethodError.
      #
      # @param other [Object]
      # @return [Boolean]
      def ==(other)
        other.respond_to?(:to_h) && to_h == other.to_h
      end

      # @param column_name [#to_sym]
      # @return [Boolean] whether this row has the given column
      def column?(column_name)
        @columns.include?(column_name.to_sym)
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'masking/data_mask_processor'
4
+
5
module Masking
  # One line of a SQL dump. INSERT lines are routed through
  # DataMaskProcessor; every other line is passed through unchanged.
  class SQLDumpLine
    # A line counts as an INSERT statement when a line within it starts with "INSERT".
    INSERT_STATEMENT_REGEXP = /^INSERT/.freeze

    # @param line [String] raw dump line
    def initialize(line)
      @line = line
    end

    # @return [String] the processed line for INSERT statements, the original line otherwise
    def output
      return line unless insert_statement?

      DataMaskProcessor.process(line)
    end

    private

    attr_reader :line

    def insert_statement?
      line.match?(INSERT_STATEMENT_REGEXP)
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module Masking
  # Version string of this gem release.
  VERSION = '0.0.1'
end