masking 1.0.0 → 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.circleci/config.yml +16 -7
- data/.codeclimate.yml +4 -0
- data/.rubocop.yml +7 -0
- data/.ruby-version +1 -1
- data/CHANGELOG.md +12 -1
- data/Dockerfile +1 -1
- data/Gemfile.lock +50 -69
- data/README.md +23 -29
- data/benchmark/masking.yml +12 -0
- data/{bin/benchmark.rb → benchmark/run.rb} +4 -4
- data/benchmark/users.sql +400 -0
- data/lib/masking.rb +4 -4
- data/lib/masking/cli.rb +3 -2
- data/lib/masking/config/target_columns/column.rb +1 -9
- data/lib/masking/data_mask_processor.rb +35 -27
- data/lib/masking/data_mask_processor/cache.rb +22 -0
- data/lib/masking/insert_statement.rb +14 -5
- data/lib/masking/insert_statement/sql_builder.rb +5 -10
- data/lib/masking/sql_dump_line.rb +21 -6
- data/lib/masking/version.rb +1 -1
- data/masking.gemspec +4 -4
- metadata +20 -17
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8aa45bdedacc8108e37fb65b293936f242e4b7121216d28efb45d7c0c58be632
|
4
|
+
data.tar.gz: a7286cfa3cdfe8eab59d65434659238cfaa20c71a186694dc59f772c0668f327
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 30bc83a25cec0e15687b4c98a04f387f11ef56f76808784ef4edb3da827954470c671ba70dcc17e388f914c550b4ce2833352eb05ff777066ae250f428af470a
|
7
|
+
data.tar.gz: c3fe305d12996ad52d0cf640cda3d37b8d772af15b188e226045f55d38fd160f0670d03f55e2cd34f18054718e5cf4984af5a2f7d7e2f0cb0db9fd634e3e774c
|
data/.circleci/config.yml
CHANGED
@@ -1,18 +1,27 @@
|
|
1
1
|
version: 2.1
|
2
2
|
|
3
3
|
test_attributes: &test_attributes
|
4
|
-
|
4
|
+
docker:
|
5
|
+
- image: circleci/buildpack-deps:trusty
|
6
|
+
resource_class: small
|
5
7
|
steps:
|
6
8
|
- checkout
|
7
|
-
-
|
8
|
-
- run:
|
9
|
-
|
9
|
+
- setup_remote_docker
|
10
|
+
- run:
|
11
|
+
name: build docker image
|
12
|
+
command: docker build -t masking-$RUBY_VERSION-$CIRCLE_SHA1 --build-arg ruby_version=$RUBY_VERSION .
|
13
|
+
- run:
|
14
|
+
name: output Ruby version (debug)
|
15
|
+
command: docker run --entrypoint sh masking-$RUBY_VERSION-$CIRCLE_SHA1 -c "ruby -v" # debug
|
16
|
+
- run:
|
17
|
+
name: run test
|
18
|
+
command: docker run --entrypoint sh -e CI -e CODECOV_TOKEN masking-$RUBY_VERSION-$CIRCLE_SHA1 -c "bundle exec rspec"
|
10
19
|
|
11
20
|
jobs:
|
12
|
-
test-
|
21
|
+
test-ruby27:
|
13
22
|
<<: *test_attributes
|
14
23
|
environment:
|
15
|
-
RUBY_VERSION:
|
24
|
+
RUBY_VERSION: '2.7'
|
16
25
|
test-ruby26:
|
17
26
|
<<: *test_attributes
|
18
27
|
environment:
|
@@ -27,4 +36,4 @@ workflows:
|
|
27
36
|
jobs:
|
28
37
|
- test-ruby26
|
29
38
|
- test-ruby25
|
30
|
-
- test-
|
39
|
+
- test-ruby27
|
data/.codeclimate.yml
CHANGED
@@ -9,6 +9,10 @@ plugins:
|
|
9
9
|
enabled: false
|
10
10
|
MD033: # MD033/no-inline-html
|
11
11
|
enabled: false
|
12
|
+
MD024: # Multiple headers with the same content; this is okay for CHANGELOG.md
|
13
|
+
enabled: false
|
14
|
+
MD046: # Code block style, not possible to work with number list
|
15
|
+
enabled: false
|
12
16
|
# below 3 checks are disabled because Codeclimate's Markdownlint is not latest version (0.5.0)
|
13
17
|
MD023:
|
14
18
|
enabled: false
|
data/.rubocop.yml
CHANGED
@@ -10,9 +10,16 @@ Metrics/LineLength:
|
|
10
10
|
|
11
11
|
Metrics/BlockLength:
|
12
12
|
Max: 75
|
13
|
+
Exclude:
|
14
|
+
- spec/**/*
|
13
15
|
|
14
16
|
Style/Documentation:
|
15
17
|
Enabled: false
|
16
18
|
|
17
19
|
Style/CharacterLiteral:
|
18
20
|
Enabled: false
|
21
|
+
|
22
|
+
Style/BlockDelimiters:
|
23
|
+
Enabled: true
|
24
|
+
Exclude:
|
25
|
+
- spec/**/*
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
2.
|
1
|
+
2.7.0
|
data/CHANGELOG.md
CHANGED
@@ -7,6 +7,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
7
7
|
|
8
8
|
## [Unreleased]
|
9
9
|
|
10
|
+
## [v1.0.1] - 2019-12-31
|
11
|
+
|
12
|
+
### Added
|
13
|
+
|
14
|
+
- add Ruby 2.7 support [#53](https://github.com/kibitan/masking/pull/53)
|
15
|
+
|
16
|
+
### Changed
|
17
|
+
|
18
|
+
- refactoring [#48](https://github.com/kibitan/masking/pull/48) [#49](https://github.com/kibitan/masking/pull/49) [#50](https://github.com/kibitan/masking/pull/50)
|
19
|
+
|
10
20
|
## [v1.0.0] - 2019-11-10
|
11
21
|
|
12
22
|
### Added
|
@@ -46,7 +56,8 @@ $ bin/benchmark.rb
|
|
46
56
|
|
47
57
|
Initial release version. 🎉
|
48
58
|
|
49
|
-
[Unreleased]: https://github.com/kibitan/masking/compare/
|
59
|
+
[Unreleased]: https://github.com/kibitan/masking/compare/v1.0.0...HEAD
|
60
|
+
[v1.0.0]: https://github.com/kibitan/masking/compare/v0.0.3...v1.0.0
|
50
61
|
[v0.0.3]: https://github.com/kibitan/masking/compare/v0.0.2...v0.0.3
|
51
62
|
[v0.0.2]: https://github.com/kibitan/masking/compare/v0.0.1...v0.0.2
|
52
63
|
[v0.0.1]: https://github.com/kibitan/masking/tree/v0.0.1
|
data/Dockerfile
CHANGED
@@ -6,7 +6,7 @@ WORKDIR /app
|
|
6
6
|
RUN addgroup -S app && adduser -S -G app app
|
7
7
|
USER app
|
8
8
|
COPY --chown=app . ./
|
9
|
-
RUN gem install bundler:2.
|
9
|
+
RUN gem install bundler:2.1.2 && bundle install -j "$(nproc)"
|
10
10
|
|
11
11
|
FROM builder AS with-mysql-client
|
12
12
|
USER root
|
data/Gemfile.lock
CHANGED
@@ -1,119 +1,100 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
masking (1.0.
|
4
|
+
masking (1.0.1)
|
5
5
|
|
6
6
|
GEM
|
7
7
|
remote: https://rubygems.org/
|
8
8
|
specs:
|
9
9
|
ast (2.4.0)
|
10
|
-
byebug (
|
10
|
+
byebug (11.0.1)
|
11
|
+
codecov (0.1.16)
|
12
|
+
json
|
13
|
+
simplecov
|
14
|
+
url
|
11
15
|
coderay (1.1.2)
|
12
16
|
colored (1.2)
|
13
|
-
coveralls (0.7.1)
|
14
|
-
multi_json (~> 1.3)
|
15
|
-
rest-client
|
16
|
-
simplecov (>= 0.7)
|
17
|
-
term-ansicolor
|
18
|
-
thor
|
19
17
|
diff-lcs (1.3)
|
20
|
-
docile (1.3.
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
mixlib-
|
31
|
-
mixlib-config (~> 2.2, >= 2.2.1)
|
18
|
+
docile (1.3.2)
|
19
|
+
jaro_winkler (1.5.4)
|
20
|
+
json (2.3.0)
|
21
|
+
kramdown (2.1.0)
|
22
|
+
kramdown-parser-gfm (1.1.0)
|
23
|
+
kramdown (~> 2.0)
|
24
|
+
mdl (0.8.0)
|
25
|
+
kramdown (~> 2.0)
|
26
|
+
kramdown-parser-gfm (~> 1.0)
|
27
|
+
mixlib-cli (~> 2.1, >= 2.1.1)
|
28
|
+
mixlib-config (>= 2.2.1, < 4)
|
32
29
|
method_source (0.9.2)
|
33
|
-
|
34
|
-
|
35
|
-
mime-types-data (3.2018.0812)
|
36
|
-
mixlib-cli (1.7.0)
|
37
|
-
mixlib-config (2.2.18)
|
30
|
+
mixlib-cli (2.1.5)
|
31
|
+
mixlib-config (3.0.5)
|
38
32
|
tomlrb
|
39
|
-
|
40
|
-
|
41
|
-
parallel (1.12.1)
|
42
|
-
parser (2.5.3.0)
|
33
|
+
parallel (1.19.1)
|
34
|
+
parser (2.6.5.0)
|
43
35
|
ast (~> 2.4.0)
|
44
|
-
powerpack (0.1.2)
|
45
36
|
pry (0.12.2)
|
46
37
|
coderay (~> 1.1.0)
|
47
38
|
method_source (~> 0.9.0)
|
48
|
-
pry-byebug (3.
|
49
|
-
byebug (~>
|
39
|
+
pry-byebug (3.7.0)
|
40
|
+
byebug (~> 11.0)
|
50
41
|
pry (~> 0.10)
|
51
42
|
rainbow (3.0.0)
|
52
|
-
rake (
|
43
|
+
rake (13.0.1)
|
53
44
|
rake-notes (0.2.2)
|
54
45
|
colored
|
55
46
|
rake
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
rspec (3.
|
61
|
-
rspec-
|
62
|
-
|
63
|
-
rspec-mocks (~> 3.8.0)
|
64
|
-
rspec-core (3.8.0)
|
65
|
-
rspec-support (~> 3.8.0)
|
66
|
-
rspec-expectations (3.8.2)
|
47
|
+
rspec (3.9.0)
|
48
|
+
rspec-core (~> 3.9.0)
|
49
|
+
rspec-expectations (~> 3.9.0)
|
50
|
+
rspec-mocks (~> 3.9.0)
|
51
|
+
rspec-core (3.9.0)
|
52
|
+
rspec-support (~> 3.9.0)
|
53
|
+
rspec-expectations (3.9.0)
|
67
54
|
diff-lcs (>= 1.2.0, < 2.0)
|
68
|
-
rspec-support (~> 3.
|
69
|
-
rspec-mocks (3.
|
55
|
+
rspec-support (~> 3.9.0)
|
56
|
+
rspec-mocks (3.9.0)
|
70
57
|
diff-lcs (>= 1.2.0, < 2.0)
|
71
|
-
rspec-support (~> 3.
|
72
|
-
rspec-support (3.
|
73
|
-
rubocop (0.
|
58
|
+
rspec-support (~> 3.9.0)
|
59
|
+
rspec-support (3.9.0)
|
60
|
+
rubocop (0.78.0)
|
74
61
|
jaro_winkler (~> 1.5.1)
|
75
62
|
parallel (~> 1.10)
|
76
|
-
parser (>= 2.
|
77
|
-
powerpack (~> 0.1)
|
63
|
+
parser (>= 2.6)
|
78
64
|
rainbow (>= 2.2.2, < 4.0)
|
79
65
|
ruby-progressbar (~> 1.7)
|
80
|
-
unicode-display_width (
|
81
|
-
ruby-prof (
|
82
|
-
ruby-progressbar (1.10.
|
83
|
-
simplecov (0.
|
66
|
+
unicode-display_width (>= 1.4.0, < 1.7)
|
67
|
+
ruby-prof (1.1.0)
|
68
|
+
ruby-progressbar (1.10.1)
|
69
|
+
simplecov (0.17.1)
|
84
70
|
docile (~> 1.1)
|
85
71
|
json (>= 1.8, < 3)
|
86
72
|
simplecov-html (~> 0.10.0)
|
87
73
|
simplecov-html (0.10.2)
|
88
74
|
tapp (1.5.1)
|
89
75
|
thor
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
tomlrb (1.2.8)
|
95
|
-
unf (0.1.4)
|
96
|
-
unf_ext
|
97
|
-
unf_ext (0.0.7.5)
|
98
|
-
unicode-display_width (1.4.1)
|
76
|
+
thor (1.0.1)
|
77
|
+
tomlrb (1.2.9)
|
78
|
+
unicode-display_width (1.6.0)
|
79
|
+
url (0.3.2)
|
99
80
|
|
100
81
|
PLATFORMS
|
101
82
|
ruby
|
102
83
|
|
103
84
|
DEPENDENCIES
|
104
|
-
bundler
|
105
|
-
|
85
|
+
bundler
|
86
|
+
codecov
|
106
87
|
masking!
|
107
88
|
mdl
|
108
89
|
pry
|
109
90
|
pry-byebug
|
110
|
-
rake
|
91
|
+
rake
|
111
92
|
rake-notes
|
112
|
-
rspec
|
93
|
+
rspec
|
113
94
|
rubocop
|
114
95
|
ruby-prof
|
115
96
|
simplecov
|
116
97
|
tapp
|
117
98
|
|
118
99
|
BUNDLED WITH
|
119
|
-
2.
|
100
|
+
2.1.2
|
data/README.md
CHANGED
@@ -4,11 +4,11 @@
|
|
4
4
|
[![Acceptance Test MySQL Status](https://github.com/kibitan/masking/workflows/Acceptance%20Test%20MySQL/badge.svg?branch=master)](https://github.com/kibitan/masking/actions?query=workflow%3A%22Acceptance+Test+MySQL%22+branch%3Amaster)
|
5
5
|
[![Acceptance Test MariaDB Status](https://github.com/kibitan/masking/workflows/Acceptance%20Test%20MariaDB/badge.svg?branch=master)](https://github.com/kibitan/masking/actions?query=workflow%3A%22Acceptance+Test+MariaDB%22+branch%3Amaster)
|
6
6
|
|
7
|
-
[![
|
7
|
+
[![codecov](https://codecov.io/gh/kibitan/masking/branch/master/graph/badge.svg)](https://codecov.io/gh/kibitan/masking)
|
8
8
|
[![Maintainability](https://api.codeclimate.com/v1/badges/290b3005ecc193a3d138/maintainability)](https://codeclimate.com/github/kibitan/masking/maintainability)
|
9
9
|
[![Gem Version](https://badge.fury.io/rb/masking.svg)](https://badge.fury.io/rb/masking)
|
10
10
|
|
11
|
-
The command line tool for anonymizing database records by parsing a SQL dump file and build new SQL dump file with masking sensitive/credential data.
|
11
|
+
The command line tool for anonymizing database records by parsing a SQL dump file and building a new SQL dump file with sensitive/credential data masked.
|
12
12
|
|
13
13
|
## Installation
|
14
14
|
|
@@ -18,7 +18,7 @@ gem install masking
|
|
18
18
|
|
19
19
|
## Requirement
|
20
20
|
|
21
|
-
* Ruby 2.5/2.6/2.7
|
21
|
+
* Ruby 2.5/2.6/2.7
|
22
22
|
|
23
23
|
## Supported RDBMS
|
24
24
|
|
@@ -49,7 +49,7 @@ gem install masking
|
|
49
49
|
AgjoEwnuNAFOhpEMTRiggcz4BNJHrv/zCFcLiwMWYNG84BwwEeECcgggoBADs=
|
50
50
|
```
|
51
51
|
|
52
|
-
A value will be implicitly converted to compatible type. If you prefer to explicitly convert, you could use a tag as defined in [YAML Version 1.1](http://yaml.org/spec/current.html#id2503753)
|
52
|
+
A value will be implicitly converted to a compatible type. If you prefer to explicitly convert, you could use a tag as defined in [YAML Version 1.1](http://yaml.org/spec/current.html#id2503753)
|
53
53
|
|
54
54
|
```yaml
|
55
55
|
not-date: !!str 2002-04-28
|
@@ -57,7 +57,7 @@ gem install masking
|
|
57
57
|
|
58
58
|
String should be matched with [MySQL String Type]( https://dev.mysql.com/doc/refman/8.0/en/string-type-overview.html). Integer/Float should be matched with [MySQL Numeric Type](https://dev.mysql.com/doc/refman/8.0/en/numeric-type-overview.html). Date/Time should be matched with [MySQL Date and Time Type](https://dev.mysql.com/doc/refman/8.0/en/date-and-time-type-overview.html).
|
59
59
|
|
60
|
-
*NOTE: MasKING doesn't check actual schema's type from dump. If you put
|
60
|
+
*NOTE: MasKING doesn't check the actual schema's types from the dump. If you put an incompatible value, it will cause an error while restoring to the database.*
|
61
61
|
|
62
62
|
1. Dump database with anonymizing
|
63
63
|
|
@@ -67,13 +67,13 @@ gem install masking
|
|
67
67
|
mysqldump --complete-insert -u USERNAME DATABASE_NAME | masking > anonymized_dump.sql
|
68
68
|
```
|
69
69
|
|
70
|
-
1. Restore from anonymized dump file
|
70
|
+
1. Restore from the anonymized dump file
|
71
71
|
|
72
72
|
```bash
|
73
73
|
mysql -u USERNAME ANONYMIZED_DATABASE_NAME < anonymized_dump.sql
|
74
74
|
```
|
75
75
|
|
76
|
-
Tip: If you don't need to have anonymized dump file, you can directly insert from stream. It can be faster because it has less IO interaction.
|
76
|
+
Tip: If you don't need to have an anonymized dump file, you can directly insert it from the stream. It can be faster because it has less IO interaction.
|
77
77
|
|
78
78
|
```bash
|
79
79
|
mysqldump --complete-insert -u USERNAME DATABASE_NAME | masking | mysql -u USERNAME ANONYMIZED_DATABASE_NAME
|
@@ -88,27 +88,21 @@ Usage: masking [options]
|
|
88
88
|
-v, --version version
|
89
89
|
```
|
90
90
|
|
91
|
-
## Use case of
|
91
|
+
## Use case of anonymized (production) database
|
92
92
|
|
93
|
-
*
|
93
|
+
* Analyzing production databases for BI, Machine Learning, and troubleshooting while respecting GDPR
|
94
|
+
* Stress test / Integration test
|
95
|
+
* Performance optimization for slow query
|
94
96
|
|
95
|
-
|
97
|
+
Analyzing a slow query often needs a similar number of records/cardinality to production; the anonymized database helps to analyze and tune the slow query.
|
96
98
|
|
97
|
-
*
|
99
|
+
* Simulating database migration
|
98
100
|
|
99
|
-
Some
|
101
|
+
Some schema migrations lock tables, which causes trouble during execution. With a smaller database, the migration finishes in a short time and the problem is easy to overlook. With the anonymized production database, it is easy to simulate the migration as it would run in the real release, which makes it easy to find the problem.
|
100
102
|
|
101
|
-
*
|
103
|
+
* Better feature development flow
|
102
104
|
|
103
|
-
|
104
|
-
|
105
|
-
* Better development/demo of a feature
|
106
|
-
|
107
|
-
Using similar data with real one will be good to make a good view of how feature looks like. It makes easy to find out the things to be changed/fixed before release/check the feature in production.
|
108
|
-
|
109
|
-
* Analyze metrics on our production data with respecting GDPR
|
110
|
-
|
111
|
-
We can use this database for BI and some trouble shooting.
|
105
|
+
Using data similar to the production database makes for a better development experience. It makes it easy to find out the things which should be changed/fixed. Also, some bugs are related to unexpected data in production; it makes them easy to find too.
|
112
106
|
|
113
107
|
* And… your idea here!
|
114
108
|
|
@@ -204,31 +198,31 @@ see also: [ruby-prof/ruby-prof: ruby-prof: a code profiler for MRI rubies](https
|
|
204
198
|
|
205
199
|
### Benchmark
|
206
200
|
|
207
|
-
use `
|
201
|
+
use `benchmark/run.rb`
|
208
202
|
|
209
203
|
```bash
|
210
|
-
$
|
204
|
+
$ benchmark/run.rb
|
211
205
|
user system total real
|
212
|
-
1.
|
206
|
+
1.103012 0.009460 1.112472 ( 1.123093)
|
213
207
|
```
|
214
208
|
|
215
209
|
## Design Concept
|
216
210
|
|
217
211
|
### KISS ~ keep it simple, stupid ~
|
218
212
|
|
219
|
-
No connection to database, No handling
|
213
|
+
No connection to the database, No handling files, Only dealing with stdin/stdout. ~ Do One Thing and Do It Well ~
|
220
214
|
|
221
215
|
### No External Dependency
|
222
216
|
|
223
|
-
Depend on only pure language standard libraries, no external libraries
|
217
|
+
Depend on only pure language standard libraries, no external libraries
|
224
218
|
|
225
219
|
## Future Todo
|
226
220
|
|
227
|
-
*
|
221
|
+
* Pluggable/customizable for a mask way e.g. integrate with [Faker](https://github.com/stympy/faker)
|
228
222
|
* Compatible with other RDBMS e.g. PostgreSQL, Oracle, SQL Server
|
229
223
|
* Parse the schema type information and validate target columns value
|
230
224
|
* Performance optimization
|
231
|
-
* Write in streaming process
|
225
|
+
* Write in the streaming process
|
232
226
|
* rewrite by another language?
|
233
227
|
* Well-documentation
|
234
228
|
|
@@ -5,13 +5,13 @@ $LOAD_PATH.unshift('./lib')
|
|
5
5
|
require 'benchmark'
|
6
6
|
require 'masking'
|
7
7
|
|
8
|
-
n = 10_000
|
9
|
-
|
10
8
|
Masking.configure do |config|
|
11
|
-
config.target_columns_file_path = '
|
9
|
+
config.target_columns_file_path = 'benchmark/masking.yml'
|
12
10
|
end
|
13
11
|
|
14
|
-
|
12
|
+
n = 30
|
13
|
+
|
14
|
+
fixture = File.open('benchmark/users.sql')
|
15
15
|
|
16
16
|
Benchmark.bm do |x|
|
17
17
|
x.report do
|