embulk-filter-mask 0.1.1 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +12 -0
- data/README.md +42 -9
- data/build.gradle +13 -4
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +2 -2
- data/src/main/java/org/embulk/filter/mask/MaskFilterPlugin.java +9 -1
- data/src/main/java/org/embulk/filter/mask/MaskPageOutput.java +121 -59
- data/src/test/java/org/embulk/filter/mask/TestMaskFilterPlugin.java +136 -6
- metadata +9 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 420f82310881451f5b66848494db06f292c9f755
|
4
|
+
data.tar.gz: b3fb787c18bf556cf169d85343a6dacb4030f116
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 501044a9ebb52794d3bda67aa9ef947157300be945e798ace60068c7162206c522dfa228eb936659493bf40135d37a2afb5f9fcf9dcf324a36c40b636a5cf1e1
|
7
|
+
data.tar.gz: b482c2be719bab2770ba7f34c618d0d2bd611f56103bcec9df102be970965d9defbfb2af6297671d1e78674a4ba5ca1eb39eda773c8c0fe5de3f341b3b6c21c8
|
data/.travis.yml
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
language: java
|
2
|
+
jdk:
|
3
|
+
# Tentatively ignore these environments as we face SSL errors
|
4
|
+
# https://github.com/gradle/gradle/issues/2421
|
5
|
+
#
|
6
|
+
# - openjdk7
|
7
|
+
# - oraclejdk7
|
8
|
+
- oraclejdk8
|
9
|
+
script:
|
10
|
+
- ./gradlew test
|
11
|
+
after_success:
|
12
|
+
- ./gradlew jacocoTestReport coveralls
|
data/README.md
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
# Mask filter plugin for Embulk
|
2
2
|
|
3
|
-
|
3
|
+
[![Coverage Status](https://coveralls.io/repos/github/beniyama/embulk-filter-mask/badge.svg)](https://coveralls.io/github/beniyama/embulk-filter-mask)
|
4
|
+
|
5
|
+
Mask columns with asterisks in a variety of patterns (still in initial development phase and missing basic features to use in production).
|
4
6
|
|
5
7
|
## Overview
|
6
8
|
|
@@ -8,13 +10,23 @@ mask columns with asterisks (still in initial development phase and missing basi
|
|
8
10
|
|
9
11
|
## Configuration
|
10
12
|
|
13
|
+
*Caution*: `type` is now used to specify mask types such as `all` and `email`, replacing `pattern`, which served this purpose in version 0.1.1 and earlier.
|
14
|
+
|
11
15
|
- **columns**: target columns which would be replaced with asterisks (string, required)
|
12
16
|
- **name**: name of the column (string, required)
|
13
|
-
- **
|
14
|
-
- **
|
17
|
+
- **type**: mask type, `all`, `email`, `regex` or `substring` (string, default: `all`)
|
18
|
+
- **paths**: list of JSON paths, each with an optional mask type; applies only when the column type is JSON
|
19
|
+
- `[{key: $.json_path1}, {key: $.json_path2}]` would mask both `$.json_path1` and `$.json_path2` nodes
|
20
|
+
- Elements under the nodes would be converted to string and then masked (e.g., `[0,1,2]` -> `*******`)
|
21
|
+
- **length**: if specified, this filter replaces the column with a fixed number of asterisks (integer, optional; supported only for the `all`, `email` and `substring` types)
|
22
|
+
- **pattern**: Regex pattern such as "[0-9]+" (string, required for `regex` type)
|
23
|
+
- **start**: The beginning index for `substring` type. The index is 0-based and inclusive (integer, default: 0)
|
24
|
+
- **end**: The ending index for `substring` type. The value is exclusive (integer, default: length of the target column)
|
15
25
|
|
16
26
|
## Example
|
17
27
|
|
28
|
+
|
29
|
+
|
18
30
|
If you have the data below in a CSV file or a file in another format,
|
19
31
|
|
20
32
|
|first_name | last_name | gender | age | contact |
|
@@ -33,7 +45,7 @@ filters:
|
|
33
45
|
columns:
|
34
46
|
- { name: last_name}
|
35
47
|
- { name: age}
|
36
|
-
- { name: contact,
|
48
|
+
- { name: contact, type: email, length: 5}
|
37
49
|
```
|
38
50
|
|
39
51
|
would produce
|
@@ -46,9 +58,29 @@ would produce
|
|
46
58
|
| Christian | **** | male | ** | *****@example.com |
|
47
59
|
| Amy | ***** | female | ** | *****@example.com |
|
48
60
|
|
61
|
+
If you use `regex` and/or `substring` types,
|
62
|
+
|
63
|
+
```yaml
|
64
|
+
filters:
|
65
|
+
- type: mask
|
66
|
+
columns:
|
67
|
+
- { name: first_name, type: regex, pattern: "[a-z]"}
|
68
|
+
- { name: contact, type: substring, start: 5, length: 5}
|
69
|
+
```
|
70
|
+
|
71
|
+
would produce
|
72
|
+
|
73
|
+
|first_name | last_name | gender | age | contact |
|
74
|
+
|---|---|---|---|---|
|
75
|
+
| B******* | Bell | male | 30 | bell.***** |
|
76
|
+
| L**** | Duncan | male | 20 | lucas***** |
|
77
|
+
| E******* | May | female | 25 | eliza***** |
|
78
|
+
| C******** | Reid | male | 15 | chris***** |
|
79
|
+
| A** | Avery | female | 40 | amy.a***** |
|
80
|
+
|
49
81
|
JSON type column is also partially supported.
|
50
82
|
|
51
|
-
If you have
|
83
|
+
If you have a `user` column with this JSON data structure
|
52
84
|
|
53
85
|
```json
|
54
86
|
{
|
@@ -57,7 +89,8 @@ If you have
|
|
57
89
|
"last_name": "Bell"
|
58
90
|
},
|
59
91
|
"gender": "male",
|
60
|
-
"age": 30
|
92
|
+
"age": 30,
|
93
|
+
"email": "test_mail@example.com"
|
61
94
|
}
|
62
95
|
```
|
63
96
|
|
@@ -67,8 +100,7 @@ below filter configuration
|
|
67
100
|
filters:
|
68
101
|
- type: mask
|
69
102
|
columns:
|
70
|
-
- { name:
|
71
|
-
- { name: age, path: $.}
|
103
|
+
- { name: user, paths: [{key: $.full_name.first_name}, {key: $.email, type: email}]}
|
72
104
|
```
|
73
105
|
|
74
106
|
would produce
|
@@ -80,7 +112,8 @@ would produce
|
|
80
112
|
"last_name": "Bell"
|
81
113
|
},
|
82
114
|
"gender": "male",
|
83
|
-
"age":
|
115
|
+
"age": 30,
|
116
|
+
"email": "*********@example.com"
|
84
117
|
}
|
85
118
|
```
|
86
119
|
|
data/build.gradle
CHANGED
@@ -1,8 +1,10 @@
|
|
1
1
|
plugins {
|
2
2
|
id "com.jfrog.bintray" version "1.1"
|
3
3
|
id "com.github.jruby-gradle.base" version "0.1.5"
|
4
|
+
id "com.github.kt3k.coveralls" version "2.8.1"
|
4
5
|
id "java"
|
5
6
|
id "checkstyle"
|
7
|
+
id "jacoco"
|
6
8
|
}
|
7
9
|
import com.github.jrubygradle.JRubyExec
|
8
10
|
repositories {
|
@@ -13,17 +15,24 @@ configurations {
|
|
13
15
|
provided
|
14
16
|
}
|
15
17
|
|
16
|
-
version = "0.
|
18
|
+
version = "0.2.1"
|
17
19
|
|
18
20
|
sourceCompatibility = 1.7
|
19
21
|
targetCompatibility = 1.7
|
20
22
|
|
21
23
|
dependencies {
|
22
|
-
compile "org.embulk:embulk-core:0.8.
|
23
|
-
provided "org.embulk:embulk-core:0.8.
|
24
|
+
compile "org.embulk:embulk-core:0.8.29"
|
25
|
+
provided "org.embulk:embulk-core:0.8.29"
|
24
26
|
compile "com.jayway.jsonpath:json-path:2.+"
|
25
27
|
testCompile "junit:junit:4.+"
|
26
|
-
testCompile "org.embulk:embulk-core:0.8.
|
28
|
+
testCompile "org.embulk:embulk-core:0.8.29:tests"
|
29
|
+
}
|
30
|
+
|
31
|
+
jacocoTestReport {
|
32
|
+
reports {
|
33
|
+
xml.enabled = true // coveralls plugin depends on xml format report
|
34
|
+
html.enabled = true
|
35
|
+
}
|
27
36
|
}
|
28
37
|
|
29
38
|
task classpath(type: Copy, dependsOn: ["jar"]) {
|
Binary file
|
@@ -1,6 +1,6 @@
|
|
1
|
-
#
|
1
|
+
#Sun Jan 08 00:35:58 PST 2017
|
2
2
|
distributionBase=GRADLE_USER_HOME
|
3
3
|
distributionPath=wrapper/dists
|
4
4
|
zipStoreBase=GRADLE_USER_HOME
|
5
5
|
zipStorePath=wrapper/dists
|
6
|
-
distributionUrl=https\://services.gradle.org/distributions/gradle-2.
|
6
|
+
distributionUrl=https\://services.gradle.org/distributions/gradle-3.2.1-bin.zip
|
@@ -30,7 +30,7 @@ public class MaskFilterPlugin implements FilterPlugin {
|
|
30
30
|
String getName();
|
31
31
|
|
32
32
|
@Config("type")
|
33
|
-
@ConfigDefault("\"
|
33
|
+
@ConfigDefault("\"all\"")
|
34
34
|
Optional<String> getType();
|
35
35
|
|
36
36
|
@Config("pattern")
|
@@ -41,6 +41,14 @@ public class MaskFilterPlugin implements FilterPlugin {
|
|
41
41
|
@ConfigDefault("null")
|
42
42
|
Optional<Integer> getLength();
|
43
43
|
|
44
|
+
@Config("start")
|
45
|
+
@ConfigDefault("null")
|
46
|
+
Optional<Integer> getStart();
|
47
|
+
|
48
|
+
@Config("end")
|
49
|
+
@ConfigDefault("null")
|
50
|
+
Optional<Integer> getEnd();
|
51
|
+
|
44
52
|
@Config("paths")
|
45
53
|
@ConfigDefault("null")
|
46
54
|
Optional<List<Map<String, String>>> getPaths();
|
@@ -7,6 +7,7 @@ import org.embulk.config.TaskSource;
|
|
7
7
|
import org.embulk.spi.*;
|
8
8
|
import org.embulk.spi.json.JsonParser;
|
9
9
|
import org.embulk.spi.time.Timestamp;
|
10
|
+
import org.embulk.spi.type.Type;
|
10
11
|
import org.embulk.spi.type.Types;
|
11
12
|
import org.embulk.filter.mask.MaskFilterPlugin.*;
|
12
13
|
import org.msgpack.value.Value;
|
@@ -67,62 +68,87 @@ public class MaskPageOutput implements PageOutput {
|
|
67
68
|
continue;
|
68
69
|
}
|
69
70
|
|
70
|
-
|
71
|
-
|
71
|
+
String name = inputColumn.getName();
|
72
|
+
Type type = inputColumn.getType();
|
73
|
+
|
74
|
+
if (Types.STRING.equals(type)) {
|
72
75
|
final String value = reader.getString(inputColumn);
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
+
if (maskColumnMap.containsKey(name)) {
|
77
|
+
builder.setString(inputColumn, maskAsString(name, value));
|
78
|
+
} else {
|
79
|
+
builder.setString(inputColumn, value);
|
80
|
+
}
|
81
|
+
} else if (Types.BOOLEAN.equals(type)) {
|
76
82
|
final boolean value = reader.getBoolean(inputColumn);
|
77
|
-
|
78
|
-
|
79
|
-
|
83
|
+
if (maskColumnMap.containsKey(name)) {
|
84
|
+
builder.setString(inputColumn, maskAsString(name, value));
|
85
|
+
} else {
|
86
|
+
builder.setBoolean(inputColumn, value);
|
87
|
+
}
|
88
|
+
} else if (Types.DOUBLE.equals(type)) {
|
80
89
|
final double value = reader.getDouble(inputColumn);
|
81
|
-
|
82
|
-
|
83
|
-
|
90
|
+
if (maskColumnMap.containsKey(name)) {
|
91
|
+
builder.setString(inputColumn, maskAsString(name, value));
|
92
|
+
} else {
|
93
|
+
builder.setDouble(inputColumn, value);
|
94
|
+
}
|
95
|
+
} else if (Types.LONG.equals(type)) {
|
84
96
|
final long value = reader.getLong(inputColumn);
|
85
|
-
|
86
|
-
|
87
|
-
|
97
|
+
if (maskColumnMap.containsKey(name)) {
|
98
|
+
builder.setString(inputColumn, maskAsString(name, value));
|
99
|
+
} else {
|
100
|
+
builder.setLong(inputColumn, value);
|
101
|
+
}
|
102
|
+
} else if (Types.TIMESTAMP.equals(type)) {
|
88
103
|
final Timestamp value = reader.getTimestamp(inputColumn);
|
89
|
-
|
90
|
-
|
91
|
-
|
104
|
+
if (maskColumnMap.containsKey(name)) {
|
105
|
+
builder.setString(inputColumn, maskAsString(name, value));
|
106
|
+
} else {
|
107
|
+
builder.setTimestamp(inputColumn, value);
|
108
|
+
}
|
109
|
+
} else if (Types.JSON.equals(type)) {
|
92
110
|
final Value value = reader.getJson(inputColumn);
|
93
|
-
|
94
|
-
|
111
|
+
if (maskColumnMap.containsKey(name)) {
|
112
|
+
builder.setJson(inputColumn, maskAsJson(name, value));
|
113
|
+
} else {
|
114
|
+
builder.setJson(inputColumn, value);
|
115
|
+
}
|
95
116
|
} else {
|
96
|
-
throw new DataException("Unexpected type:" +
|
117
|
+
throw new DataException("Unexpected type:" + type);
|
97
118
|
}
|
119
|
+
}
|
120
|
+
}
|
98
121
|
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
122
|
+
private String maskAsString(String name, Object value) {
|
123
|
+
MaskColumn maskColumn = maskColumnMap.get(name);
|
124
|
+
String type = maskColumn.getType().get();
|
125
|
+
String pattern = maskColumn.getPattern().or("");
|
126
|
+
Integer length = maskColumn.getLength().or(-1);
|
127
|
+
Integer start = maskColumn.getStart().or(-1);
|
128
|
+
Integer end = maskColumn.getEnd().or(-1);
|
129
|
+
|
130
|
+
return mask(type, value, pattern, length, start, end);
|
131
|
+
}
|
132
|
+
|
133
|
+
private Value maskAsJson(String name, Value value) {
|
134
|
+
MaskColumn maskColumn = maskColumnMap.get(name);
|
135
|
+
DocumentContext context = parseContext.parse(value.toJson());
|
136
|
+
List<Map<String, String>> paths = maskColumn.getPaths().or(new ArrayList<Map<String, String>>());
|
137
|
+
|
138
|
+
for (Map<String, String> path : paths) {
|
139
|
+
String key = path.get("key");
|
140
|
+
String type = path.containsKey("type") ? path.get("type") : "all";
|
141
|
+
String pattern = path.containsKey("pattern") ? path.get("pattern") : "";
|
142
|
+
Integer length = path.containsKey("length") ? Integer.parseInt(path.get("length")) : -1;
|
143
|
+
Integer start = path.containsKey("start") ? Integer.parseInt(path.get("start")) : -1;
|
144
|
+
Integer end = path.containsKey("end") ? Integer.parseInt(path.get("end")) : -1;
|
145
|
+
Object element = context.read(key);
|
146
|
+
if (!key.equals("$") && element != null) {
|
147
|
+
String maskedValue = mask(type, element, pattern, length, start, end);
|
148
|
+
context.set(key, new TextNode(maskedValue).asText()).jsonString();
|
124
149
|
}
|
125
150
|
}
|
151
|
+
return jsonParser.parse(context.jsonString());
|
126
152
|
}
|
127
153
|
|
128
154
|
@Override
|
@@ -135,25 +161,61 @@ public class MaskPageOutput implements PageOutput {
|
|
135
161
|
builder.close();
|
136
162
|
}
|
137
163
|
|
138
|
-
private String mask(Object value, String pattern, Integer length) {
|
164
|
+
private String mask(String type, Object value, String pattern, Integer length, Integer start, Integer end) {
|
139
165
|
String maskedValue;
|
140
166
|
String nakedValue = value.toString();
|
141
|
-
if (
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
if (length > 0) {
|
150
|
-
maskedValue = StringUtils.repeat("*", length);
|
151
|
-
} else {
|
152
|
-
maskedValue = nakedValue.replaceAll(".", "*");
|
153
|
-
}
|
167
|
+
if (type.equals("regex")) {
|
168
|
+
maskedValue = maskRegex(nakedValue, pattern);
|
169
|
+
} else if (type.equals("substring")) {
|
170
|
+
maskedValue = maskSubstring(nakedValue, start, end, length);
|
171
|
+
} else if (type.equals("email")) {
|
172
|
+
maskedValue = maskEmail(nakedValue, length);
|
173
|
+
} else if (type.equals("all")) {
|
174
|
+
maskedValue = maskAll(nakedValue, length);
|
154
175
|
} else {
|
155
176
|
maskedValue = nakedValue;
|
156
177
|
}
|
157
178
|
return maskedValue;
|
158
179
|
}
|
180
|
+
|
181
|
+
private String maskAll(Object value, Integer length) {
|
182
|
+
String maskedValue;
|
183
|
+
String nakedValue = value.toString();
|
184
|
+
if (length > 0) {
|
185
|
+
maskedValue = StringUtils.repeat("*", length);
|
186
|
+
} else {
|
187
|
+
maskedValue = nakedValue.replaceAll(".", "*");
|
188
|
+
}
|
189
|
+
return maskedValue;
|
190
|
+
}
|
191
|
+
|
192
|
+
private String maskEmail(Object value, Integer length) {
|
193
|
+
String maskedValue;
|
194
|
+
String nakedValue = value.toString();
|
195
|
+
if (length > 0) {
|
196
|
+
String maskPattern = StringUtils.repeat("*", length) + "@$1";
|
197
|
+
maskedValue = nakedValue.replaceFirst("^.+?@(.+)$", maskPattern);
|
198
|
+
} else {
|
199
|
+
maskedValue = nakedValue.replaceAll(".(?=[^@]*@)", "*");
|
200
|
+
}
|
201
|
+
return maskedValue;
|
202
|
+
}
|
203
|
+
|
204
|
+
private String maskRegex(Object value, String pattern) {
|
205
|
+
String nakedValue = value.toString();
|
206
|
+
return nakedValue.replaceAll(pattern, "*");
|
207
|
+
}
|
208
|
+
|
209
|
+
private String maskSubstring(Object value, Integer start, Integer end, Integer length) {
|
210
|
+
String nakedValue = value.toString();
|
211
|
+
|
212
|
+
if (nakedValue.length() <= start || (0 <= end && (end - 1) <= start)) return nakedValue;
|
213
|
+
|
214
|
+
start = start < 0 ? 0 : start;
|
215
|
+
end = (end < 0 || nakedValue.length() <= end) ? nakedValue.length() : end;
|
216
|
+
int repeat = length > 0 ? length : end - start;
|
217
|
+
|
218
|
+
StringBuffer buffer = new StringBuffer(nakedValue);
|
219
|
+
return buffer.replace(start, end, StringUtils.repeat("*", repeat)).toString();
|
220
|
+
}
|
159
221
|
}
|
@@ -262,8 +262,10 @@ public class TestMaskFilterPlugin {
|
|
262
262
|
"columns:\n" +
|
263
263
|
" - { name: _c0}\n" +
|
264
264
|
" - { name: _c1, paths: [{key: $.root.key1}]}\n" +
|
265
|
-
" - { name: _c2, paths: [{key: $.root.key3, length: 2}, {key: $.root.key4,
|
266
|
-
" - { name: _c3, paths: [{key: $.root.key1}, {key: $.root.key3.key7,
|
265
|
+
" - { name: _c2, paths: [{key: $.root.key3, length: 2}, {key: $.root.key4, type: all}]}\n" +
|
266
|
+
" - { name: _c3, paths: [{key: $.root.key1}, {key: $.root.key3.key7, type: email, length: 3}]}\n" +
|
267
|
+
" - { name: _c4, paths: [{key: $.root.key1, type: regex, pattern: \"[0-9]\"}]}\n" +
|
268
|
+
" - { name: _c5, paths: [{key: $.root.key1, type: substring, start: 2, end: 4, length: 5}]}\n";
|
267
269
|
|
268
270
|
ConfigSource config = getConfigFromYaml(configYaml);
|
269
271
|
|
@@ -272,6 +274,8 @@ public class TestMaskFilterPlugin {
|
|
272
274
|
.add("_c1", JSON)
|
273
275
|
.add("_c2", JSON)
|
274
276
|
.add("_c3", JSON)
|
277
|
+
.add("_c4", JSON)
|
278
|
+
.add("_c5", JSON)
|
275
279
|
.build();
|
276
280
|
|
277
281
|
final MaskFilterPlugin maskFilterPlugin = new MaskFilterPlugin();
|
@@ -295,6 +299,8 @@ public class TestMaskFilterPlugin {
|
|
295
299
|
MockPageOutput mockPageOutput = new MockPageOutput();
|
296
300
|
try (PageOutput pageOutput = maskFilterPlugin.open(taskSource, inputSchema, outputSchema, mockPageOutput)) {
|
297
301
|
for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), inputSchema,
|
302
|
+
jsonValue,
|
303
|
+
jsonValue,
|
298
304
|
jsonValue,
|
299
305
|
jsonValue,
|
300
306
|
jsonValue,
|
@@ -309,11 +315,13 @@ public class TestMaskFilterPlugin {
|
|
309
315
|
assertEquals(1, records.size());
|
310
316
|
Object[] record = records.get(0);
|
311
317
|
|
312
|
-
assertEquals(
|
318
|
+
assertEquals(6, record.length);
|
313
319
|
assertEquals("{\"root\":{\"key1\":\"value1\",\"key2\":2,\"key3\":{\"key5\":\"value5\",\"key6\":[0,1,2,3,4],\"key7\":\"testme@example.com\"},\"key4\":[0,1,2,3,4]}}", record[0].toString());
|
314
320
|
assertEquals("{\"root\":{\"key1\":\"******\",\"key2\":2,\"key3\":{\"key5\":\"value5\",\"key6\":[0,1,2,3,4],\"key7\":\"testme@example.com\"},\"key4\":[0,1,2,3,4]}}", record[1].toString());
|
315
321
|
assertEquals("{\"root\":{\"key1\":\"value1\",\"key2\":2,\"key3\":\"**\",\"key4\":\"***********\"}}", record[2].toString());
|
316
322
|
assertEquals("{\"root\":{\"key1\":\"******\",\"key2\":2,\"key3\":{\"key5\":\"value5\",\"key6\":[0,1,2,3,4],\"key7\":\"***@example.com\"},\"key4\":[0,1,2,3,4]}}", record[3].toString());
|
323
|
+
assertEquals("{\"root\":{\"key1\":\"value*\",\"key2\":2,\"key3\":{\"key5\":\"value5\",\"key6\":[0,1,2,3,4],\"key7\":\"testme@example.com\"},\"key4\":[0,1,2,3,4]}}", record[4].toString());
|
324
|
+
assertEquals("{\"root\":{\"key1\":\"va*****e1\",\"key2\":2,\"key3\":{\"key5\":\"value5\",\"key6\":[0,1,2,3,4],\"key7\":\"testme@example.com\"},\"key4\":[0,1,2,3,4]}}", record[5].toString());
|
317
325
|
}
|
318
326
|
});
|
319
327
|
}
|
@@ -323,9 +331,9 @@ public class TestMaskFilterPlugin {
|
|
323
331
|
String configYaml = "" +
|
324
332
|
"type: mask\n" +
|
325
333
|
"columns:\n" +
|
326
|
-
" - { name: _c0,
|
327
|
-
" - { name: _c1,
|
328
|
-
" - { name: _c2,
|
334
|
+
" - { name: _c0, type: email}\n" +
|
335
|
+
" - { name: _c1, type: email}\n" +
|
336
|
+
" - { name: _c2, type: all}\n" +
|
329
337
|
" - { name: _c3}\n";
|
330
338
|
|
331
339
|
ConfigSource config = getConfigFromYaml(configYaml);
|
@@ -372,4 +380,126 @@ public class TestMaskFilterPlugin {
|
|
372
380
|
}
|
373
381
|
});
|
374
382
|
}
|
383
|
+
|
384
|
+
@Test
|
385
|
+
public void testRegexMaskType() {
|
386
|
+
String configYaml = "" +
|
387
|
+
"type: mask\n" +
|
388
|
+
"columns:\n" +
|
389
|
+
" - { name: _c1, type: regex, pattern: \"abc\" }\n" +
|
390
|
+
" - { name: _c2, type: regex, pattern: \"(abc)\" }\n" +
|
391
|
+
" - { name: _c3, type: regex, pattern: \"[0-9]+\" }\n" +
|
392
|
+
" - { name: _c4, type: regex, pattern: \"[0-9]\" }\n";
|
393
|
+
|
394
|
+
ConfigSource config = getConfigFromYaml(configYaml);
|
395
|
+
|
396
|
+
final Schema inputSchema = Schema.builder()
|
397
|
+
.add("_c0", STRING)
|
398
|
+
.add("_c1", STRING)
|
399
|
+
.add("_c2", STRING)
|
400
|
+
.add("_c3", STRING)
|
401
|
+
.add("_c4", STRING)
|
402
|
+
.build();
|
403
|
+
|
404
|
+
final MaskFilterPlugin maskFilterPlugin = new MaskFilterPlugin();
|
405
|
+
maskFilterPlugin.transaction(config, inputSchema, new Control() {
|
406
|
+
@Override
|
407
|
+
public void run(TaskSource taskSource, Schema outputSchema) {
|
408
|
+
final String c0ColumnValue = "_c0_abcdefghi01234";
|
409
|
+
final String c1ColumnValue = "_c1_abcdefghi01234";
|
410
|
+
final String c2ColumnValue = "_c2_abcdefghi01234";
|
411
|
+
final String c3ColumnValue = "_c3_abcdefghi01234";
|
412
|
+
final String c4ColumnValue = "_c4_abcdefghi01234";
|
413
|
+
|
414
|
+
MockPageOutput mockPageOutput = new MockPageOutput();
|
415
|
+
try (PageOutput pageOutput = maskFilterPlugin.open(taskSource, inputSchema, outputSchema, mockPageOutput)) {
|
416
|
+
for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), inputSchema,
|
417
|
+
c0ColumnValue,
|
418
|
+
c1ColumnValue,
|
419
|
+
c2ColumnValue,
|
420
|
+
c3ColumnValue,
|
421
|
+
c4ColumnValue
|
422
|
+
)) {
|
423
|
+
pageOutput.add(page);
|
424
|
+
}
|
425
|
+
pageOutput.finish();
|
426
|
+
}
|
427
|
+
List<Object[]> records = Pages.toObjects(outputSchema, mockPageOutput.pages);
|
428
|
+
|
429
|
+
assertEquals(1, records.size());
|
430
|
+
Object[] record = records.get(0);
|
431
|
+
|
432
|
+
assertEquals(5, record.length);
|
433
|
+
assertEquals("_c0_abcdefghi01234", record[0]);
|
434
|
+
assertEquals("_c1_*defghi01234", record[1]);
|
435
|
+
assertEquals("_c2_*defghi01234", record[2]);
|
436
|
+
assertEquals("_c*_abcdefghi*", record[3]);
|
437
|
+
assertEquals("_c*_abcdefghi*****", record[4]);
|
438
|
+
}
|
439
|
+
});
|
440
|
+
}
|
441
|
+
|
442
|
+
@Test
|
443
|
+
public void testSubstringMaskType() {
|
444
|
+
String configYaml = "" +
|
445
|
+
"type: mask\n" +
|
446
|
+
"columns:\n" +
|
447
|
+
" - { name: _c0, type: substring }\n" +
|
448
|
+
" - { name: _c1, type: substring, start: 2, end: 5 }\n" +
|
449
|
+
" - { name: _c2, type: substring, start: 6 }\n" +
|
450
|
+
" - { name: _c3, type: substring, end: 4 }\n" +
|
451
|
+
" - { name: _c4, type: substring, start: 3, length: 5 }\n" +
|
452
|
+
" - { name: _c5, type: substring, start: 3, end: 2, length: 5 }\n"; // invalid configuration
|
453
|
+
|
454
|
+
ConfigSource config = getConfigFromYaml(configYaml);
|
455
|
+
|
456
|
+
final Schema inputSchema = Schema.builder()
|
457
|
+
.add("_c0", STRING)
|
458
|
+
.add("_c1", STRING)
|
459
|
+
.add("_c2", STRING)
|
460
|
+
.add("_c3", STRING)
|
461
|
+
.add("_c4", STRING)
|
462
|
+
.add("_c5", STRING)
|
463
|
+
.build();
|
464
|
+
|
465
|
+
final MaskFilterPlugin maskFilterPlugin = new MaskFilterPlugin();
|
466
|
+
maskFilterPlugin.transaction(config, inputSchema, new Control() {
|
467
|
+
@Override
|
468
|
+
public void run(TaskSource taskSource, Schema outputSchema) {
|
469
|
+
final String c0ColumnValue = "_c0_abcdefghi01234";
|
470
|
+
final String c1ColumnValue = "_c1_abcdefghi01234";
|
471
|
+
final String c2ColumnValue = "_c2_abcdefghi01234";
|
472
|
+
final String c3ColumnValue = "_c3_abcdefghi01234";
|
473
|
+
final String c4ColumnValue = "_c4_abcdefghi01234";
|
474
|
+
final String c5ColumnValue = "_c5_abcdefghi01234";
|
475
|
+
|
476
|
+
MockPageOutput mockPageOutput = new MockPageOutput();
|
477
|
+
try (PageOutput pageOutput = maskFilterPlugin.open(taskSource, inputSchema, outputSchema, mockPageOutput)) {
|
478
|
+
for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), inputSchema,
|
479
|
+
c0ColumnValue,
|
480
|
+
c1ColumnValue,
|
481
|
+
c2ColumnValue,
|
482
|
+
c3ColumnValue,
|
483
|
+
c4ColumnValue,
|
484
|
+
c5ColumnValue
|
485
|
+
)) {
|
486
|
+
pageOutput.add(page);
|
487
|
+
}
|
488
|
+
pageOutput.finish();
|
489
|
+
}
|
490
|
+
List<Object[]> records = Pages.toObjects(outputSchema, mockPageOutput.pages);
|
491
|
+
|
492
|
+
assertEquals(1, records.size());
|
493
|
+
Object[] record = records.get(0);
|
494
|
+
|
495
|
+
assertEquals(6, record.length);
|
496
|
+
assertEquals("******************", record[0]);
|
497
|
+
assertEquals("_c***bcdefghi01234", record[1]);
|
498
|
+
assertEquals("_c2_ab************", record[2]);
|
499
|
+
assertEquals("****abcdefghi01234", record[3]);
|
500
|
+
assertEquals("_c4*****", record[4]);
|
501
|
+
assertEquals("_c5_abcdefghi01234", record[5]);
|
502
|
+
}
|
503
|
+
});
|
504
|
+
}
|
375
505
|
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-mask
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tetsuo Yamabe
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-08-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -46,6 +46,7 @@ extensions: []
|
|
46
46
|
extra_rdoc_files: []
|
47
47
|
files:
|
48
48
|
- .gitignore
|
49
|
+
- .travis.yml
|
49
50
|
- LICENSE.txt
|
50
51
|
- README.md
|
51
52
|
- build.gradle
|
@@ -59,12 +60,12 @@ files:
|
|
59
60
|
- src/main/java/org/embulk/filter/mask/MaskFilterPlugin.java
|
60
61
|
- src/main/java/org/embulk/filter/mask/MaskPageOutput.java
|
61
62
|
- src/test/java/org/embulk/filter/mask/TestMaskFilterPlugin.java
|
62
|
-
- classpath/accessors-smart-1.
|
63
|
-
- classpath/asm-5.0.
|
64
|
-
- classpath/embulk-filter-mask-0.
|
65
|
-
- classpath/json-path-2.
|
66
|
-
- classpath/json-smart-2.
|
67
|
-
- classpath/slf4j-api-1.7.
|
63
|
+
- classpath/accessors-smart-1.2.jar
|
64
|
+
- classpath/asm-5.0.4.jar
|
65
|
+
- classpath/embulk-filter-mask-0.2.1.jar
|
66
|
+
- classpath/json-path-2.4.0.jar
|
67
|
+
- classpath/json-smart-2.3.jar
|
68
|
+
- classpath/slf4j-api-1.7.25.jar
|
68
69
|
homepage: https://github.com/beniyama/embulk-filter-mask
|
69
70
|
licenses:
|
70
71
|
- MIT
|