embulk-filter-mask 0.1.1 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +12 -0
- data/README.md +42 -9
- data/build.gradle +13 -4
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +2 -2
- data/src/main/java/org/embulk/filter/mask/MaskFilterPlugin.java +9 -1
- data/src/main/java/org/embulk/filter/mask/MaskPageOutput.java +121 -59
- data/src/test/java/org/embulk/filter/mask/TestMaskFilterPlugin.java +136 -6
- metadata +9 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 420f82310881451f5b66848494db06f292c9f755
|
4
|
+
data.tar.gz: b3fb787c18bf556cf169d85343a6dacb4030f116
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 501044a9ebb52794d3bda67aa9ef947157300be945e798ace60068c7162206c522dfa228eb936659493bf40135d37a2afb5f9fcf9dcf324a36c40b636a5cf1e1
|
7
|
+
data.tar.gz: b482c2be719bab2770ba7f34c618d0d2bd611f56103bcec9df102be970965d9defbfb2af6297671d1e78674a4ba5ca1eb39eda773c8c0fe5de3f341b3b6c21c8
|
data/.travis.yml
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
language: java
|
2
|
+
jdk:
|
3
|
+
# Tentatively ignore these environments as we face SSL errors
|
4
|
+
# https://github.com/gradle/gradle/issues/2421
|
5
|
+
#
|
6
|
+
# - openjdk7
|
7
|
+
# - oraclejdk7
|
8
|
+
- oraclejdk8
|
9
|
+
script:
|
10
|
+
- ./gradlew test
|
11
|
+
after_success:
|
12
|
+
- ./gradlew jacocoTestReport coveralls
|
data/README.md
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
# Mask filter plugin for Embulk
|
2
2
|
|
3
|
-
|
3
|
+
[Coverage Status](https://coveralls.io/github/beniyama/embulk-filter-mask)
|
4
|
+
|
5
|
+
Mask columns with asterisks in a variety of patterns (still in the initial development phase and missing basic features needed for production use).
|
4
6
|
|
5
7
|
## Overview
|
6
8
|
|
@@ -8,13 +10,23 @@ mask columns with asterisks (still in initial development phase and missing basi
|
|
8
10
|
|
9
11
|
## Configuration
|
10
12
|
|
13
|
+
*Caution*: `type` is now used to specify the mask type (such as `all` or `email`), replacing `pattern`, which served that purpose in version 0.1.1 and earlier.
|
14
|
+
|
11
15
|
- **columns**: target columns which would be replaced with asterisks (string, required)
|
12
16
|
- **name**: name of the column (string, required)
|
13
|
-
- **
|
14
|
-
- **
|
17
|
+
- **type**: mask type, `all`, `email`, `regex` or `substring` (string, default: `all`)
|
18
|
+
- **paths**: list of JSON paths, each with an optional mask type; applies only when the column type is JSON
|
19
|
+
- `[{key: $.json_path1}, {key: $.json_path2}]` would mask both `$.json_path1` and `$.json_path2` nodes
|
20
|
+
- Elements under the nodes would be converted to string and then masked (e.g., `[0,1,2]` -> `*******`)
|
21
|
+
- **length**: if specified, this filter replaces the column with a fixed number of asterisks (integer, optional; supported only for the `all`, `email`, and `substring` types)
|
22
|
+
- **pattern**: Regex pattern such as "[0-9]+" (string, required for `regex` type)
|
23
|
+
- **start**: The beginning index for the `substring` type. The index is 0-based and inclusive (integer, default: 0)
|
24
|
+
- **end**: The ending index for the `substring` type. The index is exclusive (integer, default: length of the target column)
|
15
25
|
|
16
26
|
## Example
|
17
27
|
|
28
|
+
|
29
|
+
|
18
30
|
If you have the data below in a CSV file or a file in another format,
|
19
31
|
|
20
32
|
|first_name | last_name | gender | age | contact |
|
@@ -33,7 +45,7 @@ filters:
|
|
33
45
|
columns:
|
34
46
|
- { name: last_name}
|
35
47
|
- { name: age}
|
36
|
-
- { name: contact,
|
48
|
+
- { name: contact, type: email, length: 5}
|
37
49
|
```
|
38
50
|
|
39
51
|
would produce
|
@@ -46,9 +58,29 @@ would produce
|
|
46
58
|
| Christian | **** | male | ** | *****@example.com |
|
47
59
|
| Amy | ***** | female | ** | *****@example.com |
|
48
60
|
|
61
|
+
If you use `regex` and/or `substring` types,
|
62
|
+
|
63
|
+
```yaml
|
64
|
+
filters:
|
65
|
+
- type: mask
|
66
|
+
columns:
|
67
|
+
- { name: first_name, type: regex, pattern: "[a-z]"}
|
68
|
+
- { name: contact, type: substring, start: 5, length: 5}
|
69
|
+
```
|
70
|
+
|
71
|
+
would produce
|
72
|
+
|
73
|
+
|first_name | last_name | gender | age | contact |
|
74
|
+
|---|---|---|---|---|
|
75
|
+
| B******* | Bell | male | 30 | bell.***** |
|
76
|
+
| L**** | Duncan | male | 20 | lucas***** |
|
77
|
+
| E******* | May | female | 25 | eliza***** |
|
78
|
+
| C******** | Reid | male | 15 | chris***** |
|
79
|
+
| A** | Avery | female | 40 | amy.a***** |
|
80
|
+
|
49
81
|
JSON type column is also partially supported.
|
50
82
|
|
51
|
-
If you have
|
83
|
+
If you have a `user` column with this JSON data structure
|
52
84
|
|
53
85
|
```json
|
54
86
|
{
|
@@ -57,7 +89,8 @@ If you have
|
|
57
89
|
"last_name": "Bell"
|
58
90
|
},
|
59
91
|
"gender": "male",
|
60
|
-
"age": 30
|
92
|
+
"age": 30,
|
93
|
+
"email": "test_mail@example.com"
|
61
94
|
}
|
62
95
|
```
|
63
96
|
|
@@ -67,8 +100,7 @@ below filter configuration
|
|
67
100
|
filters:
|
68
101
|
- type: mask
|
69
102
|
columns:
|
70
|
-
- { name:
|
71
|
-
- { name: age, path: $.}
|
103
|
+
- { name: user, paths: [{key: $.full_name.first_name}, {key: $.email, type: email}]}
|
72
104
|
```
|
73
105
|
|
74
106
|
would produce
|
@@ -80,7 +112,8 @@ would produce
|
|
80
112
|
"last_name": "Bell"
|
81
113
|
},
|
82
114
|
"gender": "male",
|
83
|
-
"age":
|
115
|
+
"age": 30,
|
116
|
+
"email": "*********@example.com"
|
84
117
|
}
|
85
118
|
```
|
86
119
|
|
data/build.gradle
CHANGED
@@ -1,8 +1,10 @@
|
|
1
1
|
plugins {
|
2
2
|
id "com.jfrog.bintray" version "1.1"
|
3
3
|
id "com.github.jruby-gradle.base" version "0.1.5"
|
4
|
+
id "com.github.kt3k.coveralls" version "2.8.1"
|
4
5
|
id "java"
|
5
6
|
id "checkstyle"
|
7
|
+
id "jacoco"
|
6
8
|
}
|
7
9
|
import com.github.jrubygradle.JRubyExec
|
8
10
|
repositories {
|
@@ -13,17 +15,24 @@ configurations {
|
|
13
15
|
provided
|
14
16
|
}
|
15
17
|
|
16
|
-
version = "0.
|
18
|
+
version = "0.2.1"
|
17
19
|
|
18
20
|
sourceCompatibility = 1.7
|
19
21
|
targetCompatibility = 1.7
|
20
22
|
|
21
23
|
dependencies {
|
22
|
-
compile "org.embulk:embulk-core:0.8.
|
23
|
-
provided "org.embulk:embulk-core:0.8.
|
24
|
+
compile "org.embulk:embulk-core:0.8.29"
|
25
|
+
provided "org.embulk:embulk-core:0.8.29"
|
24
26
|
compile "com.jayway.jsonpath:json-path:2.+"
|
25
27
|
testCompile "junit:junit:4.+"
|
26
|
-
testCompile "org.embulk:embulk-core:0.8.
|
28
|
+
testCompile "org.embulk:embulk-core:0.8.29:tests"
|
29
|
+
}
|
30
|
+
|
31
|
+
jacocoTestReport {
|
32
|
+
reports {
|
33
|
+
xml.enabled = true // coveralls plugin depends on xml format report
|
34
|
+
html.enabled = true
|
35
|
+
}
|
27
36
|
}
|
28
37
|
|
29
38
|
task classpath(type: Copy, dependsOn: ["jar"]) {
|
Binary file
|
@@ -1,6 +1,6 @@
|
|
1
|
-
#
|
1
|
+
#Sun Jan 08 00:35:58 PST 2017
|
2
2
|
distributionBase=GRADLE_USER_HOME
|
3
3
|
distributionPath=wrapper/dists
|
4
4
|
zipStoreBase=GRADLE_USER_HOME
|
5
5
|
zipStorePath=wrapper/dists
|
6
|
-
distributionUrl=https\://services.gradle.org/distributions/gradle-2.
|
6
|
+
distributionUrl=https\://services.gradle.org/distributions/gradle-3.2.1-bin.zip
|
@@ -30,7 +30,7 @@ public class MaskFilterPlugin implements FilterPlugin {
|
|
30
30
|
String getName();
|
31
31
|
|
32
32
|
@Config("type")
|
33
|
-
@ConfigDefault("\"
|
33
|
+
@ConfigDefault("\"all\"")
|
34
34
|
Optional<String> getType();
|
35
35
|
|
36
36
|
@Config("pattern")
|
@@ -41,6 +41,14 @@ public class MaskFilterPlugin implements FilterPlugin {
|
|
41
41
|
@ConfigDefault("null")
|
42
42
|
Optional<Integer> getLength();
|
43
43
|
|
44
|
+
@Config("start")
|
45
|
+
@ConfigDefault("null")
|
46
|
+
Optional<Integer> getStart();
|
47
|
+
|
48
|
+
@Config("end")
|
49
|
+
@ConfigDefault("null")
|
50
|
+
Optional<Integer> getEnd();
|
51
|
+
|
44
52
|
@Config("paths")
|
45
53
|
@ConfigDefault("null")
|
46
54
|
Optional<List<Map<String, String>>> getPaths();
|
@@ -7,6 +7,7 @@ import org.embulk.config.TaskSource;
|
|
7
7
|
import org.embulk.spi.*;
|
8
8
|
import org.embulk.spi.json.JsonParser;
|
9
9
|
import org.embulk.spi.time.Timestamp;
|
10
|
+
import org.embulk.spi.type.Type;
|
10
11
|
import org.embulk.spi.type.Types;
|
11
12
|
import org.embulk.filter.mask.MaskFilterPlugin.*;
|
12
13
|
import org.msgpack.value.Value;
|
@@ -67,62 +68,87 @@ public class MaskPageOutput implements PageOutput {
|
|
67
68
|
continue;
|
68
69
|
}
|
69
70
|
|
70
|
-
|
71
|
-
|
71
|
+
String name = inputColumn.getName();
|
72
|
+
Type type = inputColumn.getType();
|
73
|
+
|
74
|
+
if (Types.STRING.equals(type)) {
|
72
75
|
final String value = reader.getString(inputColumn);
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
+
if (maskColumnMap.containsKey(name)) {
|
77
|
+
builder.setString(inputColumn, maskAsString(name, value));
|
78
|
+
} else {
|
79
|
+
builder.setString(inputColumn, value);
|
80
|
+
}
|
81
|
+
} else if (Types.BOOLEAN.equals(type)) {
|
76
82
|
final boolean value = reader.getBoolean(inputColumn);
|
77
|
-
|
78
|
-
|
79
|
-
|
83
|
+
if (maskColumnMap.containsKey(name)) {
|
84
|
+
builder.setString(inputColumn, maskAsString(name, value));
|
85
|
+
} else {
|
86
|
+
builder.setBoolean(inputColumn, value);
|
87
|
+
}
|
88
|
+
} else if (Types.DOUBLE.equals(type)) {
|
80
89
|
final double value = reader.getDouble(inputColumn);
|
81
|
-
|
82
|
-
|
83
|
-
|
90
|
+
if (maskColumnMap.containsKey(name)) {
|
91
|
+
builder.setString(inputColumn, maskAsString(name, value));
|
92
|
+
} else {
|
93
|
+
builder.setDouble(inputColumn, value);
|
94
|
+
}
|
95
|
+
} else if (Types.LONG.equals(type)) {
|
84
96
|
final long value = reader.getLong(inputColumn);
|
85
|
-
|
86
|
-
|
87
|
-
|
97
|
+
if (maskColumnMap.containsKey(name)) {
|
98
|
+
builder.setString(inputColumn, maskAsString(name, value));
|
99
|
+
} else {
|
100
|
+
builder.setLong(inputColumn, value);
|
101
|
+
}
|
102
|
+
} else if (Types.TIMESTAMP.equals(type)) {
|
88
103
|
final Timestamp value = reader.getTimestamp(inputColumn);
|
89
|
-
|
90
|
-
|
91
|
-
|
104
|
+
if (maskColumnMap.containsKey(name)) {
|
105
|
+
builder.setString(inputColumn, maskAsString(name, value));
|
106
|
+
} else {
|
107
|
+
builder.setTimestamp(inputColumn, value);
|
108
|
+
}
|
109
|
+
} else if (Types.JSON.equals(type)) {
|
92
110
|
final Value value = reader.getJson(inputColumn);
|
93
|
-
|
94
|
-
|
111
|
+
if (maskColumnMap.containsKey(name)) {
|
112
|
+
builder.setJson(inputColumn, maskAsJson(name, value));
|
113
|
+
} else {
|
114
|
+
builder.setJson(inputColumn, value);
|
115
|
+
}
|
95
116
|
} else {
|
96
|
-
throw new DataException("Unexpected type:" +
|
117
|
+
throw new DataException("Unexpected type:" + type);
|
97
118
|
}
|
119
|
+
}
|
120
|
+
}
|
98
121
|
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
122
|
+
private String maskAsString(String name, Object value) {
|
123
|
+
MaskColumn maskColumn = maskColumnMap.get(name);
|
124
|
+
String type = maskColumn.getType().get();
|
125
|
+
String pattern = maskColumn.getPattern().or("");
|
126
|
+
Integer length = maskColumn.getLength().or(-1);
|
127
|
+
Integer start = maskColumn.getStart().or(-1);
|
128
|
+
Integer end = maskColumn.getEnd().or(-1);
|
129
|
+
|
130
|
+
return mask(type, value, pattern, length, start, end);
|
131
|
+
}
|
132
|
+
|
133
|
+
private Value maskAsJson(String name, Value value) {
|
134
|
+
MaskColumn maskColumn = maskColumnMap.get(name);
|
135
|
+
DocumentContext context = parseContext.parse(value.toJson());
|
136
|
+
List<Map<String, String>> paths = maskColumn.getPaths().or(new ArrayList<Map<String, String>>());
|
137
|
+
|
138
|
+
for (Map<String, String> path : paths) {
|
139
|
+
String key = path.get("key");
|
140
|
+
String type = path.containsKey("type") ? path.get("type") : "all";
|
141
|
+
String pattern = path.containsKey("pattern") ? path.get("pattern") : "";
|
142
|
+
Integer length = path.containsKey("length") ? Integer.parseInt(path.get("length")) : -1;
|
143
|
+
Integer start = path.containsKey("start") ? Integer.parseInt(path.get("start")) : -1;
|
144
|
+
Integer end = path.containsKey("end") ? Integer.parseInt(path.get("end")) : -1;
|
145
|
+
Object element = context.read(key);
|
146
|
+
if (!key.equals("$") && element != null) {
|
147
|
+
String maskedValue = mask(type, element, pattern, length, start, end);
|
148
|
+
context.set(key, new TextNode(maskedValue).asText()).jsonString();
|
124
149
|
}
|
125
150
|
}
|
151
|
+
return jsonParser.parse(context.jsonString());
|
126
152
|
}
|
127
153
|
|
128
154
|
@Override
|
@@ -135,25 +161,61 @@ public class MaskPageOutput implements PageOutput {
|
|
135
161
|
builder.close();
|
136
162
|
}
|
137
163
|
|
138
|
-
private String mask(Object value, String pattern, Integer length) {
|
164
|
+
private String mask(String type, Object value, String pattern, Integer length, Integer start, Integer end) {
|
139
165
|
String maskedValue;
|
140
166
|
String nakedValue = value.toString();
|
141
|
-
if (
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
if (length > 0) {
|
150
|
-
maskedValue = StringUtils.repeat("*", length);
|
151
|
-
} else {
|
152
|
-
maskedValue = nakedValue.replaceAll(".", "*");
|
153
|
-
}
|
167
|
+
if (type.equals("regex")) {
|
168
|
+
maskedValue = maskRegex(nakedValue, pattern);
|
169
|
+
} else if (type.equals("substring")) {
|
170
|
+
maskedValue = maskSubstring(nakedValue, start, end, length);
|
171
|
+
} else if (type.equals("email")) {
|
172
|
+
maskedValue = maskEmail(nakedValue, length);
|
173
|
+
} else if (type.equals("all")) {
|
174
|
+
maskedValue = maskAll(nakedValue, length);
|
154
175
|
} else {
|
155
176
|
maskedValue = nakedValue;
|
156
177
|
}
|
157
178
|
return maskedValue;
|
158
179
|
}
|
180
|
+
|
181
|
+
private String maskAll(Object value, Integer length) {
|
182
|
+
String maskedValue;
|
183
|
+
String nakedValue = value.toString();
|
184
|
+
if (length > 0) {
|
185
|
+
maskedValue = StringUtils.repeat("*", length);
|
186
|
+
} else {
|
187
|
+
maskedValue = nakedValue.replaceAll(".", "*");
|
188
|
+
}
|
189
|
+
return maskedValue;
|
190
|
+
}
|
191
|
+
|
192
|
+
private String maskEmail(Object value, Integer length) {
|
193
|
+
String maskedValue;
|
194
|
+
String nakedValue = value.toString();
|
195
|
+
if (length > 0) {
|
196
|
+
String maskPattern = StringUtils.repeat("*", length) + "@$1";
|
197
|
+
maskedValue = nakedValue.replaceFirst("^.+?@(.+)$", maskPattern);
|
198
|
+
} else {
|
199
|
+
maskedValue = nakedValue.replaceAll(".(?=[^@]*@)", "*");
|
200
|
+
}
|
201
|
+
return maskedValue;
|
202
|
+
}
|
203
|
+
|
204
|
+
private String maskRegex(Object value, String pattern) {
|
205
|
+
String nakedValue = value.toString();
|
206
|
+
return nakedValue.replaceAll(pattern, "*");
|
207
|
+
}
|
208
|
+
|
209
|
+
private String maskSubstring(Object value, Integer start, Integer end, Integer length) {
|
210
|
+
String nakedValue = value.toString();
|
211
|
+
|
212
|
+
if (nakedValue.length() <= start || (0 <= end && (end - 1) <= start)) return nakedValue;
|
213
|
+
|
214
|
+
start = start < 0 ? 0 : start;
|
215
|
+
end = (end < 0 || nakedValue.length() <= end) ? nakedValue.length() : end;
|
216
|
+
int repeat = length > 0 ? length : end - start;
|
217
|
+
|
218
|
+
StringBuffer buffer = new StringBuffer(nakedValue);
|
219
|
+
return buffer.replace(start, end, StringUtils.repeat("*", repeat)).toString();
|
220
|
+
}
|
159
221
|
}
|
@@ -262,8 +262,10 @@ public class TestMaskFilterPlugin {
|
|
262
262
|
"columns:\n" +
|
263
263
|
" - { name: _c0}\n" +
|
264
264
|
" - { name: _c1, paths: [{key: $.root.key1}]}\n" +
|
265
|
-
" - { name: _c2, paths: [{key: $.root.key3, length: 2}, {key: $.root.key4,
|
266
|
-
" - { name: _c3, paths: [{key: $.root.key1}, {key: $.root.key3.key7,
|
265
|
+
" - { name: _c2, paths: [{key: $.root.key3, length: 2}, {key: $.root.key4, type: all}]}\n" +
|
266
|
+
" - { name: _c3, paths: [{key: $.root.key1}, {key: $.root.key3.key7, type: email, length: 3}]}\n" +
|
267
|
+
" - { name: _c4, paths: [{key: $.root.key1, type: regex, pattern: \"[0-9]\"}]}\n" +
|
268
|
+
" - { name: _c5, paths: [{key: $.root.key1, type: substring, start: 2, end: 4, length: 5}]}\n";
|
267
269
|
|
268
270
|
ConfigSource config = getConfigFromYaml(configYaml);
|
269
271
|
|
@@ -272,6 +274,8 @@ public class TestMaskFilterPlugin {
|
|
272
274
|
.add("_c1", JSON)
|
273
275
|
.add("_c2", JSON)
|
274
276
|
.add("_c3", JSON)
|
277
|
+
.add("_c4", JSON)
|
278
|
+
.add("_c5", JSON)
|
275
279
|
.build();
|
276
280
|
|
277
281
|
final MaskFilterPlugin maskFilterPlugin = new MaskFilterPlugin();
|
@@ -295,6 +299,8 @@ public class TestMaskFilterPlugin {
|
|
295
299
|
MockPageOutput mockPageOutput = new MockPageOutput();
|
296
300
|
try (PageOutput pageOutput = maskFilterPlugin.open(taskSource, inputSchema, outputSchema, mockPageOutput)) {
|
297
301
|
for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), inputSchema,
|
302
|
+
jsonValue,
|
303
|
+
jsonValue,
|
298
304
|
jsonValue,
|
299
305
|
jsonValue,
|
300
306
|
jsonValue,
|
@@ -309,11 +315,13 @@ public class TestMaskFilterPlugin {
|
|
309
315
|
assertEquals(1, records.size());
|
310
316
|
Object[] record = records.get(0);
|
311
317
|
|
312
|
-
assertEquals(
|
318
|
+
assertEquals(6, record.length);
|
313
319
|
assertEquals("{\"root\":{\"key1\":\"value1\",\"key2\":2,\"key3\":{\"key5\":\"value5\",\"key6\":[0,1,2,3,4],\"key7\":\"testme@example.com\"},\"key4\":[0,1,2,3,4]}}", record[0].toString());
|
314
320
|
assertEquals("{\"root\":{\"key1\":\"******\",\"key2\":2,\"key3\":{\"key5\":\"value5\",\"key6\":[0,1,2,3,4],\"key7\":\"testme@example.com\"},\"key4\":[0,1,2,3,4]}}", record[1].toString());
|
315
321
|
assertEquals("{\"root\":{\"key1\":\"value1\",\"key2\":2,\"key3\":\"**\",\"key4\":\"***********\"}}", record[2].toString());
|
316
322
|
assertEquals("{\"root\":{\"key1\":\"******\",\"key2\":2,\"key3\":{\"key5\":\"value5\",\"key6\":[0,1,2,3,4],\"key7\":\"***@example.com\"},\"key4\":[0,1,2,3,4]}}", record[3].toString());
|
323
|
+
assertEquals("{\"root\":{\"key1\":\"value*\",\"key2\":2,\"key3\":{\"key5\":\"value5\",\"key6\":[0,1,2,3,4],\"key7\":\"testme@example.com\"},\"key4\":[0,1,2,3,4]}}", record[4].toString());
|
324
|
+
assertEquals("{\"root\":{\"key1\":\"va*****e1\",\"key2\":2,\"key3\":{\"key5\":\"value5\",\"key6\":[0,1,2,3,4],\"key7\":\"testme@example.com\"},\"key4\":[0,1,2,3,4]}}", record[5].toString());
|
317
325
|
}
|
318
326
|
});
|
319
327
|
}
|
@@ -323,9 +331,9 @@ public class TestMaskFilterPlugin {
|
|
323
331
|
String configYaml = "" +
|
324
332
|
"type: mask\n" +
|
325
333
|
"columns:\n" +
|
326
|
-
" - { name: _c0,
|
327
|
-
" - { name: _c1,
|
328
|
-
" - { name: _c2,
|
334
|
+
" - { name: _c0, type: email}\n" +
|
335
|
+
" - { name: _c1, type: email}\n" +
|
336
|
+
" - { name: _c2, type: all}\n" +
|
329
337
|
" - { name: _c3}\n";
|
330
338
|
|
331
339
|
ConfigSource config = getConfigFromYaml(configYaml);
|
@@ -372,4 +380,126 @@ public class TestMaskFilterPlugin {
|
|
372
380
|
}
|
373
381
|
});
|
374
382
|
}
|
383
|
+
|
384
|
+
@Test
|
385
|
+
public void testRegexMaskType() {
|
386
|
+
String configYaml = "" +
|
387
|
+
"type: mask\n" +
|
388
|
+
"columns:\n" +
|
389
|
+
" - { name: _c1, type: regex, pattern: \"abc\" }\n" +
|
390
|
+
" - { name: _c2, type: regex, pattern: \"(abc)\" }\n" +
|
391
|
+
" - { name: _c3, type: regex, pattern: \"[0-9]+\" }\n" +
|
392
|
+
" - { name: _c4, type: regex, pattern: \"[0-9]\" }\n";
|
393
|
+
|
394
|
+
ConfigSource config = getConfigFromYaml(configYaml);
|
395
|
+
|
396
|
+
final Schema inputSchema = Schema.builder()
|
397
|
+
.add("_c0", STRING)
|
398
|
+
.add("_c1", STRING)
|
399
|
+
.add("_c2", STRING)
|
400
|
+
.add("_c3", STRING)
|
401
|
+
.add("_c4", STRING)
|
402
|
+
.build();
|
403
|
+
|
404
|
+
final MaskFilterPlugin maskFilterPlugin = new MaskFilterPlugin();
|
405
|
+
maskFilterPlugin.transaction(config, inputSchema, new Control() {
|
406
|
+
@Override
|
407
|
+
public void run(TaskSource taskSource, Schema outputSchema) {
|
408
|
+
final String c0ColumnValue = "_c0_abcdefghi01234";
|
409
|
+
final String c1ColumnValue = "_c1_abcdefghi01234";
|
410
|
+
final String c2ColumnValue = "_c2_abcdefghi01234";
|
411
|
+
final String c3ColumnValue = "_c3_abcdefghi01234";
|
412
|
+
final String c4ColumnValue = "_c4_abcdefghi01234";
|
413
|
+
|
414
|
+
MockPageOutput mockPageOutput = new MockPageOutput();
|
415
|
+
try (PageOutput pageOutput = maskFilterPlugin.open(taskSource, inputSchema, outputSchema, mockPageOutput)) {
|
416
|
+
for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), inputSchema,
|
417
|
+
c0ColumnValue,
|
418
|
+
c1ColumnValue,
|
419
|
+
c2ColumnValue,
|
420
|
+
c3ColumnValue,
|
421
|
+
c4ColumnValue
|
422
|
+
)) {
|
423
|
+
pageOutput.add(page);
|
424
|
+
}
|
425
|
+
pageOutput.finish();
|
426
|
+
}
|
427
|
+
List<Object[]> records = Pages.toObjects(outputSchema, mockPageOutput.pages);
|
428
|
+
|
429
|
+
assertEquals(1, records.size());
|
430
|
+
Object[] record = records.get(0);
|
431
|
+
|
432
|
+
assertEquals(5, record.length);
|
433
|
+
assertEquals("_c0_abcdefghi01234", record[0]);
|
434
|
+
assertEquals("_c1_*defghi01234", record[1]);
|
435
|
+
assertEquals("_c2_*defghi01234", record[2]);
|
436
|
+
assertEquals("_c*_abcdefghi*", record[3]);
|
437
|
+
assertEquals("_c*_abcdefghi*****", record[4]);
|
438
|
+
}
|
439
|
+
});
|
440
|
+
}
|
441
|
+
|
442
|
+
@Test
|
443
|
+
public void testSubstringMaskType() {
|
444
|
+
String configYaml = "" +
|
445
|
+
"type: mask\n" +
|
446
|
+
"columns:\n" +
|
447
|
+
" - { name: _c0, type: substring }\n" +
|
448
|
+
" - { name: _c1, type: substring, start: 2, end: 5 }\n" +
|
449
|
+
" - { name: _c2, type: substring, start: 6 }\n" +
|
450
|
+
" - { name: _c3, type: substring, end: 4 }\n" +
|
451
|
+
" - { name: _c4, type: substring, start: 3, length: 5 }\n" +
|
452
|
+
" - { name: _c5, type: substring, start: 3, end: 2, length: 5 }\n"; // invalid configuration
|
453
|
+
|
454
|
+
ConfigSource config = getConfigFromYaml(configYaml);
|
455
|
+
|
456
|
+
final Schema inputSchema = Schema.builder()
|
457
|
+
.add("_c0", STRING)
|
458
|
+
.add("_c1", STRING)
|
459
|
+
.add("_c2", STRING)
|
460
|
+
.add("_c3", STRING)
|
461
|
+
.add("_c4", STRING)
|
462
|
+
.add("_c5", STRING)
|
463
|
+
.build();
|
464
|
+
|
465
|
+
final MaskFilterPlugin maskFilterPlugin = new MaskFilterPlugin();
|
466
|
+
maskFilterPlugin.transaction(config, inputSchema, new Control() {
|
467
|
+
@Override
|
468
|
+
public void run(TaskSource taskSource, Schema outputSchema) {
|
469
|
+
final String c0ColumnValue = "_c0_abcdefghi01234";
|
470
|
+
final String c1ColumnValue = "_c1_abcdefghi01234";
|
471
|
+
final String c2ColumnValue = "_c2_abcdefghi01234";
|
472
|
+
final String c3ColumnValue = "_c3_abcdefghi01234";
|
473
|
+
final String c4ColumnValue = "_c4_abcdefghi01234";
|
474
|
+
final String c5ColumnValue = "_c5_abcdefghi01234";
|
475
|
+
|
476
|
+
MockPageOutput mockPageOutput = new MockPageOutput();
|
477
|
+
try (PageOutput pageOutput = maskFilterPlugin.open(taskSource, inputSchema, outputSchema, mockPageOutput)) {
|
478
|
+
for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), inputSchema,
|
479
|
+
c0ColumnValue,
|
480
|
+
c1ColumnValue,
|
481
|
+
c2ColumnValue,
|
482
|
+
c3ColumnValue,
|
483
|
+
c4ColumnValue,
|
484
|
+
c5ColumnValue
|
485
|
+
)) {
|
486
|
+
pageOutput.add(page);
|
487
|
+
}
|
488
|
+
pageOutput.finish();
|
489
|
+
}
|
490
|
+
List<Object[]> records = Pages.toObjects(outputSchema, mockPageOutput.pages);
|
491
|
+
|
492
|
+
assertEquals(1, records.size());
|
493
|
+
Object[] record = records.get(0);
|
494
|
+
|
495
|
+
assertEquals(6, record.length);
|
496
|
+
assertEquals("******************", record[0]);
|
497
|
+
assertEquals("_c***bcdefghi01234", record[1]);
|
498
|
+
assertEquals("_c2_ab************", record[2]);
|
499
|
+
assertEquals("****abcdefghi01234", record[3]);
|
500
|
+
assertEquals("_c4*****", record[4]);
|
501
|
+
assertEquals("_c5_abcdefghi01234", record[5]);
|
502
|
+
}
|
503
|
+
});
|
504
|
+
}
|
375
505
|
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-mask
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tetsuo Yamabe
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-08-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -46,6 +46,7 @@ extensions: []
|
|
46
46
|
extra_rdoc_files: []
|
47
47
|
files:
|
48
48
|
- .gitignore
|
49
|
+
- .travis.yml
|
49
50
|
- LICENSE.txt
|
50
51
|
- README.md
|
51
52
|
- build.gradle
|
@@ -59,12 +60,12 @@ files:
|
|
59
60
|
- src/main/java/org/embulk/filter/mask/MaskFilterPlugin.java
|
60
61
|
- src/main/java/org/embulk/filter/mask/MaskPageOutput.java
|
61
62
|
- src/test/java/org/embulk/filter/mask/TestMaskFilterPlugin.java
|
62
|
-
- classpath/accessors-smart-1.
|
63
|
-
- classpath/asm-5.0.
|
64
|
-
- classpath/embulk-filter-mask-0.
|
65
|
-
- classpath/json-path-2.
|
66
|
-
- classpath/json-smart-2.
|
67
|
-
- classpath/slf4j-api-1.7.
|
63
|
+
- classpath/accessors-smart-1.2.jar
|
64
|
+
- classpath/asm-5.0.4.jar
|
65
|
+
- classpath/embulk-filter-mask-0.2.1.jar
|
66
|
+
- classpath/json-path-2.4.0.jar
|
67
|
+
- classpath/json-smart-2.3.jar
|
68
|
+
- classpath/slf4j-api-1.7.25.jar
|
68
69
|
homepage: https://github.com/beniyama/embulk-filter-mask
|
69
70
|
licenses:
|
70
71
|
- MIT
|