embulk-filter-mask 0.0.1 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 131e2785761ffc21130a3351ed57d7e2a96df19d
|
4
|
+
data.tar.gz: 02d57d7419c31e43d91d19c23f36805ad9f4046f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 95c7e11221c993510b6a5ab556f1460397d91d24b7b27740664ee10210465d20866de3743d81bdd0e3f35b024e4ded1135535887d67bdef69b8a6dacd3a386f5
|
7
|
+
data.tar.gz: e930e6d4510b46db97dd7343edeaa0c7079127d152bd07865d01aefcf388c55622d1eec4225525c9f904a39b0cec6df3235a2aea95d2d7784823782bd0c570af
|
data/build.gradle
CHANGED
@@ -13,17 +13,17 @@ configurations {
|
|
13
13
|
provided
|
14
14
|
}
|
15
15
|
|
16
|
-
version = "0.0.1"
|
16
|
+
version = "0.1.1"
|
17
17
|
|
18
18
|
sourceCompatibility = 1.7
|
19
19
|
targetCompatibility = 1.7
|
20
20
|
|
21
21
|
dependencies {
|
22
|
-
compile "org.embulk:embulk-core:0.8.
|
23
|
-
provided "org.embulk:embulk-core:0.8.
|
22
|
+
compile "org.embulk:embulk-core:0.8.15"
|
23
|
+
provided "org.embulk:embulk-core:0.8.15"
|
24
24
|
compile "com.jayway.jsonpath:json-path:2.+"
|
25
25
|
testCompile "junit:junit:4.+"
|
26
|
-
testCompile "org.embulk:embulk-core:0.8.
|
26
|
+
testCompile "org.embulk:embulk-core:0.8.15:tests"
|
27
27
|
}
|
28
28
|
|
29
29
|
task classpath(type: Copy, dependsOn: ["jar"]) {
|
@@ -37,9 +37,13 @@ public class MaskFilterPlugin implements FilterPlugin {
|
|
37
37
|
@ConfigDefault("\"all\"")
|
38
38
|
Optional<String> getPattern();
|
39
39
|
|
40
|
-
@Config("
|
41
|
-
@ConfigDefault("
|
42
|
-
Optional<
|
40
|
+
@Config("length")
|
41
|
+
@ConfigDefault("null")
|
42
|
+
Optional<Integer> getLength();
|
43
|
+
|
44
|
+
@Config("paths")
|
45
|
+
@ConfigDefault("null")
|
46
|
+
Optional<List<Map<String, String>>> getPaths();
|
43
47
|
}
|
44
48
|
|
45
49
|
@Override
|
@@ -1,10 +1,8 @@
|
|
1
1
|
package org.embulk.filter.mask;
|
2
2
|
|
3
3
|
import com.fasterxml.jackson.databind.node.TextNode;
|
4
|
-
import com.jayway.jsonpath
|
5
|
-
import
|
6
|
-
import com.jayway.jsonpath.Option;
|
7
|
-
import com.jayway.jsonpath.ParseContext;
|
4
|
+
import com.jayway.jsonpath.*;
|
5
|
+
import org.apache.commons.lang3.StringUtils;
|
8
6
|
import org.embulk.config.TaskSource;
|
9
7
|
import org.embulk.spi.*;
|
10
8
|
import org.embulk.spi.json.JsonParser;
|
@@ -14,6 +12,7 @@ import org.embulk.filter.mask.MaskFilterPlugin.*;
|
|
14
12
|
import org.msgpack.value.Value;
|
15
13
|
import org.slf4j.Logger;
|
16
14
|
|
15
|
+
import java.util.ArrayList;
|
17
16
|
import java.util.HashMap;
|
18
17
|
import java.util.List;
|
19
18
|
import java.util.Map;
|
@@ -99,17 +98,27 @@ public class MaskPageOutput implements PageOutput {
|
|
99
98
|
|
100
99
|
if (maskColumnMap.containsKey(inputColumn.getName())) {
|
101
100
|
MaskColumn maskColumn = maskColumnMap.get(inputColumn.getName());
|
102
|
-
String targetValue = inputValue.toString();
|
103
|
-
String pattern = maskColumn.getPattern().get();
|
104
101
|
|
105
102
|
if (Types.JSON.equals(inputColumn.getType())) {
|
106
|
-
|
107
|
-
|
108
|
-
String
|
109
|
-
|
110
|
-
|
103
|
+
Value inputJson = (Value) inputValue;
|
104
|
+
DocumentContext context = parseContext.parse(inputJson.toJson());
|
105
|
+
List<Map<String, String>> paths = maskColumn.getPaths().or(new ArrayList<Map<String, String>>());
|
106
|
+
|
107
|
+
for (Map<String, String> path : paths) {
|
108
|
+
String key = path.get("key");
|
109
|
+
String pattern = path.containsKey("pattern") ? path.get("pattern") : "all";
|
110
|
+
int maskLength = path.containsKey("length") ? Integer.parseInt(path.get("length")) : 0;
|
111
|
+
Object element = context.read(key);
|
112
|
+
if (!key.equals("$") && element != null) {
|
113
|
+
String maskedValue = mask(element, pattern, maskLength);
|
114
|
+
String maskedJson = context.set(key, new TextNode(maskedValue).asText()).jsonString();
|
115
|
+
builder.setJson(inputColumn, jsonParser.parse(maskedJson));
|
116
|
+
}
|
117
|
+
}
|
111
118
|
} else {
|
112
|
-
String
|
119
|
+
String pattern = maskColumn.getPattern().get();
|
120
|
+
int maskLength = maskColumn.getLength().or(0);
|
121
|
+
String maskedString = mask(inputValue, pattern, maskLength);
|
113
122
|
builder.setString(inputColumn, maskedString);
|
114
123
|
}
|
115
124
|
}
|
@@ -126,16 +135,24 @@ public class MaskPageOutput implements PageOutput {
|
|
126
135
|
builder.close();
|
127
136
|
}
|
128
137
|
|
129
|
-
private String mask(
|
138
|
+
private String mask(Object value, String pattern, Integer length) {
|
130
139
|
String maskedValue;
|
140
|
+
String nakedValue = value.toString();
|
131
141
|
if (pattern.equals("email")) {
|
132
|
-
|
133
|
-
|
134
|
-
|
142
|
+
if (length > 0) {
|
143
|
+
String maskPattern = StringUtils.repeat("*", length) + "@$1";
|
144
|
+
maskedValue = nakedValue.replaceFirst("^.+?@(.+)$", maskPattern);
|
145
|
+
} else {
|
146
|
+
maskedValue = nakedValue.replaceAll(".(?=[^@]*@)", "*");
|
147
|
+
}
|
135
148
|
} else if (pattern.equals("all")) {
|
136
|
-
|
149
|
+
if (length > 0) {
|
150
|
+
maskedValue = StringUtils.repeat("*", length);
|
151
|
+
} else {
|
152
|
+
maskedValue = nakedValue.replaceAll(".", "*");
|
153
|
+
}
|
137
154
|
} else {
|
138
|
-
maskedValue =
|
155
|
+
maskedValue = nakedValue;
|
139
156
|
}
|
140
157
|
return maskedValue;
|
141
158
|
}
|
@@ -1,14 +1,31 @@
|
|
1
1
|
package org.embulk.filter.mask;
|
2
2
|
|
3
|
+
import com.fasterxml.jackson.core.JsonProcessingException;
|
4
|
+
import com.fasterxml.jackson.databind.ObjectMapper;
|
5
|
+
import com.google.common.base.Throwables;
|
6
|
+
import com.google.common.collect.ImmutableMap;
|
3
7
|
import org.embulk.EmbulkTestRuntime;
|
4
8
|
import org.embulk.config.ConfigException;
|
5
9
|
import org.embulk.config.ConfigLoader;
|
6
10
|
import org.embulk.config.ConfigSource;
|
7
|
-
import org.embulk.
|
8
|
-
import org.embulk.spi
|
11
|
+
import org.embulk.config.TaskSource;
|
12
|
+
import org.embulk.spi.*;
|
13
|
+
import org.embulk.spi.TestPageBuilderReader.*;
|
14
|
+
import org.embulk.spi.time.Timestamp;
|
15
|
+
import org.embulk.spi.util.Pages;
|
9
16
|
import org.junit.Rule;
|
10
17
|
import org.junit.Test;
|
11
18
|
import org.junit.rules.ExpectedException;
|
19
|
+
import org.msgpack.value.Value;
|
20
|
+
|
21
|
+
import java.util.List;
|
22
|
+
|
23
|
+
|
24
|
+
import static org.embulk.filter.mask.MaskFilterPlugin.PluginTask;
|
25
|
+
import static org.embulk.filter.mask.MaskFilterPlugin.Control;
|
26
|
+
import static org.embulk.spi.type.Types.*;
|
27
|
+
import static org.junit.Assert.assertEquals;
|
28
|
+
import static org.msgpack.value.ValueFactory.*;
|
12
29
|
|
13
30
|
public class TestMaskFilterPlugin {
|
14
31
|
@Rule
|
@@ -17,11 +34,47 @@ public class TestMaskFilterPlugin {
|
|
17
34
|
@Rule
|
18
35
|
public ExpectedException exception = ExpectedException.none();
|
19
36
|
|
37
|
+
private static Value s(String value) {
|
38
|
+
return newString(value);
|
39
|
+
}
|
40
|
+
|
41
|
+
private static Value i(int value) {
|
42
|
+
return newInteger(value);
|
43
|
+
}
|
44
|
+
|
45
|
+
private static Value f(double value) {
|
46
|
+
return newFloat(value);
|
47
|
+
}
|
48
|
+
|
49
|
+
private static Value b(boolean value) {
|
50
|
+
return newBoolean(value);
|
51
|
+
}
|
52
|
+
|
20
53
|
private ConfigSource getConfigFromYaml(String yaml) {
|
21
54
|
ConfigLoader loader = new ConfigLoader(Exec.getModelManager());
|
22
55
|
return loader.fromYamlString(yaml);
|
23
56
|
}
|
24
57
|
|
58
|
+
private String getMaskedCharacters(Object value) {
|
59
|
+
String maskedValue = "";
|
60
|
+
for (int i = 0; i < value.toString().length(); i++) {
|
61
|
+
maskedValue += "*";
|
62
|
+
}
|
63
|
+
return maskedValue;
|
64
|
+
}
|
65
|
+
|
66
|
+
private String getMaskedEmail(String email) {
|
67
|
+
String maskedValue = "";
|
68
|
+
for (int i = 0; i < email.length(); i++) {
|
69
|
+
if (email.charAt(i) == '@') {
|
70
|
+
maskedValue += email.substring(i);
|
71
|
+
break;
|
72
|
+
}
|
73
|
+
maskedValue += "*";
|
74
|
+
}
|
75
|
+
return maskedValue;
|
76
|
+
}
|
77
|
+
|
25
78
|
@Test
|
26
79
|
public void testThrowExceptionAtMissingColumnsField() {
|
27
80
|
String configYaml = "type: mask";
|
@@ -29,6 +82,294 @@ public class TestMaskFilterPlugin {
|
|
29
82
|
|
30
83
|
exception.expect(ConfigException.class);
|
31
84
|
exception.expectMessage("Field 'columns' is required but not set");
|
32
|
-
config.loadConfig(
|
85
|
+
config.loadConfig(PluginTask.class);
|
86
|
+
}
|
87
|
+
|
88
|
+
@Test
|
89
|
+
public void testOnlyMaskTargetColumns() {
|
90
|
+
String configYaml = "" +
|
91
|
+
"type: mask\n" +
|
92
|
+
"columns:\n" +
|
93
|
+
" - { name: _c0}\n" +
|
94
|
+
" - { name: _c2}\n";
|
95
|
+
|
96
|
+
ConfigSource config = getConfigFromYaml(configYaml);
|
97
|
+
|
98
|
+
final Schema inputSchema = Schema.builder()
|
99
|
+
.add("_c0", STRING)
|
100
|
+
.add("_c1", STRING)
|
101
|
+
.add("_c2", STRING)
|
102
|
+
.add("_c3", STRING)
|
103
|
+
.build();
|
104
|
+
|
105
|
+
final MaskFilterPlugin maskFilterPlugin = new MaskFilterPlugin();
|
106
|
+
maskFilterPlugin.transaction(config, inputSchema, new Control() {
|
107
|
+
@Override
|
108
|
+
public void run(TaskSource taskSource, Schema outputSchema) {
|
109
|
+
final String c0ColumnValue = "_c0_THIS_MUST_BE_MASKED";
|
110
|
+
final String c1ColumnValue = "_c1_THIS_MUST_NOT_BE_MASKED";
|
111
|
+
final String c2ColumnValue = "_c2_THIS_MUST_BE_MASKED_ALSO";
|
112
|
+
final String c3ColumnValue = "_c3_THIS_MUST_NOT_BE_MASKED_ALSO";
|
113
|
+
|
114
|
+
MockPageOutput mockPageOutput = new MockPageOutput();
|
115
|
+
try (PageOutput pageOutput = maskFilterPlugin.open(taskSource, inputSchema, outputSchema, mockPageOutput)) {
|
116
|
+
for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), inputSchema,
|
117
|
+
c0ColumnValue,
|
118
|
+
c1ColumnValue,
|
119
|
+
c2ColumnValue,
|
120
|
+
c3ColumnValue
|
121
|
+
)) {
|
122
|
+
pageOutput.add(page);
|
123
|
+
}
|
124
|
+
pageOutput.finish();
|
125
|
+
}
|
126
|
+
List<Object[]> records = Pages.toObjects(outputSchema, mockPageOutput.pages);
|
127
|
+
|
128
|
+
assertEquals(1, records.size());
|
129
|
+
Object[] record = records.get(0);
|
130
|
+
|
131
|
+
assertEquals(4, record.length);
|
132
|
+
assertEquals(getMaskedCharacters(c0ColumnValue), record[0]);
|
133
|
+
assertEquals(c1ColumnValue, record[1]);
|
134
|
+
assertEquals(getMaskedCharacters(c2ColumnValue), record[2]);
|
135
|
+
assertEquals(c3ColumnValue, record[3]);
|
136
|
+
}
|
137
|
+
});
|
138
|
+
}
|
139
|
+
|
140
|
+
@Test
|
141
|
+
public void testPassVarietyOfTypes() {
|
142
|
+
String configYaml = "" +
|
143
|
+
"type: mask\n" +
|
144
|
+
"columns:\n" +
|
145
|
+
" - { name: _dummy}\n";
|
146
|
+
|
147
|
+
ConfigSource config = getConfigFromYaml(configYaml);
|
148
|
+
|
149
|
+
final Schema inputSchema = Schema.builder()
|
150
|
+
.add("_c0", STRING)
|
151
|
+
.add("_c1", BOOLEAN)
|
152
|
+
.add("_c2", DOUBLE)
|
153
|
+
.add("_c3", LONG)
|
154
|
+
.add("_c4", TIMESTAMP)
|
155
|
+
.add("_c5", JSON)
|
156
|
+
.build();
|
157
|
+
|
158
|
+
final MaskFilterPlugin maskFilterPlugin = new MaskFilterPlugin();
|
159
|
+
maskFilterPlugin.transaction(config, inputSchema, new Control() {
|
160
|
+
@Override
|
161
|
+
public void run(TaskSource taskSource, Schema outputSchema) {
|
162
|
+
final String c0ColumnValue = "_c0_STRING";
|
163
|
+
final Boolean c1ColumnValue = false;
|
164
|
+
final Double c2ColumnValue = 12345.6789;
|
165
|
+
final Long c3ColumnValue = Long.MAX_VALUE;
|
166
|
+
final Timestamp c4ColumnValue = Timestamp.ofEpochSecond(4);
|
167
|
+
final Value c5ColumnValue = newMapBuilder().put(s("_c5"), s("_v5")).build();
|
168
|
+
|
169
|
+
MockPageOutput mockPageOutput = new MockPageOutput();
|
170
|
+
try (PageOutput pageOutput = maskFilterPlugin.open(taskSource, inputSchema, outputSchema, mockPageOutput)) {
|
171
|
+
for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), inputSchema,
|
172
|
+
c0ColumnValue,
|
173
|
+
c1ColumnValue,
|
174
|
+
c2ColumnValue,
|
175
|
+
c3ColumnValue,
|
176
|
+
c4ColumnValue,
|
177
|
+
c5ColumnValue
|
178
|
+
)) {
|
179
|
+
pageOutput.add(page);
|
180
|
+
}
|
181
|
+
pageOutput.finish();
|
182
|
+
}
|
183
|
+
List<Object[]> records = Pages.toObjects(outputSchema, mockPageOutput.pages);
|
184
|
+
|
185
|
+
assertEquals(1, records.size());
|
186
|
+
Object[] record = records.get(0);
|
187
|
+
|
188
|
+
assertEquals(6, record.length);
|
189
|
+
assertEquals(c0ColumnValue, record[0]);
|
190
|
+
assertEquals(c1ColumnValue, record[1]);
|
191
|
+
assertEquals(c2ColumnValue, record[2]);
|
192
|
+
assertEquals(c3ColumnValue, record[3]);
|
193
|
+
assertEquals(c4ColumnValue, record[4]);
|
194
|
+
assertEquals(c5ColumnValue, record[5]);
|
195
|
+
}
|
196
|
+
});
|
197
|
+
}
|
198
|
+
|
199
|
+
@Test
|
200
|
+
public void testMaskVarietyOfTypes() {
|
201
|
+
String configYaml = "" +
|
202
|
+
"type: mask\n" +
|
203
|
+
"columns:\n" +
|
204
|
+
" - { name: _c0}\n" +
|
205
|
+
" - { name: _c1}\n" +
|
206
|
+
" - { name: _c2}\n" +
|
207
|
+
" - { name: _c3}\n" +
|
208
|
+
" - { name: _c4}\n";
|
209
|
+
|
210
|
+
ConfigSource config = getConfigFromYaml(configYaml);
|
211
|
+
|
212
|
+
final Schema inputSchema = Schema.builder()
|
213
|
+
.add("_c0", STRING)
|
214
|
+
.add("_c1", BOOLEAN)
|
215
|
+
.add("_c2", DOUBLE)
|
216
|
+
.add("_c3", LONG)
|
217
|
+
.add("_c4", TIMESTAMP)
|
218
|
+
.build();
|
219
|
+
|
220
|
+
final MaskFilterPlugin maskFilterPlugin = new MaskFilterPlugin();
|
221
|
+
maskFilterPlugin.transaction(config, inputSchema, new Control() {
|
222
|
+
@Override
|
223
|
+
public void run(TaskSource taskSource, Schema outputSchema) {
|
224
|
+
final String c0ColumnValue = "_c0_STRING";
|
225
|
+
final Boolean c1ColumnValue = false;
|
226
|
+
final Double c2ColumnValue = 12345.6789;
|
227
|
+
final Long c3ColumnValue = Long.MAX_VALUE;
|
228
|
+
final Timestamp c4ColumnValue = Timestamp.ofEpochSecond(4);
|
229
|
+
|
230
|
+
MockPageOutput mockPageOutput = new MockPageOutput();
|
231
|
+
try (PageOutput pageOutput = maskFilterPlugin.open(taskSource, inputSchema, outputSchema, mockPageOutput)) {
|
232
|
+
for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), inputSchema,
|
233
|
+
c0ColumnValue,
|
234
|
+
c1ColumnValue,
|
235
|
+
c2ColumnValue,
|
236
|
+
c3ColumnValue,
|
237
|
+
c4ColumnValue
|
238
|
+
)) {
|
239
|
+
pageOutput.add(page);
|
240
|
+
}
|
241
|
+
pageOutput.finish();
|
242
|
+
}
|
243
|
+
List<Object[]> records = Pages.toObjects(outputSchema, mockPageOutput.pages);
|
244
|
+
|
245
|
+
assertEquals(1, records.size());
|
246
|
+
Object[] record = records.get(0);
|
247
|
+
|
248
|
+
assertEquals(5, record.length);
|
249
|
+
assertEquals(getMaskedCharacters(c0ColumnValue), record[0]);
|
250
|
+
assertEquals(getMaskedCharacters(c1ColumnValue), record[1]);
|
251
|
+
assertEquals(getMaskedCharacters(c2ColumnValue), record[2]);
|
252
|
+
assertEquals(getMaskedCharacters(c3ColumnValue), record[3]);
|
253
|
+
assertEquals(getMaskedCharacters(c4ColumnValue), record[4]);
|
254
|
+
}
|
255
|
+
});
|
256
|
+
}
|
257
|
+
|
258
|
+
@Test
|
259
|
+
public void testMaskJson() {
|
260
|
+
String configYaml = "" +
|
261
|
+
"type: mask\n" +
|
262
|
+
"columns:\n" +
|
263
|
+
" - { name: _c0}\n" +
|
264
|
+
" - { name: _c1, paths: [{key: $.root.key1}]}\n" +
|
265
|
+
" - { name: _c2, paths: [{key: $.root.key3, length: 2}, {key: $.root.key4, pattern: all}]}\n" +
|
266
|
+
" - { name: _c3, paths: [{key: $.root.key1}, {key: $.root.key3.key7, pattern: email, length: 3}]}\n";
|
267
|
+
|
268
|
+
ConfigSource config = getConfigFromYaml(configYaml);
|
269
|
+
|
270
|
+
final Schema inputSchema = Schema.builder()
|
271
|
+
.add("_c0", JSON)
|
272
|
+
.add("_c1", JSON)
|
273
|
+
.add("_c2", JSON)
|
274
|
+
.add("_c3", JSON)
|
275
|
+
.build();
|
276
|
+
|
277
|
+
final MaskFilterPlugin maskFilterPlugin = new MaskFilterPlugin();
|
278
|
+
maskFilterPlugin.transaction(config, inputSchema, new Control() {
|
279
|
+
@Override
|
280
|
+
public void run(TaskSource taskSource, Schema outputSchema) {
|
281
|
+
final Value jsonValue = newMapBuilder().put(
|
282
|
+
s("root"),
|
283
|
+
newMap(
|
284
|
+
s("key1"), s("value1"),
|
285
|
+
s("key2"), i(2),
|
286
|
+
s("key3"), newMap(
|
287
|
+
s("key5"), s("value5"),
|
288
|
+
s("key6"), newArray(i(0), i(1), i(2), i(3), i(4)),
|
289
|
+
s("key7"), s("testme@example.com")
|
290
|
+
),
|
291
|
+
s("key4"), newArray(i(0), i(1), i(2), i(3), i(4))
|
292
|
+
)
|
293
|
+
).build();
|
294
|
+
|
295
|
+
MockPageOutput mockPageOutput = new MockPageOutput();
|
296
|
+
try (PageOutput pageOutput = maskFilterPlugin.open(taskSource, inputSchema, outputSchema, mockPageOutput)) {
|
297
|
+
for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), inputSchema,
|
298
|
+
jsonValue,
|
299
|
+
jsonValue,
|
300
|
+
jsonValue,
|
301
|
+
jsonValue
|
302
|
+
)) {
|
303
|
+
pageOutput.add(page);
|
304
|
+
}
|
305
|
+
pageOutput.finish();
|
306
|
+
}
|
307
|
+
List<Object[]> records = Pages.toObjects(outputSchema, mockPageOutput.pages);
|
308
|
+
|
309
|
+
assertEquals(1, records.size());
|
310
|
+
Object[] record = records.get(0);
|
311
|
+
|
312
|
+
assertEquals(4, record.length);
|
313
|
+
assertEquals("{\"root\":{\"key1\":\"value1\",\"key2\":2,\"key3\":{\"key5\":\"value5\",\"key6\":[0,1,2,3,4],\"key7\":\"testme@example.com\"},\"key4\":[0,1,2,3,4]}}", record[0].toString());
|
314
|
+
assertEquals("{\"root\":{\"key1\":\"******\",\"key2\":2,\"key3\":{\"key5\":\"value5\",\"key6\":[0,1,2,3,4],\"key7\":\"testme@example.com\"},\"key4\":[0,1,2,3,4]}}", record[1].toString());
|
315
|
+
assertEquals("{\"root\":{\"key1\":\"value1\",\"key2\":2,\"key3\":\"**\",\"key4\":\"***********\"}}", record[2].toString());
|
316
|
+
assertEquals("{\"root\":{\"key1\":\"******\",\"key2\":2,\"key3\":{\"key5\":\"value5\",\"key6\":[0,1,2,3,4],\"key7\":\"***@example.com\"},\"key4\":[0,1,2,3,4]}}", record[3].toString());
|
317
|
+
}
|
318
|
+
});
|
319
|
+
}
|
320
|
+
|
321
|
+
@Test
|
322
|
+
public void testMaskEmail() {
|
323
|
+
String configYaml = "" +
|
324
|
+
"type: mask\n" +
|
325
|
+
"columns:\n" +
|
326
|
+
" - { name: _c0, pattern: email}\n" +
|
327
|
+
" - { name: _c1, pattern: email}\n" +
|
328
|
+
" - { name: _c2, pattern: all}\n" +
|
329
|
+
" - { name: _c3}\n";
|
330
|
+
|
331
|
+
ConfigSource config = getConfigFromYaml(configYaml);
|
332
|
+
|
333
|
+
final Schema inputSchema = Schema.builder()
|
334
|
+
.add("_c0", STRING)
|
335
|
+
.add("_c1", STRING)
|
336
|
+
.add("_c2", STRING)
|
337
|
+
.add("_c3", STRING)
|
338
|
+
.add("_c4", STRING)
|
339
|
+
.build();
|
340
|
+
|
341
|
+
final MaskFilterPlugin maskFilterPlugin = new MaskFilterPlugin();
|
342
|
+
maskFilterPlugin.transaction(config, inputSchema, new Control() {
|
343
|
+
@Override
|
344
|
+
public void run(TaskSource taskSource, Schema outputSchema) {
|
345
|
+
final String email1 = "dummy_test-me.1234@dummy-mail1.com";
|
346
|
+
final String email2 = "!#$%&'*+-/=?^_`.{|}~@dummy-mail2.com";
|
347
|
+
|
348
|
+
MockPageOutput mockPageOutput = new MockPageOutput();
|
349
|
+
try (PageOutput pageOutput = maskFilterPlugin.open(taskSource, inputSchema, outputSchema, mockPageOutput)) {
|
350
|
+
for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), inputSchema,
|
351
|
+
email1,
|
352
|
+
email2,
|
353
|
+
email1,
|
354
|
+
email1,
|
355
|
+
email1
|
356
|
+
)) {
|
357
|
+
pageOutput.add(page);
|
358
|
+
}
|
359
|
+
pageOutput.finish();
|
360
|
+
}
|
361
|
+
List<Object[]> records = Pages.toObjects(outputSchema, mockPageOutput.pages);
|
362
|
+
|
363
|
+
assertEquals(1, records.size());
|
364
|
+
Object[] record = records.get(0);
|
365
|
+
|
366
|
+
assertEquals(5, record.length);
|
367
|
+
assertEquals(getMaskedEmail(email1), record[0]);
|
368
|
+
assertEquals(getMaskedEmail(email2), record[1]);
|
369
|
+
assertEquals(getMaskedCharacters(email1), record[2]);
|
370
|
+
assertEquals(getMaskedCharacters(email1), record[3]);
|
371
|
+
assertEquals(email1, record[4]);
|
372
|
+
}
|
373
|
+
});
|
33
374
|
}
|
34
375
|
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-mask
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tetsuo Yamabe
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-01-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -61,7 +61,7 @@ files:
|
|
61
61
|
- src/test/java/org/embulk/filter/mask/TestMaskFilterPlugin.java
|
62
62
|
- classpath/accessors-smart-1.1.jar
|
63
63
|
- classpath/asm-5.0.3.jar
|
64
|
-
- classpath/embulk-filter-mask-0.0.1.jar
|
64
|
+
- classpath/embulk-filter-mask-0.1.1.jar
|
65
65
|
- classpath/json-path-2.2.0.jar
|
66
66
|
- classpath/json-smart-2.2.1.jar
|
67
67
|
- classpath/slf4j-api-1.7.16.jar
|