embulk-filter-mask 0.1.1 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 131e2785761ffc21130a3351ed57d7e2a96df19d
4
- data.tar.gz: 02d57d7419c31e43d91d19c23f36805ad9f4046f
3
+ metadata.gz: 420f82310881451f5b66848494db06f292c9f755
4
+ data.tar.gz: b3fb787c18bf556cf169d85343a6dacb4030f116
5
5
  SHA512:
6
- metadata.gz: 95c7e11221c993510b6a5ab556f1460397d91d24b7b27740664ee10210465d20866de3743d81bdd0e3f35b024e4ded1135535887d67bdef69b8a6dacd3a386f5
7
- data.tar.gz: e930e6d4510b46db97dd7343edeaa0c7079127d152bd07865d01aefcf388c55622d1eec4225525c9f904a39b0cec6df3235a2aea95d2d7784823782bd0c570af
6
+ metadata.gz: 501044a9ebb52794d3bda67aa9ef947157300be945e798ace60068c7162206c522dfa228eb936659493bf40135d37a2afb5f9fcf9dcf324a36c40b636a5cf1e1
7
+ data.tar.gz: b482c2be719bab2770ba7f34c618d0d2bd611f56103bcec9df102be970965d9defbfb2af6297671d1e78674a4ba5ca1eb39eda773c8c0fe5de3f341b3b6c21c8
@@ -0,0 +1,12 @@
1
+ language: java
2
+ jdk:
3
+ # Tentatively ignore these environments as we face SSL errors
4
+ # https://github.com/gradle/gradle/issues/2421
5
+ #
6
+ # - openjdk7
7
+ # - oraclejdk7
8
+ - oraclejdk8
9
+ script:
10
+ - ./gradlew test
11
+ after_success:
12
+ - ./gradlew jacocoTestReport coveralls
data/README.md CHANGED
@@ -1,6 +1,8 @@
1
1
  # Mask filter plugin for Embulk
2
2
 
3
- mask columns with asterisks (still in initial development phase and missing basic functionalities to use in production )
3
+ [![Coverage Status](https://coveralls.io/repos/github/beniyama/embulk-filter-mask/badge.svg)](https://coveralls.io/github/beniyama/embulk-filter-mask)
4
+
5
+ Mask columns with asterisks in a variety of patterns (still in initial development phase and missing basic features to use in production).
4
6
 
5
7
  ## Overview
6
8
 
@@ -8,13 +10,23 @@ mask columns with asterisks (still in initial development phase and missing basi
8
10
 
9
11
  ## Configuration
10
12
 
13
+ *Caution*: the mask type (such as `all` or `email`) is now specified with `type` instead of `pattern`, which was used for this purpose in version 0.1.1 and earlier. `pattern` is now used only for the regular expression of the `regex` type.
14
+
11
15
  - **columns**: target columns which would be replaced with asterisks (string, required)
12
16
  - **name**: name of the column (string, required)
13
- - **pattern**: mask pattern, `all` or `email` (string, default: `all`)
14
- - **path**: JSON path, works if the column type is JSON (string, default: `$.`)
17
+ - **type**: mask type, `all`, `email`, `regex` or `substring` (string, default: `all`)
18
+ - **paths**: list of JSON paths, each with an optional mask type; applies only when the column type is JSON
19
+ - `[{key: $.json_path1}, {key: $.json_path2}]` would mask both `$.json_path1` and `$.json_path2` nodes
20
+ - Elements under the nodes would be converted to string and then masked (e.g., `[0,1,2]` -> `*******`)
21
+ - **length**: if specified, the masked portion is replaced with a fixed number of asterisks (integer, optional; supported only for the `all`, `email` and `substring` types)
22
+ - **pattern**: Regex pattern such as "[0-9]+" (string, required for `regex` type)
23
+ - **start**: The beginning index for the `substring` type. The index is 0-based and inclusive (integer, default: 0)
24
+ - **end**: The ending index for `substring` type. The value is exclusive (integer, default: length of the target column)
15
25
 
16
26
  ## Example
17
27
 
28
+
29
+
18
30
  If you have the data below in a CSV file or a file in another format,
19
31
 
20
32
  |first_name | last_name | gender | age | contact |
@@ -33,7 +45,7 @@ filters:
33
45
  columns:
34
46
  - { name: last_name}
35
47
  - { name: age}
36
- - { name: contact, pattern: email}
48
+ - { name: contact, type: email, length: 5}
37
49
  ```
38
50
 
39
51
  would produce
@@ -46,9 +58,29 @@ would produce
46
58
  | Christian | **** | male | ** | *****@example.com |
47
59
  | Amy | ***** | female | ** | *****@example.com |
48
60
 
61
+ If you use `regex` and/or `substring` types,
62
+
63
+ ```yaml
64
+ filters:
65
+ - type: mask
66
+ columns:
67
+ - { name: first_name, type: regex, pattern: "[a-z]"}
68
+ - { name: contact, type: substring, start: 5, length: 5}
69
+ ```
70
+
71
+ would produce
72
+
73
+ |first_name | last_name | gender | age | contact |
74
+ |---|---|---|---|---|
75
+ | B******* | Bell | male | 30 | bell.***** |
76
+ | L**** | Duncan | male | 20 | lucas***** |
77
+ | E******* | May | female | 25 | eliza***** |
78
+ | C******** | Reid | male | 15 | chris***** |
79
+ | A** | Avery | female | 40 | amy.a***** |
80
+
49
81
  JSON type column is also partially supported.
50
82
 
51
- If you have
83
+ If you have a `user` column with this JSON data structure
52
84
 
53
85
  ```json
54
86
  {
@@ -57,7 +89,8 @@ If you have
57
89
  "last_name": "Bell"
58
90
  },
59
91
  "gender": "male",
60
- "age": 30
92
+ "age": 30,
93
+ "email": "test_mail@example.com"
61
94
  }
62
95
  ```
63
96
 
@@ -67,8 +100,7 @@ below filter configuration
67
100
  filters:
68
101
  - type: mask
69
102
  columns:
70
- - { name: full_name, path: $.first_name}
71
- - { name: age, path: $.}
103
+ - { name: user, paths: [{key: $.full_name.first_name}, {key: $.email, type: email}]}
72
104
  ```
73
105
 
74
106
  would produce
@@ -80,7 +112,8 @@ would produce
80
112
  "last_name": "Bell"
81
113
  },
82
114
  "gender": "male",
83
- "age": **
115
+ "age": 30,
116
+ "email": "*********@example.com"
84
117
  }
85
118
  ```
86
119
 
@@ -1,8 +1,10 @@
1
1
  plugins {
2
2
  id "com.jfrog.bintray" version "1.1"
3
3
  id "com.github.jruby-gradle.base" version "0.1.5"
4
+ id "com.github.kt3k.coveralls" version "2.8.1"
4
5
  id "java"
5
6
  id "checkstyle"
7
+ id "jacoco"
6
8
  }
7
9
  import com.github.jrubygradle.JRubyExec
8
10
  repositories {
@@ -13,17 +15,24 @@ configurations {
13
15
  provided
14
16
  }
15
17
 
16
- version = "0.1.1"
18
+ version = "0.2.1"
17
19
 
18
20
  sourceCompatibility = 1.7
19
21
  targetCompatibility = 1.7
20
22
 
21
23
  dependencies {
22
- compile "org.embulk:embulk-core:0.8.15"
23
- provided "org.embulk:embulk-core:0.8.15"
24
+ compile "org.embulk:embulk-core:0.8.29"
25
+ provided "org.embulk:embulk-core:0.8.29"
24
26
  compile "com.jayway.jsonpath:json-path:2.+"
25
27
  testCompile "junit:junit:4.+"
26
- testCompile "org.embulk:embulk-core:0.8.15:tests"
28
+ testCompile "org.embulk:embulk-core:0.8.29:tests"
29
+ }
30
+
31
+ jacocoTestReport {
32
+ reports {
33
+ xml.enabled = true // coveralls plugin depends on xml format report
34
+ html.enabled = true
35
+ }
27
36
  }
28
37
 
29
38
  task classpath(type: Copy, dependsOn: ["jar"]) {
@@ -1,6 +1,6 @@
1
- #Tue Jul 12 16:30:09 JST 2016
1
+ #Sun Jan 08 00:35:58 PST 2017
2
2
  distributionBase=GRADLE_USER_HOME
3
3
  distributionPath=wrapper/dists
4
4
  zipStoreBase=GRADLE_USER_HOME
5
5
  zipStorePath=wrapper/dists
6
- distributionUrl=https\://services.gradle.org/distributions/gradle-2.10-all.zip
6
+ distributionUrl=https\://services.gradle.org/distributions/gradle-3.2.1-bin.zip
@@ -30,7 +30,7 @@ public class MaskFilterPlugin implements FilterPlugin {
30
30
  String getName();
31
31
 
32
32
  @Config("type")
33
- @ConfigDefault("\"string\"")
33
+ @ConfigDefault("\"all\"")
34
34
  Optional<String> getType();
35
35
 
36
36
  @Config("pattern")
@@ -41,6 +41,14 @@ public class MaskFilterPlugin implements FilterPlugin {
41
41
  @ConfigDefault("null")
42
42
  Optional<Integer> getLength();
43
43
 
44
+ @Config("start")
45
+ @ConfigDefault("null")
46
+ Optional<Integer> getStart();
47
+
48
+ @Config("end")
49
+ @ConfigDefault("null")
50
+ Optional<Integer> getEnd();
51
+
44
52
  @Config("paths")
45
53
  @ConfigDefault("null")
46
54
  Optional<List<Map<String, String>>> getPaths();
@@ -7,6 +7,7 @@ import org.embulk.config.TaskSource;
7
7
  import org.embulk.spi.*;
8
8
  import org.embulk.spi.json.JsonParser;
9
9
  import org.embulk.spi.time.Timestamp;
10
+ import org.embulk.spi.type.Type;
10
11
  import org.embulk.spi.type.Types;
11
12
  import org.embulk.filter.mask.MaskFilterPlugin.*;
12
13
  import org.msgpack.value.Value;
@@ -67,62 +68,87 @@ public class MaskPageOutput implements PageOutput {
67
68
  continue;
68
69
  }
69
70
 
70
- Object inputValue;
71
- if (Types.STRING.equals(inputColumn.getType())) {
71
+ String name = inputColumn.getName();
72
+ Type type = inputColumn.getType();
73
+
74
+ if (Types.STRING.equals(type)) {
72
75
  final String value = reader.getString(inputColumn);
73
- inputValue = value;
74
- builder.setString(inputColumn, value);
75
- } else if (Types.BOOLEAN.equals(inputColumn.getType())) {
76
+ if (maskColumnMap.containsKey(name)) {
77
+ builder.setString(inputColumn, maskAsString(name, value));
78
+ } else {
79
+ builder.setString(inputColumn, value);
80
+ }
81
+ } else if (Types.BOOLEAN.equals(type)) {
76
82
  final boolean value = reader.getBoolean(inputColumn);
77
- inputValue = value;
78
- builder.setBoolean(inputColumn, value);
79
- } else if (Types.DOUBLE.equals(inputColumn.getType())) {
83
+ if (maskColumnMap.containsKey(name)) {
84
+ builder.setString(inputColumn, maskAsString(name, value));
85
+ } else {
86
+ builder.setBoolean(inputColumn, value);
87
+ }
88
+ } else if (Types.DOUBLE.equals(type)) {
80
89
  final double value = reader.getDouble(inputColumn);
81
- inputValue = value;
82
- builder.setDouble(inputColumn, value);
83
- } else if (Types.LONG.equals(inputColumn.getType())) {
90
+ if (maskColumnMap.containsKey(name)) {
91
+ builder.setString(inputColumn, maskAsString(name, value));
92
+ } else {
93
+ builder.setDouble(inputColumn, value);
94
+ }
95
+ } else if (Types.LONG.equals(type)) {
84
96
  final long value = reader.getLong(inputColumn);
85
- inputValue = value;
86
- builder.setLong(inputColumn, value);
87
- } else if (Types.TIMESTAMP.equals(inputColumn.getType())) {
97
+ if (maskColumnMap.containsKey(name)) {
98
+ builder.setString(inputColumn, maskAsString(name, value));
99
+ } else {
100
+ builder.setLong(inputColumn, value);
101
+ }
102
+ } else if (Types.TIMESTAMP.equals(type)) {
88
103
  final Timestamp value = reader.getTimestamp(inputColumn);
89
- inputValue = value;
90
- builder.setTimestamp(inputColumn, value);
91
- } else if (Types.JSON.equals(inputColumn.getType())) {
104
+ if (maskColumnMap.containsKey(name)) {
105
+ builder.setString(inputColumn, maskAsString(name, value));
106
+ } else {
107
+ builder.setTimestamp(inputColumn, value);
108
+ }
109
+ } else if (Types.JSON.equals(type)) {
92
110
  final Value value = reader.getJson(inputColumn);
93
- inputValue = value;
94
- builder.setJson(inputColumn, value);
111
+ if (maskColumnMap.containsKey(name)) {
112
+ builder.setJson(inputColumn, maskAsJson(name, value));
113
+ } else {
114
+ builder.setJson(inputColumn, value);
115
+ }
95
116
  } else {
96
- throw new DataException("Unexpected type:" + inputColumn.getType());
117
+ throw new DataException("Unexpected type:" + type);
97
118
  }
119
+ }
120
+ }
98
121
 
99
- if (maskColumnMap.containsKey(inputColumn.getName())) {
100
- MaskColumn maskColumn = maskColumnMap.get(inputColumn.getName());
101
-
102
- if (Types.JSON.equals(inputColumn.getType())) {
103
- Value inputJson = (Value) inputValue;
104
- DocumentContext context = parseContext.parse(inputJson.toJson());
105
- List<Map<String, String>> paths = maskColumn.getPaths().or(new ArrayList<Map<String, String>>());
106
-
107
- for (Map<String, String> path : paths) {
108
- String key = path.get("key");
109
- String pattern = path.containsKey("pattern") ? path.get("pattern") : "all";
110
- int maskLength = path.containsKey("length") ? Integer.parseInt(path.get("length")) : 0;
111
- Object element = context.read(key);
112
- if (!key.equals("$") && element != null) {
113
- String maskedValue = mask(element, pattern, maskLength);
114
- String maskedJson = context.set(key, new TextNode(maskedValue).asText()).jsonString();
115
- builder.setJson(inputColumn, jsonParser.parse(maskedJson));
116
- }
117
- }
118
- } else {
119
- String pattern = maskColumn.getPattern().get();
120
- int maskLength = maskColumn.getLength().or(0);
121
- String maskedString = mask(inputValue, pattern, maskLength);
122
- builder.setString(inputColumn, maskedString);
123
- }
122
+ private String maskAsString(String name, Object value) {
123
+ MaskColumn maskColumn = maskColumnMap.get(name);
124
+ String type = maskColumn.getType().get();
125
+ String pattern = maskColumn.getPattern().or("");
126
+ Integer length = maskColumn.getLength().or(-1);
127
+ Integer start = maskColumn.getStart().or(-1);
128
+ Integer end = maskColumn.getEnd().or(-1);
129
+
130
+ return mask(type, value, pattern, length, start, end);
131
+ }
132
+
133
+ private Value maskAsJson(String name, Value value) {
134
+ MaskColumn maskColumn = maskColumnMap.get(name);
135
+ DocumentContext context = parseContext.parse(value.toJson());
136
+ List<Map<String, String>> paths = maskColumn.getPaths().or(new ArrayList<Map<String, String>>());
137
+
138
+ for (Map<String, String> path : paths) {
139
+ String key = path.get("key");
140
+ String type = path.containsKey("type") ? path.get("type") : "all";
141
+ String pattern = path.containsKey("pattern") ? path.get("pattern") : "";
142
+ Integer length = path.containsKey("length") ? Integer.parseInt(path.get("length")) : -1;
143
+ Integer start = path.containsKey("start") ? Integer.parseInt(path.get("start")) : -1;
144
+ Integer end = path.containsKey("end") ? Integer.parseInt(path.get("end")) : -1;
145
+ Object element = context.read(key);
146
+ if (!key.equals("$") && element != null) {
147
+ String maskedValue = mask(type, element, pattern, length, start, end);
148
+ context.set(key, new TextNode(maskedValue).asText()).jsonString();
124
149
  }
125
150
  }
151
+ return jsonParser.parse(context.jsonString());
126
152
  }
127
153
 
128
154
  @Override
@@ -135,25 +161,61 @@ public class MaskPageOutput implements PageOutput {
135
161
  builder.close();
136
162
  }
137
163
 
138
- private String mask(Object value, String pattern, Integer length) {
164
+ private String mask(String type, Object value, String pattern, Integer length, Integer start, Integer end) {
139
165
  String maskedValue;
140
166
  String nakedValue = value.toString();
141
- if (pattern.equals("email")) {
142
- if (length > 0) {
143
- String maskPattern = StringUtils.repeat("*", length) + "@$1";
144
- maskedValue = nakedValue.replaceFirst("^.+?@(.+)$", maskPattern);
145
- } else {
146
- maskedValue = nakedValue.replaceAll(".(?=[^@]*@)", "*");
147
- }
148
- } else if (pattern.equals("all")) {
149
- if (length > 0) {
150
- maskedValue = StringUtils.repeat("*", length);
151
- } else {
152
- maskedValue = nakedValue.replaceAll(".", "*");
153
- }
167
+ if (type.equals("regex")) {
168
+ maskedValue = maskRegex(nakedValue, pattern);
169
+ } else if (type.equals("substring")) {
170
+ maskedValue = maskSubstring(nakedValue, start, end, length);
171
+ } else if (type.equals("email")) {
172
+ maskedValue = maskEmail(nakedValue, length);
173
+ } else if (type.equals("all")) {
174
+ maskedValue = maskAll(nakedValue, length);
154
175
  } else {
155
176
  maskedValue = nakedValue;
156
177
  }
157
178
  return maskedValue;
158
179
  }
180
+
181
+ private String maskAll(Object value, Integer length) {
182
+ String maskedValue;
183
+ String nakedValue = value.toString();
184
+ if (length > 0) {
185
+ maskedValue = StringUtils.repeat("*", length);
186
+ } else {
187
+ maskedValue = nakedValue.replaceAll(".", "*");
188
+ }
189
+ return maskedValue;
190
+ }
191
+
192
+ private String maskEmail(Object value, Integer length) {
193
+ String maskedValue;
194
+ String nakedValue = value.toString();
195
+ if (length > 0) {
196
+ String maskPattern = StringUtils.repeat("*", length) + "@$1";
197
+ maskedValue = nakedValue.replaceFirst("^.+?@(.+)$", maskPattern);
198
+ } else {
199
+ maskedValue = nakedValue.replaceAll(".(?=[^@]*@)", "*");
200
+ }
201
+ return maskedValue;
202
+ }
203
+
204
+ private String maskRegex(Object value, String pattern) {
205
+ String nakedValue = value.toString();
206
+ return nakedValue.replaceAll(pattern, "*");
207
+ }
208
+
209
+ private String maskSubstring(Object value, Integer start, Integer end, Integer length) {
210
+ String nakedValue = value.toString();
211
+
212
+ if (nakedValue.length() <= start || (0 <= end && (end - 1) <= start)) return nakedValue;
213
+
214
+ start = start < 0 ? 0 : start;
215
+ end = (end < 0 || nakedValue.length() <= end) ? nakedValue.length() : end;
216
+ int repeat = length > 0 ? length : end - start;
217
+
218
+ StringBuffer buffer = new StringBuffer(nakedValue);
219
+ return buffer.replace(start, end, StringUtils.repeat("*", repeat)).toString();
220
+ }
159
221
  }
@@ -262,8 +262,10 @@ public class TestMaskFilterPlugin {
262
262
  "columns:\n" +
263
263
  " - { name: _c0}\n" +
264
264
  " - { name: _c1, paths: [{key: $.root.key1}]}\n" +
265
- " - { name: _c2, paths: [{key: $.root.key3, length: 2}, {key: $.root.key4, pattern: all}]}\n" +
266
- " - { name: _c3, paths: [{key: $.root.key1}, {key: $.root.key3.key7, pattern: email, length: 3}]}\n";
265
+ " - { name: _c2, paths: [{key: $.root.key3, length: 2}, {key: $.root.key4, type: all}]}\n" +
266
+ " - { name: _c3, paths: [{key: $.root.key1}, {key: $.root.key3.key7, type: email, length: 3}]}\n" +
267
+ " - { name: _c4, paths: [{key: $.root.key1, type: regex, pattern: \"[0-9]\"}]}\n" +
268
+ " - { name: _c5, paths: [{key: $.root.key1, type: substring, start: 2, end: 4, length: 5}]}\n";
267
269
 
268
270
  ConfigSource config = getConfigFromYaml(configYaml);
269
271
 
@@ -272,6 +274,8 @@ public class TestMaskFilterPlugin {
272
274
  .add("_c1", JSON)
273
275
  .add("_c2", JSON)
274
276
  .add("_c3", JSON)
277
+ .add("_c4", JSON)
278
+ .add("_c5", JSON)
275
279
  .build();
276
280
 
277
281
  final MaskFilterPlugin maskFilterPlugin = new MaskFilterPlugin();
@@ -295,6 +299,8 @@ public class TestMaskFilterPlugin {
295
299
  MockPageOutput mockPageOutput = new MockPageOutput();
296
300
  try (PageOutput pageOutput = maskFilterPlugin.open(taskSource, inputSchema, outputSchema, mockPageOutput)) {
297
301
  for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), inputSchema,
302
+ jsonValue,
303
+ jsonValue,
298
304
  jsonValue,
299
305
  jsonValue,
300
306
  jsonValue,
@@ -309,11 +315,13 @@ public class TestMaskFilterPlugin {
309
315
  assertEquals(1, records.size());
310
316
  Object[] record = records.get(0);
311
317
 
312
- assertEquals(4, record.length);
318
+ assertEquals(6, record.length);
313
319
  assertEquals("{\"root\":{\"key1\":\"value1\",\"key2\":2,\"key3\":{\"key5\":\"value5\",\"key6\":[0,1,2,3,4],\"key7\":\"testme@example.com\"},\"key4\":[0,1,2,3,4]}}", record[0].toString());
314
320
  assertEquals("{\"root\":{\"key1\":\"******\",\"key2\":2,\"key3\":{\"key5\":\"value5\",\"key6\":[0,1,2,3,4],\"key7\":\"testme@example.com\"},\"key4\":[0,1,2,3,4]}}", record[1].toString());
315
321
  assertEquals("{\"root\":{\"key1\":\"value1\",\"key2\":2,\"key3\":\"**\",\"key4\":\"***********\"}}", record[2].toString());
316
322
  assertEquals("{\"root\":{\"key1\":\"******\",\"key2\":2,\"key3\":{\"key5\":\"value5\",\"key6\":[0,1,2,3,4],\"key7\":\"***@example.com\"},\"key4\":[0,1,2,3,4]}}", record[3].toString());
323
+ assertEquals("{\"root\":{\"key1\":\"value*\",\"key2\":2,\"key3\":{\"key5\":\"value5\",\"key6\":[0,1,2,3,4],\"key7\":\"testme@example.com\"},\"key4\":[0,1,2,3,4]}}", record[4].toString());
324
+ assertEquals("{\"root\":{\"key1\":\"va*****e1\",\"key2\":2,\"key3\":{\"key5\":\"value5\",\"key6\":[0,1,2,3,4],\"key7\":\"testme@example.com\"},\"key4\":[0,1,2,3,4]}}", record[5].toString());
317
325
  }
318
326
  });
319
327
  }
@@ -323,9 +331,9 @@ public class TestMaskFilterPlugin {
323
331
  String configYaml = "" +
324
332
  "type: mask\n" +
325
333
  "columns:\n" +
326
- " - { name: _c0, pattern: email}\n" +
327
- " - { name: _c1, pattern: email}\n" +
328
- " - { name: _c2, pattern: all}\n" +
334
+ " - { name: _c0, type: email}\n" +
335
+ " - { name: _c1, type: email}\n" +
336
+ " - { name: _c2, type: all}\n" +
329
337
  " - { name: _c3}\n";
330
338
 
331
339
  ConfigSource config = getConfigFromYaml(configYaml);
@@ -372,4 +380,126 @@ public class TestMaskFilterPlugin {
372
380
  }
373
381
  });
374
382
  }
383
+
384
+ @Test
385
+ public void testRegexMaskType() {
386
+ String configYaml = "" +
387
+ "type: mask\n" +
388
+ "columns:\n" +
389
+ " - { name: _c1, type: regex, pattern: \"abc\" }\n" +
390
+ " - { name: _c2, type: regex, pattern: \"(abc)\" }\n" +
391
+ " - { name: _c3, type: regex, pattern: \"[0-9]+\" }\n" +
392
+ " - { name: _c4, type: regex, pattern: \"[0-9]\" }\n";
393
+
394
+ ConfigSource config = getConfigFromYaml(configYaml);
395
+
396
+ final Schema inputSchema = Schema.builder()
397
+ .add("_c0", STRING)
398
+ .add("_c1", STRING)
399
+ .add("_c2", STRING)
400
+ .add("_c3", STRING)
401
+ .add("_c4", STRING)
402
+ .build();
403
+
404
+ final MaskFilterPlugin maskFilterPlugin = new MaskFilterPlugin();
405
+ maskFilterPlugin.transaction(config, inputSchema, new Control() {
406
+ @Override
407
+ public void run(TaskSource taskSource, Schema outputSchema) {
408
+ final String c0ColumnValue = "_c0_abcdefghi01234";
409
+ final String c1ColumnValue = "_c1_abcdefghi01234";
410
+ final String c2ColumnValue = "_c2_abcdefghi01234";
411
+ final String c3ColumnValue = "_c3_abcdefghi01234";
412
+ final String c4ColumnValue = "_c4_abcdefghi01234";
413
+
414
+ MockPageOutput mockPageOutput = new MockPageOutput();
415
+ try (PageOutput pageOutput = maskFilterPlugin.open(taskSource, inputSchema, outputSchema, mockPageOutput)) {
416
+ for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), inputSchema,
417
+ c0ColumnValue,
418
+ c1ColumnValue,
419
+ c2ColumnValue,
420
+ c3ColumnValue,
421
+ c4ColumnValue
422
+ )) {
423
+ pageOutput.add(page);
424
+ }
425
+ pageOutput.finish();
426
+ }
427
+ List<Object[]> records = Pages.toObjects(outputSchema, mockPageOutput.pages);
428
+
429
+ assertEquals(1, records.size());
430
+ Object[] record = records.get(0);
431
+
432
+ assertEquals(5, record.length);
433
+ assertEquals("_c0_abcdefghi01234", record[0]);
434
+ assertEquals("_c1_*defghi01234", record[1]);
435
+ assertEquals("_c2_*defghi01234", record[2]);
436
+ assertEquals("_c*_abcdefghi*", record[3]);
437
+ assertEquals("_c*_abcdefghi*****", record[4]);
438
+ }
439
+ });
440
+ }
441
+
442
+ @Test
443
+ public void testSubstringMaskType() {
444
+ String configYaml = "" +
445
+ "type: mask\n" +
446
+ "columns:\n" +
447
+ " - { name: _c0, type: substring }\n" +
448
+ " - { name: _c1, type: substring, start: 2, end: 5 }\n" +
449
+ " - { name: _c2, type: substring, start: 6 }\n" +
450
+ " - { name: _c3, type: substring, end: 4 }\n" +
451
+ " - { name: _c4, type: substring, start: 3, length: 5 }\n" +
452
+ " - { name: _c5, type: substring, start: 3, end: 2, length: 5 }\n"; // invalid configuration
453
+
454
+ ConfigSource config = getConfigFromYaml(configYaml);
455
+
456
+ final Schema inputSchema = Schema.builder()
457
+ .add("_c0", STRING)
458
+ .add("_c1", STRING)
459
+ .add("_c2", STRING)
460
+ .add("_c3", STRING)
461
+ .add("_c4", STRING)
462
+ .add("_c5", STRING)
463
+ .build();
464
+
465
+ final MaskFilterPlugin maskFilterPlugin = new MaskFilterPlugin();
466
+ maskFilterPlugin.transaction(config, inputSchema, new Control() {
467
+ @Override
468
+ public void run(TaskSource taskSource, Schema outputSchema) {
469
+ final String c0ColumnValue = "_c0_abcdefghi01234";
470
+ final String c1ColumnValue = "_c1_abcdefghi01234";
471
+ final String c2ColumnValue = "_c2_abcdefghi01234";
472
+ final String c3ColumnValue = "_c3_abcdefghi01234";
473
+ final String c4ColumnValue = "_c4_abcdefghi01234";
474
+ final String c5ColumnValue = "_c5_abcdefghi01234";
475
+
476
+ MockPageOutput mockPageOutput = new MockPageOutput();
477
+ try (PageOutput pageOutput = maskFilterPlugin.open(taskSource, inputSchema, outputSchema, mockPageOutput)) {
478
+ for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), inputSchema,
479
+ c0ColumnValue,
480
+ c1ColumnValue,
481
+ c2ColumnValue,
482
+ c3ColumnValue,
483
+ c4ColumnValue,
484
+ c5ColumnValue
485
+ )) {
486
+ pageOutput.add(page);
487
+ }
488
+ pageOutput.finish();
489
+ }
490
+ List<Object[]> records = Pages.toObjects(outputSchema, mockPageOutput.pages);
491
+
492
+ assertEquals(1, records.size());
493
+ Object[] record = records.get(0);
494
+
495
+ assertEquals(6, record.length);
496
+ assertEquals("******************", record[0]);
497
+ assertEquals("_c***bcdefghi01234", record[1]);
498
+ assertEquals("_c2_ab************", record[2]);
499
+ assertEquals("****abcdefghi01234", record[3]);
500
+ assertEquals("_c4*****", record[4]);
501
+ assertEquals("_c5_abcdefghi01234", record[5]);
502
+ }
503
+ });
504
+ }
375
505
  }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-mask
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tetsuo Yamabe
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-01-06 00:00:00.000000000 Z
11
+ date: 2017-08-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -46,6 +46,7 @@ extensions: []
46
46
  extra_rdoc_files: []
47
47
  files:
48
48
  - .gitignore
49
+ - .travis.yml
49
50
  - LICENSE.txt
50
51
  - README.md
51
52
  - build.gradle
@@ -59,12 +60,12 @@ files:
59
60
  - src/main/java/org/embulk/filter/mask/MaskFilterPlugin.java
60
61
  - src/main/java/org/embulk/filter/mask/MaskPageOutput.java
61
62
  - src/test/java/org/embulk/filter/mask/TestMaskFilterPlugin.java
62
- - classpath/accessors-smart-1.1.jar
63
- - classpath/asm-5.0.3.jar
64
- - classpath/embulk-filter-mask-0.1.1.jar
65
- - classpath/json-path-2.2.0.jar
66
- - classpath/json-smart-2.2.1.jar
67
- - classpath/slf4j-api-1.7.16.jar
63
+ - classpath/accessors-smart-1.2.jar
64
+ - classpath/asm-5.0.4.jar
65
+ - classpath/embulk-filter-mask-0.2.1.jar
66
+ - classpath/json-path-2.4.0.jar
67
+ - classpath/json-smart-2.3.jar
68
+ - classpath/slf4j-api-1.7.25.jar
68
69
  homepage: https://github.com/beniyama/embulk-filter-mask
69
70
  licenses:
70
71
  - MIT