embulk-filter-column 0.5.0 → 0.5.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +8 -0
- data/build.gradle +7 -4
- data/src/main/java/org/embulk/filter/column/ColumnFilterPlugin.java +1 -1
- data/src/main/java/org/embulk/filter/column/JsonColumn.java +41 -4
- data/src/main/java/org/embulk/filter/column/JsonVisitor.java +48 -30
- data/src/test/java/org/embulk/filter/column/TestJsonColumn.java +57 -0
- data/src/test/java/org/embulk/filter/column/TestJsonVisitor.java +408 -0
- metadata +5 -4
- data/src/test/java/org/embulk/filter/TestColumnFilterPlugin.java +0 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1faf79742c3c3eefadac1c9c31884e335df93761
|
4
|
+
data.tar.gz: 1b3b85f3b54c51bcc26b8dde92f6c7e8c5728238
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6a5cbcd1e4b1dbf79fcdd66dc5220f6e9023c9416880824fa4f3de0d13b3685e8243a3104bb427f0c222eb7c1ac17f021786aaf33b9564ee5c487d9218faafa3
|
7
|
+
data.tar.gz: 181ea5b27402a4d95482fade22bba2a136eb69d54808289a44b4790bcfb1d345e402b063663df36f45180082a39fa318e8cda82eef2192f7b16b9bc528c6a428
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
data/build.gradle
CHANGED
@@ -3,6 +3,7 @@ plugins {
|
|
3
3
|
id "com.github.jruby-gradle.base" version "0.1.5"
|
4
4
|
id "java"
|
5
5
|
id "checkstyle"
|
6
|
+
id "jacoco"
|
6
7
|
}
|
7
8
|
import com.github.jrubygradle.JRubyExec
|
8
9
|
repositories {
|
@@ -13,15 +14,17 @@ configurations {
|
|
13
14
|
provided
|
14
15
|
}
|
15
16
|
|
16
|
-
version = "0.5.0"
|
17
|
+
version = "0.5.1"
|
17
18
|
sourceCompatibility = 1.7
|
18
19
|
targetCompatibility = 1.7
|
19
20
|
|
20
21
|
dependencies {
|
21
|
-
compile "org.embulk:embulk-core:0.8
|
22
|
-
provided "org.embulk:embulk-core:0.8
|
23
|
-
|
22
|
+
compile "org.embulk:embulk-core:0.8.+"
|
23
|
+
provided "org.embulk:embulk-core:0.8.+"
|
24
|
+
|
24
25
|
testCompile "junit:junit:4.+"
|
26
|
+
testCompile "org.embulk:embulk-core:0.8.+:tests"
|
27
|
+
testCompile "org.embulk:embulk-standards:0.8.+"
|
25
28
|
}
|
26
29
|
|
27
30
|
checkstyle {
|
@@ -106,7 +106,7 @@ public class ColumnFilterPlugin implements FilterPlugin
|
|
106
106
|
}
|
107
107
|
}
|
108
108
|
|
109
|
-
|
109
|
+
static Schema buildOutputSchema(PluginTask task, Schema inputSchema)
|
110
110
|
{
|
111
111
|
List<ColumnConfig> columns = task.getColumns();
|
112
112
|
List<ColumnConfig> addColumns = task.getAddColumns();
|
@@ -2,6 +2,7 @@ package org.embulk.filter.column;
|
|
2
2
|
|
3
3
|
import org.embulk.config.ConfigException;
|
4
4
|
import org.embulk.spi.type.Type;
|
5
|
+
import org.msgpack.value.IntegerValue;
|
5
6
|
import org.msgpack.value.StringValue;
|
6
7
|
import org.msgpack.value.Value;
|
7
8
|
import org.msgpack.value.ValueFactory;
|
@@ -16,12 +17,14 @@ public class JsonColumn
|
|
16
17
|
private StringValue pathValue = null;
|
17
18
|
private String parentPath = null;
|
18
19
|
private String baseName = null;
|
20
|
+
private Long baseIndex = null;
|
19
21
|
private StringValue parentPathValue = null;
|
20
22
|
private StringValue baseNameValue = null;
|
21
23
|
|
22
24
|
private StringValue srcValue = null;
|
23
25
|
private String srcParentPath = null;
|
24
26
|
private String srcBaseName = null;
|
27
|
+
private Long srcBaseIndex = null;
|
25
28
|
private StringValue srcParentPathValue = null;
|
26
29
|
private StringValue srcBaseNameValue = null;
|
27
30
|
|
@@ -45,12 +48,14 @@ public class JsonColumn
|
|
45
48
|
this.pathValue = ValueFactory.newString(path);
|
46
49
|
this.parentPath = parentPath(path);
|
47
50
|
this.baseName = baseName(path);
|
51
|
+
this.baseIndex = baseIndex(path);
|
48
52
|
this.parentPathValue = ValueFactory.newString(parentPath);
|
49
53
|
this.baseNameValue = ValueFactory.newString(baseName);
|
50
54
|
|
51
55
|
this.srcValue = ValueFactory.newString(this.src);
|
52
56
|
this.srcParentPath = parentPath(this.src);
|
53
57
|
this.srcBaseName = baseName(this.src);
|
58
|
+
this.srcBaseIndex = baseIndex(this.src);
|
54
59
|
this.srcParentPathValue = ValueFactory.newString(this.srcParentPath);
|
55
60
|
this.srcBaseNameValue = ValueFactory.newString(this.srcBaseName);
|
56
61
|
|
@@ -94,6 +99,11 @@ public class JsonColumn
|
|
94
99
|
return baseName;
|
95
100
|
}
|
96
101
|
|
102
|
+
public Long getBaseIndex()
|
103
|
+
{
|
104
|
+
return baseIndex;
|
105
|
+
}
|
106
|
+
|
97
107
|
public StringValue getParentPathValue()
|
98
108
|
{
|
99
109
|
return parentPathValue;
|
@@ -119,6 +129,11 @@ public class JsonColumn
|
|
119
129
|
return srcBaseName;
|
120
130
|
}
|
121
131
|
|
132
|
+
public Long getSrcBaseIndex()
|
133
|
+
{
|
134
|
+
return srcBaseIndex;
|
135
|
+
}
|
136
|
+
|
122
137
|
public StringValue getSrcParentPathValue()
|
123
138
|
{
|
124
139
|
return srcParentPathValue;
|
@@ -134,8 +149,7 @@ public class JsonColumn
|
|
134
149
|
{
|
135
150
|
String[] parts = path.split("\\.");
|
136
151
|
StringBuilder builder = new StringBuilder();
|
137
|
-
|
138
|
-
for (int i = 1; i < parts.length - 1; i++) {
|
152
|
+
for (int i = 0; i < parts.length - 1; i++) {
|
139
153
|
builder.append(".").append(parts[i]);
|
140
154
|
}
|
141
155
|
if (parts[parts.length - 1].contains("[")) {
|
@@ -145,12 +159,35 @@ public class JsonColumn
|
|
145
159
|
builder.append("[").append(arrayParts[j]);
|
146
160
|
}
|
147
161
|
}
|
148
|
-
return builder.toString();
|
162
|
+
return builder.deleteCharAt(0).toString();
|
149
163
|
}
|
150
164
|
|
151
165
|
public static String baseName(String path)
|
152
166
|
{
|
153
167
|
String[] parts = path.split("\\.");
|
154
|
-
|
168
|
+
String[] arrayParts = parts[parts.length - 1].split("\\[");
|
169
|
+
if (arrayParts.length == 1) { // no [i]
|
170
|
+
return arrayParts[arrayParts.length - 1];
|
171
|
+
}
|
172
|
+
else {
|
173
|
+
return "[" + arrayParts[arrayParts.length - 1];
|
174
|
+
}
|
175
|
+
}
|
176
|
+
|
177
|
+
public static Long baseIndex(String path)
|
178
|
+
{
|
179
|
+
String baseName = baseName(path);
|
180
|
+
if (baseName.startsWith("[") && baseName.endsWith("]")) {
|
181
|
+
String baseIndex = baseName.substring(1, baseName.length() - 1);
|
182
|
+
try {
|
183
|
+
return Long.parseLong(baseIndex);
|
184
|
+
}
|
185
|
+
catch (NumberFormatException e) {
|
186
|
+
return null;
|
187
|
+
}
|
188
|
+
}
|
189
|
+
else {
|
190
|
+
return null;
|
191
|
+
}
|
155
192
|
}
|
156
193
|
}
|
@@ -30,14 +30,14 @@ import java.util.Map;
|
|
30
30
|
|
31
31
|
public class JsonVisitor
|
32
32
|
{
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
33
|
+
static final Logger logger = Exec.getLogger(ColumnFilterPlugin.class);
|
34
|
+
final PluginTask task;
|
35
|
+
final Schema inputSchema;
|
36
|
+
final Schema outputSchema;
|
37
|
+
final HashSet<String> shouldVisitSet = new HashSet<>();
|
38
|
+
final HashMap<String, LinkedHashMap<String, JsonColumn>> jsonColumns = new HashMap<>();
|
39
|
+
final HashMap<String, LinkedHashMap<String, JsonColumn>> jsonAddColumns = new HashMap<>();
|
40
|
+
final HashMap<String, HashSet<String>> jsonDropColumns = new HashMap<>();
|
41
41
|
|
42
42
|
JsonVisitor(PluginTask task, Schema inputSchema, Schema outputSchema)
|
43
43
|
{
|
@@ -132,7 +132,7 @@ public class JsonVisitor
|
|
132
132
|
}
|
133
133
|
if (column.getSrc().isPresent()) {
|
134
134
|
String src = column.getSrc().get();
|
135
|
-
|
135
|
+
jsonColumnsPut(name, new JsonColumn(name, null, null, src));
|
136
136
|
}
|
137
137
|
else if (column.getType().isPresent() && column.getDefault().isPresent()) { // add column
|
138
138
|
Type type = column.getType().get();
|
@@ -190,7 +190,7 @@ public class JsonVisitor
|
|
190
190
|
partialPath.append(".").append(arrayParts[0]);
|
191
191
|
this.shouldVisitSet.add(partialPath.toString());
|
192
192
|
for (int j = 1; j < arrayParts.length; j++) {
|
193
|
-
//
|
193
|
+
// Simply add [0] or [*] here
|
194
194
|
partialPath.append("[").append(arrayParts[j]);
|
195
195
|
this.shouldVisitSet.add(partialPath.toString());
|
196
196
|
}
|
@@ -234,28 +234,43 @@ public class JsonVisitor
|
|
234
234
|
for (int i = 0; i < size; i++) {
|
235
235
|
String newPath = newArrayJsonPath(rootPath, i);
|
236
236
|
if (! jsonDropColumns.contains(newPath)) {
|
237
|
-
|
237
|
+
Value v = arrayValue.get(i);
|
238
|
+
newValue.add(j++, visit(newPath, v));
|
238
239
|
}
|
239
240
|
}
|
240
241
|
}
|
241
242
|
else if (this.jsonColumns.containsKey(rootPath)) {
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
if (
|
246
|
-
|
243
|
+
for (JsonColumn jsonColumn : this.jsonColumns.get(rootPath).values()) {
|
244
|
+
int src = jsonColumn.getSrcBaseIndex().intValue();
|
245
|
+
Value v = (src < arrayValue.size() ? arrayValue.get(src) : null);
|
246
|
+
if (v == null) {
|
247
|
+
v = jsonColumn.getDefaultValue();
|
247
248
|
}
|
249
|
+
String newPath = jsonColumn.getPath();
|
250
|
+
Value visited = visit(newPath, v);
|
251
|
+
// int i = jsonColumn.getBaseIndex().intValue();
|
252
|
+
// index is shifted, so j++ is used.
|
253
|
+
newValue.add(j++, visited == null ? ValueFactory.newNil() : visited);
|
248
254
|
}
|
249
255
|
}
|
250
256
|
else {
|
251
257
|
for (int i = 0; i < size; i++) {
|
252
258
|
String newPath = newArrayJsonPath(rootPath, i);
|
253
|
-
|
259
|
+
Value v = arrayValue.get(i);
|
260
|
+
newValue.add(j++, visit(newPath, v));
|
254
261
|
}
|
255
262
|
}
|
256
263
|
if (this.jsonAddColumns.containsKey(rootPath)) {
|
257
264
|
for (JsonColumn jsonColumn : this.jsonAddColumns.get(rootPath).values()) {
|
258
|
-
|
265
|
+
int src = jsonColumn.getSrcBaseIndex().intValue();
|
266
|
+
Value v = (src < arrayValue.size() ? arrayValue.get(src) : null);
|
267
|
+
if (v == null) {
|
268
|
+
v = jsonColumn.getDefaultValue();
|
269
|
+
}
|
270
|
+
String newPath = jsonColumn.getPath();
|
271
|
+
Value visited = visit(newPath, v);
|
272
|
+
// NOTE: the index given in the column name is currently ignored; the value is appended to the end of the array
|
273
|
+
newValue.add(j++, visited == null ? ValueFactory.newNil() : visited);
|
259
274
|
}
|
260
275
|
}
|
261
276
|
return ValueFactory.newArray(newValue.toArray(new Value[0]), true);
|
@@ -281,17 +296,16 @@ public class JsonVisitor
|
|
281
296
|
}
|
282
297
|
else if (this.jsonColumns.containsKey(rootPath)) {
|
283
298
|
Map<Value, Value> map = mapValue.map();
|
284
|
-
|
285
|
-
for (JsonColumn jsonColumn : jsonColumns.values()) {
|
299
|
+
for (JsonColumn jsonColumn : this.jsonColumns.get(rootPath).values()) {
|
286
300
|
Value src = jsonColumn.getSrcBaseNameValue();
|
287
301
|
Value v = map.get(src);
|
302
|
+
if (v == null) {
|
303
|
+
v = jsonColumn.getDefaultValue();
|
304
|
+
}
|
288
305
|
String newPath = jsonColumn.getPath();
|
289
306
|
Value visited = visit(newPath, v);
|
290
|
-
|
291
|
-
|
292
|
-
}
|
293
|
-
newValue.add(i++, jsonColumn.getPathValue());
|
294
|
-
newValue.add(i++, visited);
|
307
|
+
newValue.add(i++, jsonColumn.getBaseNameValue());
|
308
|
+
newValue.add(i++, visited == null ? ValueFactory.newNil() : visited);
|
295
309
|
}
|
296
310
|
}
|
297
311
|
else {
|
@@ -306,15 +320,16 @@ public class JsonVisitor
|
|
306
320
|
}
|
307
321
|
if (this.jsonAddColumns.containsKey(rootPath)) {
|
308
322
|
Map<Value, Value> map = mapValue.map();
|
309
|
-
|
310
|
-
for (JsonColumn jsonColumn : jsonAddColumns.values()) {
|
323
|
+
for (JsonColumn jsonColumn : this.jsonAddColumns.get(rootPath).values()) {
|
311
324
|
Value src = jsonColumn.getSrcBaseNameValue();
|
312
325
|
Value v = map.get(src);
|
313
326
|
if (v == null) {
|
314
327
|
v = jsonColumn.getDefaultValue();
|
315
328
|
}
|
316
|
-
|
317
|
-
|
329
|
+
String newPath = jsonColumn.getPath();
|
330
|
+
Value visited = visit(newPath, v);
|
331
|
+
newValue.add(i++, jsonColumn.getBaseNameValue());
|
332
|
+
newValue.add(i++, visited == null ? ValueFactory.newNil() : visited);
|
318
333
|
}
|
319
334
|
}
|
320
335
|
return ValueFactory.newMap(newValue.toArray(new Value[0]), true);
|
@@ -325,7 +340,10 @@ public class JsonVisitor
|
|
325
340
|
if (! shouldVisit(rootPath)) {
|
326
341
|
return value;
|
327
342
|
}
|
328
|
-
if (value
|
343
|
+
if (value == null) {
|
344
|
+
return null;
|
345
|
+
}
|
346
|
+
else if (value.isArrayValue()) {
|
329
347
|
return visitArray(rootPath, value.asArrayValue());
|
330
348
|
}
|
331
349
|
else if (value.isMapValue()) {
|
@@ -0,0 +1,57 @@
|
|
1
|
+
package org.embulk.filter.column;
|
2
|
+
|
3
|
+
import org.junit.Test;
|
4
|
+
|
5
|
+
import static org.junit.Assert.assertEquals;
|
6
|
+
import static org.junit.Assert.fail;
|
7
|
+
|
8
|
+
import org.embulk.spi.type.Types;
|
9
|
+
import org.msgpack.value.Value;
|
10
|
+
import org.msgpack.value.ValueFactory;
|
11
|
+
|
12
|
+
public class TestJsonColumn {
|
13
|
+
@Test
|
14
|
+
public void initialize()
|
15
|
+
{
|
16
|
+
try {
|
17
|
+
JsonColumn column = new JsonColumn("$.foo.bar", Types.BOOLEAN);
|
18
|
+
assertEquals("$.foo.bar", column.getSrc());
|
19
|
+
assertEquals(ValueFactory.newNil(), column.getDefaultValue());
|
20
|
+
}
|
21
|
+
catch (Exception e) {
|
22
|
+
fail();
|
23
|
+
}
|
24
|
+
|
25
|
+
try {
|
26
|
+
Value defaultValue = ValueFactory.newBoolean(true);
|
27
|
+
JsonColumn column = new JsonColumn("$.foo.bar", Types.BOOLEAN, defaultValue);
|
28
|
+
assertEquals("$.foo.bar", column.getSrc());
|
29
|
+
assertEquals(defaultValue, column.getDefaultValue());
|
30
|
+
}
|
31
|
+
catch (Exception e) {
|
32
|
+
fail();
|
33
|
+
}
|
34
|
+
}
|
35
|
+
|
36
|
+
@Test
|
37
|
+
public void parentPath()
|
38
|
+
{
|
39
|
+
assertEquals("$.foo.bar", JsonColumn.parentPath("$.foo.bar.baz"));
|
40
|
+
assertEquals("$.foo", JsonColumn.parentPath("$.foo.bar"));
|
41
|
+
assertEquals("$", JsonColumn.parentPath("$.foo"));
|
42
|
+
assertEquals("$.foo[0]", JsonColumn.parentPath("$.foo[0][1]"));
|
43
|
+
assertEquals("$.foo", JsonColumn.parentPath("$.foo[0]"));
|
44
|
+
assertEquals("$", JsonColumn.parentPath("$[0]"));
|
45
|
+
}
|
46
|
+
|
47
|
+
@Test
|
48
|
+
public void baseName()
|
49
|
+
{
|
50
|
+
assertEquals("baz", JsonColumn.baseName("$.foo.bar.baz"));
|
51
|
+
assertEquals("bar", JsonColumn.baseName("$.foo.bar"));
|
52
|
+
assertEquals("foo", JsonColumn.baseName("$.foo"));
|
53
|
+
assertEquals("[1]", JsonColumn.baseName("$.foo[0][1]"));
|
54
|
+
assertEquals("[0]", JsonColumn.baseName("$.foo[0]"));
|
55
|
+
assertEquals("[0]", JsonColumn.baseName("$[0]"));
|
56
|
+
}
|
57
|
+
}
|
@@ -0,0 +1,408 @@
|
|
1
|
+
package org.embulk.filter.column;
|
2
|
+
|
3
|
+
import com.google.common.base.Optional;
|
4
|
+
import com.google.common.collect.Lists;
|
5
|
+
import org.embulk.filter.column.ColumnFilterPlugin.ColumnConfig;
|
6
|
+
import org.embulk.filter.column.ColumnFilterPlugin.PluginTask;
|
7
|
+
|
8
|
+
import org.embulk.EmbulkTestRuntime;
|
9
|
+
import org.embulk.config.ConfigLoader;
|
10
|
+
import org.embulk.config.ConfigSource;
|
11
|
+
import org.embulk.config.TaskSource;
|
12
|
+
import org.embulk.spi.Column;
|
13
|
+
import org.embulk.spi.Exec;
|
14
|
+
import org.embulk.spi.FileInput;
|
15
|
+
import org.embulk.spi.ParserPlugin;
|
16
|
+
import org.embulk.spi.Schema;
|
17
|
+
import org.embulk.spi.SchemaConfig;
|
18
|
+
import org.embulk.spi.type.Type;
|
19
|
+
import org.joda.time.DateTimeZone;
|
20
|
+
import org.junit.Before;
|
21
|
+
import org.junit.Rule;
|
22
|
+
import org.junit.Test;
|
23
|
+
|
24
|
+
import static junit.framework.TestCase.assertFalse;
|
25
|
+
import static org.junit.Assert.assertEquals;
|
26
|
+
import static org.junit.Assert.fail;
|
27
|
+
|
28
|
+
import org.msgpack.value.MapValue;
|
29
|
+
import org.msgpack.value.Value;
|
30
|
+
import org.msgpack.value.ValueFactory;
|
31
|
+
|
32
|
+
import static org.embulk.spi.type.Types.BOOLEAN;
|
33
|
+
import static org.embulk.spi.type.Types.DOUBLE;
|
34
|
+
import static org.embulk.spi.type.Types.JSON;
|
35
|
+
import static org.embulk.spi.type.Types.LONG;
|
36
|
+
import static org.embulk.spi.type.Types.STRING;
|
37
|
+
import static org.embulk.spi.type.Types.TIMESTAMP;
|
38
|
+
import static org.junit.Assert.assertEquals;
|
39
|
+
import static org.junit.Assert.assertNull;
|
40
|
+
import static org.junit.Assert.assertTrue;
|
41
|
+
import static org.junit.Assert.fail;
|
42
|
+
|
43
|
+
import java.io.File;
|
44
|
+
import java.io.IOException;
|
45
|
+
import java.util.ArrayList;
|
46
|
+
import java.util.HashMap;
|
47
|
+
import java.util.HashSet;
|
48
|
+
import java.util.List;
|
49
|
+
|
50
|
+
public class TestJsonVisitor {
|
51
|
+
@Rule
|
52
|
+
public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
|
53
|
+
|
54
|
+
@Before
|
55
|
+
public void createReasource()
|
56
|
+
{
|
57
|
+
// config = config().set("type", "column");
|
58
|
+
}
|
59
|
+
|
60
|
+
private ConfigSource config()
|
61
|
+
{
|
62
|
+
return runtime.getExec().newConfigSource();
|
63
|
+
}
|
64
|
+
|
65
|
+
private Schema schema(Column... columns)
|
66
|
+
{
|
67
|
+
return new Schema(Lists.newArrayList(columns));
|
68
|
+
}
|
69
|
+
|
70
|
+
private ConfigSource configFromYamlString(String... lines)
|
71
|
+
{
|
72
|
+
StringBuilder builder = new StringBuilder();
|
73
|
+
for (String line : lines) {
|
74
|
+
builder.append(line).append("\n");
|
75
|
+
}
|
76
|
+
String yamlString = builder.toString();
|
77
|
+
|
78
|
+
ConfigLoader loader = new ConfigLoader(Exec.getModelManager());
|
79
|
+
return loader.fromYamlString(yamlString);
|
80
|
+
}
|
81
|
+
|
82
|
+
private PluginTask taskFromYamlString(String... lines)
|
83
|
+
{
|
84
|
+
ConfigSource config = configFromYamlString(lines);
|
85
|
+
return config.loadConfig(PluginTask.class);
|
86
|
+
}
|
87
|
+
|
88
|
+
private JsonVisitor jsonVisitor(PluginTask task, Schema inputSchema)
|
89
|
+
{
|
90
|
+
Schema outputSchema = ColumnFilterPlugin.buildOutputSchema(task, inputSchema);
|
91
|
+
return new JsonVisitor(task, inputSchema, outputSchema);
|
92
|
+
}
|
93
|
+
|
94
|
+
@Test
|
95
|
+
public void buildShouldVisitSet()
|
96
|
+
{
|
97
|
+
PluginTask task = taskFromYamlString(
|
98
|
+
"type: column",
|
99
|
+
"columns:",
|
100
|
+
" - {name: \"$.json1.a.a.a\"}",
|
101
|
+
"add_columns:",
|
102
|
+
" - {name: \"$.json1.b.b[1].b\", type: string, default: foo}",
|
103
|
+
"drop_columns:",
|
104
|
+
" - {name: \"$.json1.c.c[*].c\"}");
|
105
|
+
Schema inputSchema = schema(
|
106
|
+
new Column(0, "json1", JSON),
|
107
|
+
new Column(1, "json2", JSON));
|
108
|
+
JsonVisitor subject = jsonVisitor(task, inputSchema);
|
109
|
+
|
110
|
+
assertTrue(subject.shouldVisit("$.json1.a.a.a"));
|
111
|
+
assertTrue(subject.shouldVisit("$.json1.a.a"));
|
112
|
+
assertTrue(subject.shouldVisit("$.json1.a"));
|
113
|
+
assertTrue(subject.shouldVisit("$.json1.b.b[1].b"));
|
114
|
+
assertTrue(subject.shouldVisit("$.json1.b.b[1]"));
|
115
|
+
assertTrue(subject.shouldVisit("$.json1.b.b"));
|
116
|
+
assertTrue(subject.shouldVisit("$.json1.b"));
|
117
|
+
assertTrue(subject.shouldVisit("$.json1.c.c[*].c"));
|
118
|
+
assertTrue(subject.shouldVisit("$.json1.c.c[*]"));
|
119
|
+
assertTrue(subject.shouldVisit("$.json1.c.c"));
|
120
|
+
assertTrue(subject.shouldVisit("$.json1.c"));
|
121
|
+
assertTrue(subject.shouldVisit("$.json1"));
|
122
|
+
assertFalse(subject.shouldVisit("$.json2"));
|
123
|
+
}
|
124
|
+
|
125
|
+
@Test
|
126
|
+
public void buildJsonSchema_DropColumns()
|
127
|
+
{
|
128
|
+
PluginTask task = taskFromYamlString(
|
129
|
+
"type: column",
|
130
|
+
"drop_columns:",
|
131
|
+
" - {name: $.json1.a.default}",
|
132
|
+
" - {name: $.json1.a.copy}",
|
133
|
+
" - {name: \"$.json1.a.copy_array[1]\"}");
|
134
|
+
Schema inputSchema = schema(
|
135
|
+
new Column(0, "json1", JSON),
|
136
|
+
new Column(1, "json2", JSON));
|
137
|
+
JsonVisitor subject = jsonVisitor(task, inputSchema);
|
138
|
+
|
139
|
+
assertFalse(subject.jsonDropColumns.containsKey("$.json1"));
|
140
|
+
assertTrue(subject.jsonDropColumns.containsKey("$.json1.a"));
|
141
|
+
assertTrue(subject.jsonDropColumns.containsKey("$.json1.a.copy_array"));
|
142
|
+
|
143
|
+
{
|
144
|
+
HashSet<String> jsonColumns = subject.jsonDropColumns.get("$.json1.a");
|
145
|
+
assertEquals(2, jsonColumns.size());
|
146
|
+
assertTrue(jsonColumns.contains("$.json1.a.default"));
|
147
|
+
assertTrue(jsonColumns.contains("$.json1.a.copy"));
|
148
|
+
}
|
149
|
+
|
150
|
+
{
|
151
|
+
HashSet<String> jsonColumns = subject.jsonDropColumns.get("$.json1.a.copy_array");
|
152
|
+
assertEquals(1, jsonColumns.size());
|
153
|
+
assertTrue(jsonColumns.contains("$.json1.a.copy_array[1]"));
|
154
|
+
}
|
155
|
+
}
|
156
|
+
|
157
|
+
@Test
|
158
|
+
public void buildJsonSchema_AddColumns()
|
159
|
+
{
|
160
|
+
PluginTask task = taskFromYamlString(
|
161
|
+
"type: column",
|
162
|
+
"add_columns:",
|
163
|
+
" - {name: $.json1.a.default, type: string, default: foo}",
|
164
|
+
" - {name: $.json1.a.copy, src: $.json1.a.src}",
|
165
|
+
" - {name: \"$.json1.a.copy_array[1]\", src: \"$.json1.a.copy_array[0]\"}");
|
166
|
+
Schema inputSchema = schema(
|
167
|
+
new Column(0, "json1", JSON),
|
168
|
+
new Column(1, "json2", JSON));
|
169
|
+
JsonVisitor subject = jsonVisitor(task, inputSchema);
|
170
|
+
|
171
|
+
assertFalse(subject.jsonAddColumns.containsKey("$.json1"));
|
172
|
+
assertTrue(subject.jsonAddColumns.containsKey("$.json1.a"));
|
173
|
+
assertTrue(subject.jsonAddColumns.containsKey("$.json1.a.copy_array"));
|
174
|
+
|
175
|
+
{
|
176
|
+
HashMap<String, JsonColumn> jsonColumns = subject.jsonAddColumns.get("$.json1.a");
|
177
|
+
assertEquals(2, jsonColumns.size());
|
178
|
+
String[] keys = jsonColumns.keySet().toArray(new String[0]);
|
179
|
+
JsonColumn[] values = jsonColumns.values().toArray(new JsonColumn[0]);
|
180
|
+
assertEquals("$.json1.a.default", keys[0]);
|
181
|
+
assertEquals("$.json1.a.default", values[0].getPath());
|
182
|
+
assertEquals("$.json1.a.copy", keys[1]);
|
183
|
+
assertEquals("$.json1.a.copy", values[1].getPath());
|
184
|
+
}
|
185
|
+
|
186
|
+
{
|
187
|
+
HashMap<String, JsonColumn> jsonColumns = subject.jsonAddColumns.get("$.json1.a.copy_array");
|
188
|
+
assertEquals(1, jsonColumns.size());
|
189
|
+
String[] keys = jsonColumns.keySet().toArray(new String[0]);
|
190
|
+
JsonColumn[] values = jsonColumns.values().toArray(new JsonColumn[0]);
|
191
|
+
assertEquals("$.json1.a.copy_array[1]", keys[0]);
|
192
|
+
assertEquals("$.json1.a.copy_array[1]", values[0].getPath());
|
193
|
+
}
|
194
|
+
}
|
195
|
+
|
196
|
+
@Test
|
197
|
+
public void buildJsonSchema_Columns()
|
198
|
+
{
|
199
|
+
PluginTask task = taskFromYamlString(
|
200
|
+
"type: column",
|
201
|
+
"columns:",
|
202
|
+
" - {name: $.json1.a.default, type: string, default: foo}",
|
203
|
+
" - {name: $.json1.a.copy, src: $.json1.a.src}",
|
204
|
+
" - {name: \"$.json1.a.copy_array[1]\", src: \"$.json1.a.copy_array[0]\"}");
|
205
|
+
Schema inputSchema = schema(
|
206
|
+
new Column(0, "json1", JSON),
|
207
|
+
new Column(1, "json2", JSON));
|
208
|
+
JsonVisitor subject = jsonVisitor(task, inputSchema);
|
209
|
+
|
210
|
+
assertFalse(subject.jsonColumns.containsKey("$.json1"));
|
211
|
+
assertTrue(subject.jsonColumns.containsKey("$.json1.a"));
|
212
|
+
assertTrue(subject.jsonColumns.containsKey("$.json1.a.copy_array"));
|
213
|
+
|
214
|
+
{
|
215
|
+
HashMap<String, JsonColumn> jsonColumns = subject.jsonColumns.get("$.json1.a");
|
216
|
+
assertEquals(2, jsonColumns.size());
|
217
|
+
String[] keys = jsonColumns.keySet().toArray(new String[0]);
|
218
|
+
JsonColumn[] values = jsonColumns.values().toArray(new JsonColumn[0]);
|
219
|
+
assertEquals("$.json1.a.default", keys[0]);
|
220
|
+
assertEquals("$.json1.a.default", values[0].getPath());
|
221
|
+
assertEquals("$.json1.a.copy", keys[1]);
|
222
|
+
assertEquals("$.json1.a.copy", values[1].getPath());
|
223
|
+
}
|
224
|
+
|
225
|
+
{
|
226
|
+
HashMap<String, JsonColumn> jsonColumns = subject.jsonColumns.get("$.json1.a.copy_array");
|
227
|
+
assertEquals(1, jsonColumns.size());
|
228
|
+
String[] keys = jsonColumns.keySet().toArray(new String[0]);
|
229
|
+
JsonColumn[] values = jsonColumns.values().toArray(new JsonColumn[0]);
|
230
|
+
assertEquals("$.json1.a.copy_array[1]", keys[0]);
|
231
|
+
assertEquals("$.json1.a.copy_array[1]", values[0].getPath());
|
232
|
+
}
|
233
|
+
}
|
234
|
+
|
235
|
+
@Test
|
236
|
+
public void buildJsonSchema_Mix() {
|
237
|
+
PluginTask task = taskFromYamlString(
|
238
|
+
"type: column",
|
239
|
+
"drop_columns:",
|
240
|
+
" - {name: $.json1.a.default}",
|
241
|
+
"add_columns:",
|
242
|
+
" - {name: $.json1.a.copy, src: $.json1.a.src}",
|
243
|
+
"columns:",
|
244
|
+
" - {name: \"$.json1.a.copy_array[1]\", src: \"$.json1.a.copy_array[0]\"}");
|
245
|
+
Schema inputSchema = schema(
|
246
|
+
new Column(0, "json1", JSON),
|
247
|
+
new Column(1, "json2", JSON));
|
248
|
+
JsonVisitor subject = jsonVisitor(task, inputSchema);
|
249
|
+
|
250
|
+
assertFalse(subject.jsonDropColumns.isEmpty());
|
251
|
+
assertFalse(subject.jsonAddColumns.isEmpty());
|
252
|
+
assertTrue(subject.jsonColumns.isEmpty()); // drop_columns takes precedence over columns
|
253
|
+
}
|
254
|
+
|
255
|
+
@Test
|
256
|
+
public void visitMap_DropColumns() {
|
257
|
+
PluginTask task = taskFromYamlString(
|
258
|
+
"type: column",
|
259
|
+
"drop_columns:",
|
260
|
+
" - {name: $.json1.k1.k1}",
|
261
|
+
" - {name: $.json1.k2}");
|
262
|
+
Schema inputSchema = schema(
|
263
|
+
new Column(0, "json1", JSON),
|
264
|
+
new Column(1, "json2", JSON));
|
265
|
+
JsonVisitor subject = jsonVisitor(task, inputSchema);
|
266
|
+
|
267
|
+
// {"k1":{"k1":"v"},"k2":{"k2":"v"}}
|
268
|
+
Value k1 = ValueFactory.newString("k1");
|
269
|
+
Value k2 = ValueFactory.newString("k2");
|
270
|
+
Value v = ValueFactory.newString("v");
|
271
|
+
Value map = ValueFactory.newMap(
|
272
|
+
k1, ValueFactory.newMap(k1, v),
|
273
|
+
k2, ValueFactory.newMap(k2, v));
|
274
|
+
|
275
|
+
MapValue visited = subject.visit("$.json1", map).asMapValue();
|
276
|
+
assertEquals("{\"k1\":{}}", visited.toString());
|
277
|
+
}
|
278
|
+
|
279
|
+
@Test
|
280
|
+
public void visitMap_AddColumns() {
|
281
|
+
PluginTask task = taskFromYamlString(
|
282
|
+
"type: column",
|
283
|
+
"add_columns:",
|
284
|
+
" - {name: $.json1.k3, type: json, default: \"{}\"}",
|
285
|
+
" - {name: $.json1.k3.k3, type: string, default: v}",
|
286
|
+
" - {name: $.json1.k4, src: $.json1.k2}");
|
287
|
+
Schema inputSchema = schema(
|
288
|
+
new Column(0, "json1", JSON),
|
289
|
+
new Column(1, "json2", JSON));
|
290
|
+
JsonVisitor subject = jsonVisitor(task, inputSchema);
|
291
|
+
|
292
|
+
// {"k1":{"k1":"v"},"k2":{"k2":"v"}}
|
293
|
+
Value k1 = ValueFactory.newString("k1");
|
294
|
+
Value k2 = ValueFactory.newString("k2");
|
295
|
+
Value v = ValueFactory.newString("v");
|
296
|
+
Value map = ValueFactory.newMap(
|
297
|
+
k1, ValueFactory.newMap(k1, v),
|
298
|
+
k2, ValueFactory.newMap(k2, v));
|
299
|
+
|
300
|
+
MapValue visited = subject.visit("$.json1", map).asMapValue();
|
301
|
+
assertEquals("{\"k1\":{\"k1\":\"v\"},\"k2\":{\"k2\":\"v\"},\"k3\":{\"k3\":\"v\"},\"k4\":{\"k2\":\"v\"}}", visited.toString());
|
302
|
+
}
|
303
|
+
|
304
|
+
@Test
|
305
|
+
public void visitMap_Columns() {
|
306
|
+
PluginTask task = taskFromYamlString(
|
307
|
+
"type: column",
|
308
|
+
"columns:",
|
309
|
+
" - {name: $.json1.k1}",
|
310
|
+
" - {name: $.json1.k2.k2}", // $.json1.k2 must be specified now, or $.json.k2 will be removed entirely
|
311
|
+
" - {name: $.json1.k3, type: json, default: \"{}\"}",
|
312
|
+
" - {name: $.json1.k3.k3, type: string, default: v}",
|
313
|
+
" - {name: $.json1.k4, src: $.json1.k2}");
|
314
|
+
Schema inputSchema = schema(
|
315
|
+
new Column(0, "json1", JSON),
|
316
|
+
new Column(1, "json2", JSON));
|
317
|
+
JsonVisitor subject = jsonVisitor(task, inputSchema);
|
318
|
+
|
319
|
+
// {"k1":{"k1":"v"},"k2":{"k2":"v"}}
|
320
|
+
Value k1 = ValueFactory.newString("k1");
|
321
|
+
Value k2 = ValueFactory.newString("k2");
|
322
|
+
Value v = ValueFactory.newString("v");
|
323
|
+
Value map = ValueFactory.newMap(
|
324
|
+
k1, ValueFactory.newMap(k1, v),
|
325
|
+
k2, ValueFactory.newMap(k2, v));
|
326
|
+
|
327
|
+
MapValue visited = subject.visit("$.json1", map).asMapValue();
|
328
|
+
assertEquals("{\"k1\":{\"k1\":\"v\"},\"k3\":{\"k3\":\"v\"},\"k4\":{\"k2\":\"v\"}}", visited.toString());
|
329
|
+
}
|
330
|
+
|
331
|
+
@Test
|
332
|
+
public void visitArray_DropColumns() {
|
333
|
+
PluginTask task = taskFromYamlString(
|
334
|
+
"type: column",
|
335
|
+
"drop_columns:",
|
336
|
+
" - {name: \"$.json1.k1[0].k1\"}",
|
337
|
+
" - {name: \"$.json1.k2[*]\"}");
|
338
|
+
Schema inputSchema = schema(
|
339
|
+
new Column(0, "json1", JSON),
|
340
|
+
new Column(1, "json2", JSON));
|
341
|
+
JsonVisitor subject = jsonVisitor(task, inputSchema);
|
342
|
+
|
343
|
+
// {"k1":[{"k1":"v"}],"k2":["v","v"]}
|
344
|
+
Value k1 = ValueFactory.newString("k1");
|
345
|
+
Value k2 = ValueFactory.newString("k2");
|
346
|
+
Value v = ValueFactory.newString("v");
|
347
|
+
Value map = ValueFactory.newMap(
|
348
|
+
k1, ValueFactory.newArray(ValueFactory.newMap(k1, v)),
|
349
|
+
k2, ValueFactory.newArray(v, v));
|
350
|
+
|
351
|
+
MapValue visited = subject.visit("$.json1", map).asMapValue();
|
352
|
+
assertEquals("{\"k1\":[{}],\"k2\":[]}", visited.toString());
|
353
|
+
}
|
354
|
+
|
355
|
+
@Test
|
356
|
+
public void visitArray_AddColumns() {
|
357
|
+
PluginTask task = taskFromYamlString(
|
358
|
+
"type: column",
|
359
|
+
"add_columns:",
|
360
|
+
" - {name: \"$.json1.k1[1]\", src: \"$.json1.k1[0]\"}",
|
361
|
+
" - {name: \"$.json1.k3\", type: json, default: \"[]\"}",
|
362
|
+
" - {name: \"$.json1.k3[0]\", type: json, default: \"{}\"}",
|
363
|
+
" - {name: \"$.json1.k3[0].k3\", type: string, default: v}");
|
364
|
+
Schema inputSchema = schema(
|
365
|
+
new Column(0, "json1", JSON),
|
366
|
+
new Column(1, "json2", JSON));
|
367
|
+
JsonVisitor subject = jsonVisitor(task, inputSchema);
|
368
|
+
|
369
|
+
// {"k1":[{"k1":"v"}],"k2":["v","v"]}
|
370
|
+
Value k1 = ValueFactory.newString("k1");
|
371
|
+
Value k2 = ValueFactory.newString("k2");
|
372
|
+
Value v = ValueFactory.newString("v");
|
373
|
+
Value map = ValueFactory.newMap(
|
374
|
+
k1, ValueFactory.newArray(ValueFactory.newMap(k1, v)),
|
375
|
+
k2, ValueFactory.newArray(v, v));
|
376
|
+
|
377
|
+
MapValue visited = subject.visit("$.json1", map).asMapValue();
|
378
|
+
assertEquals("{\"k1\":[{\"k1\":\"v\"},{\"k1\":\"v\"}],\"k2\":[\"v\",\"v\"],\"k3\":[{\"k3\":\"v\"}]}", visited.toString());
|
379
|
+
}
|
380
|
+
|
381
|
+
@Test
|
382
|
+
public void visitArray_Columns() {
|
383
|
+
PluginTask task = taskFromYamlString(
|
384
|
+
"type: column",
|
385
|
+
"columns:",
|
386
|
+
" - {name: \"$.json1.k1\"}",
|
387
|
+
" - {name: \"$.json1.k1[1]\", src: \"$.json1.k1[0]\"}",
|
388
|
+
" - {name: \"$.json1.k2[0]\"}", // $.json1.k2 must be specified now, or $.json.k2 will be removed entirely
|
389
|
+
" - {name: \"$.json1.k3\", type: json, default: \"[]\"}",
|
390
|
+
" - {name: \"$.json1.k3[0]\", type: json, default: \"{}\"}",
|
391
|
+
" - {name: \"$.json1.k3[0].k3\", type: string, default: v}");
|
392
|
+
Schema inputSchema = schema(
|
393
|
+
new Column(0, "json1", JSON),
|
394
|
+
new Column(1, "json2", JSON));
|
395
|
+
JsonVisitor subject = jsonVisitor(task, inputSchema);
|
396
|
+
|
397
|
+
// {"k1":[{"k1":"v"},"v"],"k2":["v","v"]}
|
398
|
+
Value k1 = ValueFactory.newString("k1");
|
399
|
+
Value k2 = ValueFactory.newString("k2");
|
400
|
+
Value v = ValueFactory.newString("v");
|
401
|
+
Value map = ValueFactory.newMap(
|
402
|
+
k1, ValueFactory.newArray(ValueFactory.newMap(k1, v), v),
|
403
|
+
k2, ValueFactory.newArray(v, v));
|
404
|
+
|
405
|
+
MapValue visited = subject.visit("$.json1", map).asMapValue();
|
406
|
+
assertEquals("{\"k1\":[{\"k1\":\"v\"}],\"k3\":[{\"k3\":\"v\"}]}", visited.toString());
|
407
|
+
}
|
408
|
+
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-column
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.0
|
4
|
+
version: 0.5.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naotoshi Seo
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-06-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -67,8 +67,9 @@ files:
|
|
67
67
|
- src/main/java/org/embulk/filter/column/ColumnVisitorImpl.java
|
68
68
|
- src/main/java/org/embulk/filter/column/JsonColumn.java
|
69
69
|
- src/main/java/org/embulk/filter/column/JsonVisitor.java
|
70
|
-
- src/test/java/org/embulk/filter/TestColumnFilterPlugin.java
|
71
|
-
-
|
70
|
+
- src/test/java/org/embulk/filter/column/TestJsonColumn.java
|
71
|
+
- src/test/java/org/embulk/filter/column/TestJsonVisitor.java
|
72
|
+
- classpath/embulk-filter-column-0.5.1.jar
|
72
73
|
homepage: https://github.com/sonots/embulk-filter-column
|
73
74
|
licenses:
|
74
75
|
- MIT
|