embulk-filter-column 0.6.0.pre2 → 0.6.0.pre3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +2 -1
- data/README.md +2 -14
- data/build.gradle +2 -2
- data/example/add_columns.txt +11 -0
- data/example/add_columns.yml +1 -1
- data/example/bracket_notations.txt +11 -0
- data/example/columns.txt +11 -0
- data/example/drop_columns.txt +11 -0
- data/src/main/java/org/embulk/filter/column/ColumnFilterPlugin.java +1 -1
- data/src/main/java/org/embulk/filter/column/ColumnVisitorImpl.java +1 -1
- data/src/main/java/org/embulk/filter/column/JsonColumn.java +13 -16
- data/src/main/java/org/embulk/filter/column/JsonPathUtil.java +76 -0
- data/src/main/java/org/embulk/filter/column/JsonVisitor.java +173 -71
- data/src/test/java/org/embulk/filter/column/TestJsonColumn.java +8 -8
- data/src/test/java/org/embulk/filter/column/TestJsonVisitor.java +98 -50
- metadata +9 -5
- data/src/main/java/org/embulk/filter/column/PathTokenUtil.java +0 -39
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d26c69b6a7bfb4a6c4d68c6967301a0dc32562f5
|
4
|
+
data.tar.gz: c674640721b35540a9aba9aef1ad76c5cb5adc96
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9cc5e8165d85afed48b65302d0baa9f54e35b1ae5438f390b5b5970d8342e14cf72b30f137ca901229578c3493b23a4c3a6286ea076501bc926eaed97491f70f
|
7
|
+
data.tar.gz: 867fa85522091768287f5b4ade0b7e17947899e1f6ccbeeb017d5115fb0468d418c62712af308a80580afe1be6bb5a2f3ac73c7c1274b11d4c674dd8c8635a66
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -133,27 +133,15 @@ NOTE:
|
|
133
133
|
|
134
134
|
NOTE:
|
135
135
|
|
136
|
-
|
136
|
+
Renaming or copying JSON paths with the `src` option is only partially supported so far. The source and destination must share the same parent JSON path, for example:
|
137
137
|
|
138
138
|
```
|
139
|
-
- (name: $.payload.foo}
|
140
|
-
- {name: $.payload.foo.bar}
|
141
|
-
```
|
142
|
-
|
143
|
-
NOTE:
|
144
|
-
|
145
|
-
`src` (to rename or copy columns) is only partially supported yet. The upper json path must be same like:
|
146
|
-
|
147
|
-
```
|
148
|
-
- {name: $.payload.foo}
|
149
139
|
- {name: $.payload.foo.dest, src: $.payload.foo.src}
|
150
140
|
```
|
151
141
|
|
152
|
-
|
142
|
+
The example below does not work yet, because the parent paths differ (`$.payload.foo` and `$.payload.bar`):
|
153
143
|
|
154
144
|
```
|
155
|
-
- {name: $.payload.foo}
|
156
|
-
- {name: $.payload.bar}
|
157
145
|
- {name: $.payload.foo.dest, src: $.payload.bar.src}
|
158
146
|
```
|
159
147
|
|
data/build.gradle
CHANGED
@@ -15,14 +15,14 @@ configurations {
|
|
15
15
|
provided
|
16
16
|
}
|
17
17
|
|
18
|
-
version = "0.6.0.
|
18
|
+
version = "0.6.0.pre3"
|
19
19
|
sourceCompatibility = 1.7
|
20
20
|
targetCompatibility = 1.7
|
21
21
|
|
22
22
|
dependencies {
|
23
23
|
compile "org.embulk:embulk-core:0.8.+"
|
24
24
|
provided "org.embulk:embulk-core:0.8.+"
|
25
|
-
compile
|
25
|
+
compile "io.github.medjed:JsonPathCompiler:0.0.+"
|
26
26
|
|
27
27
|
testCompile "junit:junit:4.+"
|
28
28
|
testCompile "org.embulk:embulk-core:0.8.+:tests"
|
@@ -0,0 +1,11 @@
|
|
1
|
+
2016-10-20 02:29:31.933 +0900: Embulk v0.8.6
|
2
|
+
2016-10-20 02:29:32.859 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/column from a load path
|
3
|
+
2016-10-20 02:29:32.876 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'example.csv'
|
4
|
+
2016-10-20 02:29:32.882 +0900 [INFO] (0001:preview): Loading files [example/example.csv]
|
5
|
+
+-------------------------+---------+---------------------------------------------+--------------+-------------------------------------------------------------+----------+-------------------------+-------------------------+-------------------+
|
6
|
+
| time:timestamp | id:long | name:string | score:double | json:json | foo:long | d:timestamp | t:timestamp | copy_score:double |
|
7
|
+
+-------------------------+---------+---------------------------------------------+--------------+-------------------------------------------------------------+----------+-------------------------+-------------------------+-------------------+
|
8
|
+
| 2015-07-13 00:00:00 UTC | 0 | Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY | 1370.0 | {"foo":"foo","bar":"bar","d":"2015-07-13","copy_foo":"foo"} | 1 | 2015-07-12 15:00:00 UTC | 2015-07-13 00:00:00 UTC | 1370.0 |
|
9
|
+
| 2015-07-13 00:00:00 UTC | 0 | Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY | 1370.0 | {"foo":"foo2","bar":1,"d":"2015-07-13","copy_foo":"foo2"} | 1 | 2015-07-12 15:00:00 UTC | 2015-07-13 00:00:00 UTC | 1370.0 |
|
10
|
+
| | | | 9170.0 | | 1 | 2015-07-12 15:00:00 UTC | 2015-07-13 00:00:00 UTC | 9170.0 |
|
11
|
+
+-------------------------+---------+---------------------------------------------+--------------+-------------------------------------------------------------+----------+-------------------------+-------------------------+-------------------+
|
data/example/add_columns.yml
CHANGED
@@ -23,7 +23,7 @@ filters:
|
|
23
23
|
- {name: d, type: timestamp, default: "2015-07-13"}
|
24
24
|
- {name: t, type: timestamp, default: "2015-07-13 00:00:00", timezone: "UTC", format: "%Y-%m-%d %H:%M:%S"}
|
25
25
|
- {name: copy_score, src: score}
|
26
|
-
- {name: $.json.
|
26
|
+
- {name: $.json.bar, type: long, default: 1}
|
27
27
|
- {name: $.json.d, type: string, default: "2015-07-13"}
|
28
28
|
- {name: $.json.copy_foo, src: $.json.foo}
|
29
29
|
out:
|
@@ -0,0 +1,11 @@
|
|
1
|
+
2016-10-20 02:29:47.367 +0900: Embulk v0.8.6
|
2
|
+
2016-10-20 02:29:48.254 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/column from a load path
|
3
|
+
2016-10-20 02:29:48.270 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'example.csv'
|
4
|
+
2016-10-20 02:29:48.274 +0900 [INFO] (0001:preview): Loading files [example/example.csv]
|
5
|
+
+-------------------------+---------------------------------------------+----------+---------+-------------------+---------------------------------------------------------------------------------------------------------+
|
6
|
+
| time:timestamp | name:string | foo:long | id:long | copy_score:double | json:json |
|
7
|
+
+-------------------------+---------------------------------------------+----------+---------+-------------------+---------------------------------------------------------------------------------------------------------+
|
8
|
+
| 2015-07-13 00:00:00 UTC | Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY | 1 | 0 | 1370.0 | {"foo":"foo","copy_foo":"foo","array_a":[{"a":"a0"}],"array_b":[{"b":"b"},{"b":"b"}]} |
|
9
|
+
| 2015-07-13 00:00:00 UTC | Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY | 1 | 0 | 1370.0 | {"foo":"foo2","copy_foo":"foo2","array_a":[{"a":"a0"}],"array_b":[{"b":"b"},{"b":"b"}]} |
|
10
|
+
| 2015-07-13 00:00:00 UTC | foo | 1 | | 9170.0 | {"foo":"FOO","copy_foo":"FOO","array_a":[{"a":"default0"}],"array_b":[{"b":"default"},{"b":"default"}]} |
|
11
|
+
+-------------------------+---------------------------------------------+----------+---------+-------------------+---------------------------------------------------------------------------------------------------------+
|
data/example/columns.txt
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
2016-10-20 02:30:03.618 +0900: Embulk v0.8.6
|
2
|
+
2016-10-20 02:30:04.499 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/column from a load path
|
3
|
+
2016-10-20 02:30:04.516 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'example.csv'
|
4
|
+
2016-10-20 02:30:04.521 +0900 [INFO] (0001:preview): Loading files [example/example.csv]
|
5
|
+
+-------------------------+---------------------------------------------+----------+---------+-------------------+----------------------------------+
|
6
|
+
| time:timestamp | name:string | foo:long | id:long | copy_score:double | json:json |
|
7
|
+
+-------------------------+---------------------------------------------+----------+---------+-------------------+----------------------------------+
|
8
|
+
| 2015-07-13 00:00:00 UTC | Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY | 1 | 0 | 1370.0 | {"foo":"foo","copy_foo":"foo"} |
|
9
|
+
| 2015-07-13 00:00:00 UTC | Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY | 1 | 0 | 1370.0 | {"foo":"foo2","copy_foo":"foo2"} |
|
10
|
+
| 2015-07-13 00:00:00 UTC | foo | 1 | | 9170.0 | {"foo":"FOO","copy_foo":"FOO"} |
|
11
|
+
+-------------------------+---------------------------------------------+----------+---------+-------------------+----------------------------------+
|
@@ -0,0 +1,11 @@
|
|
1
|
+
2016-10-20 02:30:16.407 +0900: Embulk v0.8.6
|
2
|
+
2016-10-20 02:30:17.290 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/column from a load path
|
3
|
+
2016-10-20 02:30:17.305 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'example.csv'
|
4
|
+
2016-10-20 02:30:17.310 +0900 [INFO] (0001:preview): Loading files [example/example.csv]
|
5
|
+
+---------------------------------------------+--------------+---------------+
|
6
|
+
| name:string | score:double | json:json |
|
7
|
+
+---------------------------------------------+--------------+---------------+
|
8
|
+
| Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY | 1370.0 | {"bar":"bar"} |
|
9
|
+
| Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY | 1370.0 | {} |
|
10
|
+
| | 9170.0 | |
|
11
|
+
+---------------------------------------------+--------------+---------------+
|
@@ -27,7 +27,7 @@ import org.slf4j.Logger;
|
|
27
27
|
|
28
28
|
import java.util.List;
|
29
29
|
|
30
|
-
import
|
30
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.PathCompiler;
|
31
31
|
|
32
32
|
public class ColumnFilterPlugin implements FilterPlugin
|
33
33
|
{
|
@@ -1,6 +1,6 @@
|
|
1
1
|
package org.embulk.filter.column;
|
2
2
|
|
3
|
-
import
|
3
|
+
import io.github.medjed.jsonpathcompiler.expressions.Utils;
|
4
4
|
import com.google.common.base.Throwables;
|
5
5
|
|
6
6
|
import org.embulk.filter.column.ColumnFilterPlugin.ColumnConfig;
|
@@ -1,12 +1,12 @@
|
|
1
1
|
package org.embulk.filter.column;
|
2
2
|
|
3
|
-
import
|
4
|
-
import
|
5
|
-
import
|
6
|
-
import
|
7
|
-
import
|
8
|
-
import
|
9
|
-
import
|
3
|
+
import io.github.medjed.jsonpathcompiler.expressions.Path;
|
4
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.ArrayIndexOperation;
|
5
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.ArrayPathToken;
|
6
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.PathCompiler;
|
7
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.PathToken;
|
8
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.RootPathToken;
|
9
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.PropertyPathToken;
|
10
10
|
import org.embulk.config.ConfigException;
|
11
11
|
import org.embulk.spi.type.Type;
|
12
12
|
import org.msgpack.value.StringValue;
|
@@ -56,17 +56,14 @@ public class JsonColumn
|
|
56
56
|
this.pathValue = ValueFactory.newString(path);
|
57
57
|
this.parentPath = compiledPath.getParentPath();
|
58
58
|
|
59
|
-
|
60
|
-
throw new ConfigException(String.format("%s wrongly ends with [*], perhaps you can remove the [*]", path));
|
61
|
-
}
|
62
|
-
this.tailIndex = tailIndex(compiledRoot);
|
59
|
+
this.tailIndex = getTailIndex(compiledRoot);
|
63
60
|
this.parentPathValue = ValueFactory.newString(parentPath);
|
64
61
|
String tailName = getTailName(compiledRoot);
|
65
62
|
this.tailNameValue = tailName == null ? ValueFactory.newNil() : ValueFactory.newString(tailName);
|
66
63
|
|
67
64
|
this.srcValue = ValueFactory.newString(this.src);
|
68
65
|
this.srcParentPath = compiledSrc.getParentPath();
|
69
|
-
this.srcTailIndex =
|
66
|
+
this.srcTailIndex = getTailIndex(compiledSrcRoot);
|
70
67
|
this.srcParentPathValue = ValueFactory.newString(this.srcParentPath);
|
71
68
|
String srcTailName = getTailName(compiledSrcRoot);
|
72
69
|
this.srcTailNameValue = srcTailName == null ? ValueFactory.newNil() : ValueFactory.newString(srcTailName);
|
@@ -92,12 +89,12 @@ public class JsonColumn
|
|
92
89
|
}
|
93
90
|
}
|
94
91
|
|
95
|
-
private Long
|
92
|
+
private Long getTailIndex(RootPathToken root)
|
96
93
|
{
|
97
94
|
PathToken tail = root.getTail();
|
98
95
|
if (tail instanceof ArrayPathToken) {
|
99
96
|
ArrayIndexOperation arrayIndexOperation = ((ArrayPathToken) tail).getArrayIndexOperation();
|
100
|
-
|
97
|
+
JsonPathUtil.assertSupportedArrayPathToken(arrayIndexOperation, path);
|
101
98
|
return arrayIndexOperation.indexes().get(0).longValue();
|
102
99
|
}
|
103
100
|
else {
|
@@ -135,7 +132,7 @@ public class JsonColumn
|
|
135
132
|
return parentPath;
|
136
133
|
}
|
137
134
|
|
138
|
-
public Long
|
135
|
+
public Long getTailIndex()
|
139
136
|
{
|
140
137
|
return tailIndex;
|
141
138
|
}
|
@@ -186,7 +183,7 @@ public class JsonColumn
|
|
186
183
|
return ((RootPathToken) PathCompiler.compile(path).getRoot()).getTailPath();
|
187
184
|
}
|
188
185
|
|
189
|
-
public static Long
|
186
|
+
public static Long getTailIndex(String path)
|
190
187
|
{
|
191
188
|
Path compiledPath = PathCompiler.compile(path);
|
192
189
|
PathToken tail = ((RootPathToken) compiledPath.getRoot()).getTail();
|
@@ -0,0 +1,76 @@
|
|
1
|
+
package org.embulk.filter.column;
|
2
|
+
|
3
|
+
import io.github.medjed.jsonpathcompiler.InvalidPathException;
|
4
|
+
import io.github.medjed.jsonpathcompiler.expressions.Path;
|
5
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.ArrayIndexOperation;
|
6
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.ArrayPathToken;
|
7
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.FunctionPathToken;
|
8
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.PathCompiler;
|
9
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.PathToken;
|
10
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.PredicatePathToken;
|
11
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.RootPathToken;
|
12
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.ScanPathToken;
|
13
|
+
import org.embulk.config.ConfigException;
|
14
|
+
|
15
|
+
public class JsonPathUtil
|
16
|
+
{
|
17
|
+
public static void assertJsonPathFormat(String path)
|
18
|
+
{
|
19
|
+
Path compiledPath;
|
20
|
+
try {
|
21
|
+
compiledPath = PathCompiler.compile(path);
|
22
|
+
}
|
23
|
+
catch (InvalidPathException e) {
|
24
|
+
throw new ConfigException(String.format("jsonpath %s, %s", path, e.getMessage()));
|
25
|
+
}
|
26
|
+
PathToken pathToken = compiledPath.getRoot();
|
27
|
+
while (true) {
|
28
|
+
assertSupportedPathToken(pathToken, path);
|
29
|
+
if (pathToken.isLeaf()) {
|
30
|
+
break;
|
31
|
+
}
|
32
|
+
pathToken = pathToken.next();
|
33
|
+
}
|
34
|
+
}
|
35
|
+
|
36
|
+
public static void assertSupportedPathToken(PathToken pathToken, String path)
|
37
|
+
{
|
38
|
+
if (pathToken instanceof ArrayPathToken) {
|
39
|
+
ArrayIndexOperation arrayIndexOperation = ((ArrayPathToken) pathToken).getArrayIndexOperation();
|
40
|
+
assertSupportedArrayPathToken(arrayIndexOperation, path);
|
41
|
+
}
|
42
|
+
else if (pathToken instanceof ScanPathToken) {
|
43
|
+
throw new ConfigException(String.format("scan path token is not supported \"%s\"", path));
|
44
|
+
}
|
45
|
+
else if (pathToken instanceof FunctionPathToken) {
|
46
|
+
throw new ConfigException(String.format("function path token is not supported \"%s\"", path));
|
47
|
+
}
|
48
|
+
else if (pathToken instanceof PredicatePathToken) {
|
49
|
+
throw new ConfigException(String.format("predicate path token is not supported \"%s\"", path));
|
50
|
+
}
|
51
|
+
}
|
52
|
+
|
53
|
+
public static void assertSupportedArrayPathToken(ArrayIndexOperation arrayIndexOperation, String path)
|
54
|
+
{
|
55
|
+
if (arrayIndexOperation == null) {
|
56
|
+
throw new ConfigException(String.format("Array Slice Operation is not supported \"%s\"", path));
|
57
|
+
}
|
58
|
+
else if (!arrayIndexOperation.isSingleIndexOperation()) {
|
59
|
+
throw new ConfigException(String.format("Multi Array Indexes is not supported \"%s\"", path));
|
60
|
+
}
|
61
|
+
}
|
62
|
+
|
63
|
+
public static void assertDoNotEndsWithArrayWildcard(String path)
|
64
|
+
{
|
65
|
+
Path compiledPath;
|
66
|
+
try {
|
67
|
+
compiledPath = PathCompiler.compile(path);
|
68
|
+
}
|
69
|
+
catch (InvalidPathException e) {
|
70
|
+
throw new ConfigException(String.format("jsonpath %s, %s", path, e.getMessage()));
|
71
|
+
}
|
72
|
+
if (((RootPathToken) compiledPath.getRoot()).getTailPath().equals("[*]")) {
|
73
|
+
throw new ConfigException(String.format("%s wrongly ends with [*], perhaps you can remove the [*]", compiledPath.toString()));
|
74
|
+
}
|
75
|
+
}
|
76
|
+
}
|
@@ -1,9 +1,10 @@
|
|
1
1
|
package org.embulk.filter.column;
|
2
2
|
|
3
|
-
import
|
4
|
-
import
|
5
|
-
import
|
6
|
-
import
|
3
|
+
import io.github.medjed.jsonpathcompiler.InvalidPathException;
|
4
|
+
import io.github.medjed.jsonpathcompiler.expressions.Path;
|
5
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.ArrayPathToken;
|
6
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.PathCompiler;
|
7
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.PathToken;
|
7
8
|
import org.embulk.config.ConfigException;
|
8
9
|
import org.embulk.filter.column.ColumnFilterPlugin.ColumnConfig;
|
9
10
|
import org.embulk.filter.column.ColumnFilterPlugin.PluginTask;
|
@@ -18,6 +19,7 @@ import org.embulk.spi.type.LongType;
|
|
18
19
|
import org.embulk.spi.type.StringType;
|
19
20
|
import org.embulk.spi.type.TimestampType;
|
20
21
|
import org.embulk.spi.type.Type;
|
22
|
+
import org.embulk.spi.type.Types;
|
21
23
|
import org.msgpack.value.ArrayValue;
|
22
24
|
import org.msgpack.value.MapValue;
|
23
25
|
import org.msgpack.value.Value;
|
@@ -38,9 +40,13 @@ public class JsonVisitor
|
|
38
40
|
final PluginTask task;
|
39
41
|
final Schema inputSchema;
|
40
42
|
final Schema outputSchema;
|
43
|
+
// jsonpath
|
41
44
|
final HashSet<String> shouldVisitSet = new HashSet<>();
|
45
|
+
// parent jsonpath => { jsonpath => json column }
|
42
46
|
final HashMap<String, LinkedHashMap<String, JsonColumn>> jsonColumns = new HashMap<>();
|
47
|
+
// parent jsonpath => { jsonpath => json column }
|
43
48
|
final HashMap<String, LinkedHashMap<String, JsonColumn>> jsonAddColumns = new HashMap<>();
|
49
|
+
// parent jsonpath => [ jsonpath ]
|
44
50
|
final HashMap<String, HashSet<String>> jsonDropColumns = new HashMap<>();
|
45
51
|
|
46
52
|
JsonVisitor(PluginTask task, Schema inputSchema, Schema outputSchema)
|
@@ -92,6 +98,18 @@ public class JsonVisitor
|
|
92
98
|
jsonColumns.get(parentPath).put(compiledPath.toString(), value);
|
93
99
|
}
|
94
100
|
|
101
|
+
private boolean jsonColumnsContainsKey(String path)
|
102
|
+
{
|
103
|
+
Path compiledPath = PathCompiler.compile(path);
|
104
|
+
String parentPath = compiledPath.getParentPath();
|
105
|
+
if (jsonColumns.containsKey(parentPath)) {
|
106
|
+
return jsonColumns.get(parentPath).containsKey(compiledPath.toString());
|
107
|
+
}
|
108
|
+
else {
|
109
|
+
return false;
|
110
|
+
}
|
111
|
+
}
|
112
|
+
|
95
113
|
private void jsonAddColumnsPut(String path, JsonColumn value)
|
96
114
|
{
|
97
115
|
Path compiledPath = PathCompiler.compile(path);
|
@@ -102,6 +120,18 @@ public class JsonVisitor
|
|
102
120
|
jsonAddColumns.get(parentPath).put(compiledPath.toString(), value);
|
103
121
|
}
|
104
122
|
|
123
|
+
private boolean jsonAddColumnsContainsKey(String path)
|
124
|
+
{
|
125
|
+
Path compiledPath = PathCompiler.compile(path);
|
126
|
+
String parentPath = compiledPath.getParentPath();
|
127
|
+
if (jsonAddColumns.containsKey(parentPath)) {
|
128
|
+
return jsonAddColumns.get(parentPath).containsKey(compiledPath.toString());
|
129
|
+
}
|
130
|
+
else {
|
131
|
+
return false;
|
132
|
+
}
|
133
|
+
}
|
134
|
+
|
105
135
|
private void jsonDropColumnsPut(String path)
|
106
136
|
{
|
107
137
|
Path compiledPath = PathCompiler.compile(path);
|
@@ -112,68 +142,98 @@ public class JsonVisitor
|
|
112
142
|
jsonDropColumns.get(parentPath).add(compiledPath.toString());
|
113
143
|
}
|
114
144
|
|
115
|
-
|
116
|
-
private void buildJsonSchema()
|
145
|
+
private void buildJsonColumns()
|
117
146
|
{
|
118
147
|
List<ColumnConfig> columns = task.getColumns();
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
148
|
+
for (ColumnConfig column : columns) {
|
149
|
+
String name = column.getName();
|
150
|
+
// skip NON json path notation to build output schema
|
151
|
+
if (! PathCompiler.isProbablyJsonPath(name)) {
|
152
|
+
continue;
|
153
|
+
}
|
154
|
+
JsonPathUtil.assertDoNotEndsWithArrayWildcard(name);
|
155
|
+
// automatically fill ancestor jsonpaths
|
156
|
+
for (JsonColumn ancestorJsonColumn : getAncestorJsonColumnList(name)) {
|
157
|
+
String ancestorJsonPath = ancestorJsonColumn.getPath();
|
158
|
+
if (!jsonColumnsContainsKey(ancestorJsonPath)) {
|
159
|
+
jsonColumnsPut(ancestorJsonPath, ancestorJsonColumn);
|
129
160
|
}
|
130
|
-
|
161
|
+
}
|
162
|
+
// leaf jsonpath
|
163
|
+
if (column.getSrc().isPresent()) {
|
164
|
+
String src = column.getSrc().get();
|
165
|
+
jsonColumnsPut(name, new JsonColumn(name, null, null, src));
|
166
|
+
}
|
167
|
+
else if (column.getType().isPresent() && column.getDefault().isPresent()) { // add column
|
168
|
+
Type type = column.getType().get();
|
169
|
+
Value defaultValue = getDefault(task, name, type, column);
|
170
|
+
jsonColumnsPut(name, new JsonColumn(name, type, defaultValue));
|
171
|
+
}
|
172
|
+
else {
|
173
|
+
Type type = column.getType().isPresent() ? column.getType().get() : null;
|
174
|
+
jsonColumnsPut(name, new JsonColumn(name, type));
|
131
175
|
}
|
132
176
|
}
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
Type type = column.getType().isPresent() ? column.getType().get() : null;
|
151
|
-
jsonColumnsPut(name, new JsonColumn(name, type));
|
177
|
+
}
|
178
|
+
|
179
|
+
private void buildJsonAddColumns()
|
180
|
+
{
|
181
|
+
List<ColumnConfig> addColumns = task.getAddColumns();
|
182
|
+
for (ColumnConfig column : addColumns) {
|
183
|
+
String name = column.getName();
|
184
|
+
// skip NON json path notation to build output schema
|
185
|
+
if (! PathCompiler.isProbablyJsonPath(name)) {
|
186
|
+
continue;
|
187
|
+
}
|
188
|
+
JsonPathUtil.assertDoNotEndsWithArrayWildcard(name);
|
189
|
+
// automatically fill ancestor jsonpaths
|
190
|
+
for (JsonColumn ancestorJsonColumn : getAncestorJsonColumnList(name)) {
|
191
|
+
String ancestorJsonPath = ancestorJsonColumn.getPath();
|
192
|
+
if (!jsonAddColumnsContainsKey(ancestorJsonPath)) {
|
193
|
+
jsonAddColumnsPut(ancestorJsonPath, ancestorJsonColumn);
|
152
194
|
}
|
153
195
|
}
|
196
|
+
// leaf jsonpath
|
197
|
+
if (column.getSrc().isPresent()) {
|
198
|
+
String src = column.getSrc().get();
|
199
|
+
jsonAddColumnsPut(name, new JsonColumn(name, null, null, src));
|
200
|
+
}
|
201
|
+
else if (column.getType().isPresent() && column.getDefault().isPresent()) { // add column
|
202
|
+
Type type = column.getType().get();
|
203
|
+
Value defaultValue = getDefault(task, name, type, column);
|
204
|
+
jsonAddColumnsPut(name, new JsonColumn(name, type, defaultValue));
|
205
|
+
}
|
206
|
+
else {
|
207
|
+
throw new SchemaConfigException(String.format("add_columns: Column '%s' does not have \"src\", or \"type\" and \"default\"", name));
|
208
|
+
}
|
154
209
|
}
|
210
|
+
}
|
155
211
|
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
if (column.getSrc().isPresent()) {
|
165
|
-
String src = column.getSrc().get();
|
166
|
-
jsonAddColumnsPut(name, new JsonColumn(name, null, null, src));
|
167
|
-
}
|
168
|
-
else if (column.getType().isPresent() && column.getDefault().isPresent()) { // add column
|
169
|
-
Type type = column.getType().get();
|
170
|
-
Value defaultValue = getDefault(task, name, type, column);
|
171
|
-
jsonAddColumnsPut(name, new JsonColumn(name, type, defaultValue));
|
172
|
-
}
|
173
|
-
else {
|
174
|
-
throw new SchemaConfigException(String.format("add_columns: Column '%s' does not have \"src\", or \"type\" and \"default\"", name));
|
175
|
-
}
|
212
|
+
private void buildJsonDropColumns()
|
213
|
+
{
|
214
|
+
List<ColumnConfig> dropColumns = task.getDropColumns();
|
215
|
+
for (ColumnConfig dropColumn : dropColumns) {
|
216
|
+
String name = dropColumn.getName();
|
217
|
+
// skip NON json path notation to build output schema
|
218
|
+
if (! PathCompiler.isProbablyJsonPath(name)) {
|
219
|
+
continue;
|
176
220
|
}
|
221
|
+
jsonDropColumnsPut(name);
|
222
|
+
}
|
223
|
+
}
|
224
|
+
|
225
|
+
// build jsonColumns, jsonAddColumns, and jsonDropColumns
|
226
|
+
private void buildJsonSchema()
|
227
|
+
{
|
228
|
+
if (task.getDropColumns().size() > 0) {
|
229
|
+
buildJsonDropColumns();
|
230
|
+
}
|
231
|
+
else if (task.getColumns().size() > 0) {
|
232
|
+
buildJsonColumns();
|
233
|
+
}
|
234
|
+
// Added columns are appended last. To add a column at the head or in the middle, use the `columns` option
|
235
|
+
if (task.getAddColumns().size() > 0) {
|
236
|
+
buildJsonAddColumns();
|
177
237
|
}
|
178
238
|
}
|
179
239
|
|
@@ -189,23 +249,53 @@ public class JsonVisitor
|
|
189
249
|
if (!PathCompiler.isProbablyJsonPath(name)) {
|
190
250
|
continue;
|
191
251
|
}
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
} catch (InvalidPathException e) {
|
196
|
-
throw new ConfigException(String.format("path %s, %s", name, e.getMessage()));
|
252
|
+
JsonPathUtil.assertJsonPathFormat(name);
|
253
|
+
for (JsonColumn ancestorJsonColumn : getAncestorJsonColumnList(name)) {
|
254
|
+
this.shouldVisitSet.add(ancestorJsonColumn.getPath());
|
197
255
|
}
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
256
|
+
Path path = PathCompiler.compile(name);
|
257
|
+
this.shouldVisitSet.add(path.toString());
|
258
|
+
}
|
259
|
+
}
|
260
|
+
|
261
|
+
/*
|
262
|
+
* <pre>
|
263
|
+
* $['foo']['bar'][0]['baz']
|
264
|
+
* #=>
|
265
|
+
* name: $['foo'], type: json, default: {}
|
266
|
+
* name: $['foo']['bar'], type: json, default: []
|
267
|
+
* name: $['foo']['bar'][0], type: json, default: {}
|
268
|
+
* </pre>
|
269
|
+
*
|
270
|
+
* @return ancestors as an array
|
271
|
+
*/
|
272
|
+
public static ArrayList<JsonColumn> getAncestorJsonColumnList(String path)
|
273
|
+
{
|
274
|
+
ArrayList<JsonColumn> ancestorJsonColumnList = new ArrayList<>();
|
275
|
+
Path compiledPath;
|
276
|
+
try {
|
277
|
+
compiledPath = PathCompiler.compile(path);
|
278
|
+
}
|
279
|
+
catch (InvalidPathException e) {
|
280
|
+
throw new ConfigException(String.format("jsonpath %s, %s", path, e.getMessage()));
|
281
|
+
}
|
282
|
+
StringBuilder partialPath = new StringBuilder("$");
|
283
|
+
PathToken parts = compiledPath.getRoot();
|
284
|
+
parts = parts.next(); // skip "$"
|
285
|
+
while (! parts.isLeaf()) {
|
286
|
+
partialPath.append(parts.getPathFragment());
|
287
|
+
PathToken next = parts.next();
|
288
|
+
JsonColumn jsonColumn;
|
289
|
+
if (next instanceof ArrayPathToken) {
|
290
|
+
jsonColumn = new JsonColumn(partialPath.toString(), Types.JSON, ValueFactory.newArray(new Value[0], false));
|
291
|
+
}
|
292
|
+
else {
|
293
|
+
jsonColumn = new JsonColumn(partialPath.toString(), Types.JSON, ValueFactory.newMap(new Value[0]));
|
207
294
|
}
|
295
|
+
ancestorJsonColumnList.add(jsonColumn);
|
296
|
+
parts = next;
|
208
297
|
}
|
298
|
+
return ancestorJsonColumnList;
|
209
299
|
}
|
210
300
|
|
211
301
|
boolean shouldVisit(String jsonPath)
|
@@ -253,7 +343,7 @@ public class JsonVisitor
|
|
253
343
|
}
|
254
344
|
String newPath = jsonColumn.getPath();
|
255
345
|
Value visited = visit(newPath, v);
|
256
|
-
// int i = jsonColumn.
|
346
|
+
// int i = jsonColumn.getTailIndex().intValue();
|
257
347
|
// index is shifted, so j++ is used.
|
258
348
|
newValue.add(j++, visited == null ? ValueFactory.newNil() : visited);
|
259
349
|
}
|
@@ -267,6 +357,12 @@ public class JsonVisitor
|
|
267
357
|
}
|
268
358
|
if (this.jsonAddColumns.containsKey(rootPath)) {
|
269
359
|
for (JsonColumn jsonColumn : this.jsonAddColumns.get(rootPath).values()) {
|
360
|
+
int i = jsonColumn.getTailIndex().intValue();
|
361
|
+
if (i < size) {
|
362
|
+
// index for add_columns must be greater than or equal to size
|
363
|
+
// just skip because we can not raise ConfigException beforehand for flexible JSON
|
364
|
+
continue;
|
365
|
+
}
|
270
366
|
int src = jsonColumn.getSrcTailIndex().intValue();
|
271
367
|
Value v = (src < arrayValue.size() ? arrayValue.get(src) : null);
|
272
368
|
if (v == null) {
|
@@ -326,6 +422,12 @@ public class JsonVisitor
|
|
326
422
|
if (this.jsonAddColumns.containsKey(rootPath)) {
|
327
423
|
Map<Value, Value> map = mapValue.map();
|
328
424
|
for (JsonColumn jsonColumn : this.jsonAddColumns.get(rootPath).values()) {
|
425
|
+
Value k = jsonColumn.getTailNameValue();
|
426
|
+
if (map.containsKey(k)) {
|
427
|
+
// key for add_columns must differ from any already existing key
|
428
|
+
// just skip because we can not raise ConfigException beforehand for flexible JSON
|
429
|
+
continue;
|
430
|
+
}
|
329
431
|
Value src = jsonColumn.getSrcTailNameValue();
|
330
432
|
Value v = map.get(src);
|
331
433
|
if (v == null) {
|
@@ -67,13 +67,13 @@ public class TestJsonColumn
|
|
67
67
|
}
|
68
68
|
|
69
69
|
@Test
|
70
|
-
public void
|
70
|
+
public void getTailIndex()
|
71
71
|
{
|
72
|
-
assertEquals(null, JsonColumn.
|
73
|
-
assertEquals(null, JsonColumn.
|
74
|
-
assertEquals(null, JsonColumn.
|
75
|
-
assertEquals(new Long(1), JsonColumn.
|
76
|
-
assertEquals(new Long(0), JsonColumn.
|
77
|
-
assertEquals(new Long(0), JsonColumn.
|
72
|
+
assertEquals(null, JsonColumn.getTailIndex("$['foo'].bar.baz"));
|
73
|
+
assertEquals(null, JsonColumn.getTailIndex("$.foo.bar"));
|
74
|
+
assertEquals(null, JsonColumn.getTailIndex("$.foo"));
|
75
|
+
assertEquals(new Long(1), JsonColumn.getTailIndex("$.foo[0][1]"));
|
76
|
+
assertEquals(new Long(0), JsonColumn.getTailIndex("$.foo[0]"));
|
77
|
+
assertEquals(new Long(0), JsonColumn.getTailIndex("$[0]"));
|
78
78
|
}
|
79
|
-
}
|
79
|
+
}
|
@@ -14,13 +14,14 @@ import org.junit.rules.ExpectedException;
|
|
14
14
|
import org.msgpack.value.MapValue;
|
15
15
|
import org.msgpack.value.Value;
|
16
16
|
import org.msgpack.value.ValueFactory;
|
17
|
-
import com.dena.analytics.jsonpathcompiler.InvalidPathException;
|
18
17
|
|
19
18
|
import static org.embulk.spi.type.Types.JSON;
|
20
19
|
import static org.junit.Assert.assertEquals;
|
21
20
|
import static org.junit.Assert.assertFalse;
|
22
21
|
import static org.junit.Assert.assertTrue;
|
23
22
|
|
23
|
+
import java.util.ArrayList;
|
24
|
+
import java.util.Arrays;
|
24
25
|
import java.util.HashMap;
|
25
26
|
import java.util.HashSet;
|
26
27
|
|
@@ -58,16 +59,32 @@ public class TestJsonVisitor
|
|
58
59
|
return new JsonVisitor(task, inputSchema, outputSchema);
|
59
60
|
}
|
60
61
|
|
61
|
-
@Test
|
62
|
-
public void
|
62
|
+
@Test
|
63
|
+
public void getAncestorJsonColumnList()
|
63
64
|
{
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
65
|
+
ArrayList<JsonColumn> subject;
|
66
|
+
|
67
|
+
subject = JsonVisitor.getAncestorJsonColumnList("$.json1.a.default");
|
68
|
+
assertEquals("$['json1']", subject.get(0).getPath());
|
69
|
+
assertTrue(subject.get(0).getDefaultValue().isMapValue());
|
70
|
+
assertEquals("$['json1']['a']", subject.get(1).getPath());
|
71
|
+
assertTrue(subject.get(1).getDefaultValue().isMapValue());
|
72
|
+
|
73
|
+
subject = JsonVisitor.getAncestorJsonColumnList("$.json1.a[0].default");
|
74
|
+
assertEquals("$['json1']", subject.get(0).getPath());
|
75
|
+
assertTrue(subject.get(0).getDefaultValue().isMapValue());
|
76
|
+
assertEquals("$['json1']['a']", subject.get(1).getPath());
|
77
|
+
assertTrue(subject.get(1).getDefaultValue().isArrayValue());
|
78
|
+
assertEquals("$['json1']['a'][0]", subject.get(2).getPath());
|
79
|
+
assertTrue(subject.get(2).getDefaultValue().isMapValue());
|
80
|
+
|
81
|
+
subject = JsonVisitor.getAncestorJsonColumnList("$.json1.a.default[0]");
|
82
|
+
assertEquals("$['json1']", subject.get(0).getPath());
|
83
|
+
assertTrue(subject.get(0).getDefaultValue().isMapValue());
|
84
|
+
assertEquals("$['json1']['a']", subject.get(1).getPath());
|
85
|
+
assertTrue(subject.get(1).getDefaultValue().isMapValue());
|
86
|
+
assertEquals("$['json1']['a']['default']", subject.get(2).getPath());
|
87
|
+
assertTrue(subject.get(2).getDefaultValue().isArrayValue());
|
71
88
|
}
|
72
89
|
|
73
90
|
@Test
|
@@ -103,7 +120,7 @@ public class TestJsonVisitor
|
|
103
120
|
}
|
104
121
|
|
105
122
|
@Test
|
106
|
-
public void
|
123
|
+
public void buildJsonDropColumns()
|
107
124
|
{
|
108
125
|
PluginTask task = taskFromYamlString(
|
109
126
|
"type: column",
|
@@ -135,8 +152,32 @@ public class TestJsonVisitor
|
|
135
152
|
}
|
136
153
|
}
|
137
154
|
|
155
|
+
@Test(expected = ConfigException.class)
|
156
|
+
public void configException_Columns()
|
157
|
+
{
|
158
|
+
PluginTask task = taskFromYamlString(
|
159
|
+
"type: column",
|
160
|
+
"columns:",
|
161
|
+
" - {name: \"$.json1.b.b[*]\"}");
|
162
|
+
Schema inputSchema = Schema.builder().build();
|
163
|
+
// b[*] should be written as b
|
164
|
+
jsonVisitor(task, inputSchema);
|
165
|
+
}
|
166
|
+
|
167
|
+
@Test(expected = ConfigException.class)
|
168
|
+
public void buildJsonAddColumns_ConfigException()
|
169
|
+
{
|
170
|
+
PluginTask task = taskFromYamlString(
|
171
|
+
"type: column",
|
172
|
+
"add_columns:",
|
173
|
+
" - {name: \"$.json1.b.b[*]\", type: json, default: []}");
|
174
|
+
Schema inputSchema = Schema.builder().build();
|
175
|
+
// b[*] should be written as b
|
176
|
+
jsonVisitor(task, inputSchema);
|
177
|
+
}
|
178
|
+
|
138
179
|
@Test
|
139
|
-
public void
|
180
|
+
public void buildJsonAddColumns()
|
140
181
|
{
|
141
182
|
PluginTask task = taskFromYamlString(
|
142
183
|
"type: column",
|
@@ -150,19 +191,22 @@ public class TestJsonVisitor
|
|
150
191
|
.build();
|
151
192
|
JsonVisitor subject = jsonVisitor(task, inputSchema);
|
152
193
|
|
153
|
-
|
194
|
+
assertTrue(subject.jsonAddColumns.containsKey("$"));
|
195
|
+
assertTrue(subject.jsonAddColumns.containsKey("$['json1']"));
|
154
196
|
assertTrue(subject.jsonAddColumns.containsKey("$['json1']['a']"));
|
155
197
|
assertTrue(subject.jsonAddColumns.containsKey("$['json1']['a']['copy_array']"));
|
156
198
|
|
157
199
|
{
|
158
200
|
HashMap<String, JsonColumn> jsonColumns = subject.jsonAddColumns.get("$['json1']['a']");
|
159
|
-
assertEquals(
|
201
|
+
assertEquals(3, jsonColumns.size());
|
160
202
|
String[] keys = jsonColumns.keySet().toArray(new String[0]);
|
161
203
|
JsonColumn[] values = jsonColumns.values().toArray(new JsonColumn[0]);
|
162
204
|
assertEquals("$['json1']['a']['default']", keys[0]);
|
163
205
|
assertEquals("$['json1']['a']['default']", values[0].getPath());
|
164
206
|
assertEquals("$['json1']['a']['copy']", keys[1]);
|
165
207
|
assertEquals("$['json1']['a']['copy']", values[1].getPath());
|
208
|
+
assertEquals("$['json1']['a']['copy_array']", keys[2]);
|
209
|
+
assertEquals("$['json1']['a']['copy_array']", values[2].getPath());
|
166
210
|
}
|
167
211
|
|
168
212
|
{
|
@@ -175,8 +219,20 @@ public class TestJsonVisitor
|
|
175
219
|
}
|
176
220
|
}
|
177
221
|
|
222
|
+
@Test(expected = ConfigException.class)
|
223
|
+
public void buildJsonColumns_ConfigException()
|
224
|
+
{
|
225
|
+
PluginTask task = taskFromYamlString(
|
226
|
+
"type: column",
|
227
|
+
"columns:",
|
228
|
+
" - {name: \"$.json1.b.b[*]\"}");
|
229
|
+
Schema inputSchema = Schema.builder().build();
|
230
|
+
// b[*] should be written as b
|
231
|
+
jsonVisitor(task, inputSchema);
|
232
|
+
}
|
233
|
+
|
178
234
|
@Test
|
179
|
-
public void
|
235
|
+
public void buildJsonColumns()
|
180
236
|
{
|
181
237
|
PluginTask task = taskFromYamlString(
|
182
238
|
"type: column",
|
@@ -190,19 +246,23 @@ public class TestJsonVisitor
|
|
190
246
|
.build();
|
191
247
|
JsonVisitor subject = jsonVisitor(task, inputSchema);
|
192
248
|
|
193
|
-
|
249
|
+
// 1st level keys are parents of jsonpath
|
250
|
+
assertTrue(subject.jsonColumns.containsKey("$"));
|
251
|
+
assertTrue(subject.jsonColumns.containsKey("$['json1']"));
|
194
252
|
assertTrue(subject.jsonColumns.containsKey("$['json1']['a']"));
|
195
253
|
assertTrue(subject.jsonColumns.containsKey("$['json1']['a']['copy_array']"));
|
196
254
|
|
197
255
|
{
|
198
256
|
HashMap<String, JsonColumn> jsonColumns = subject.jsonColumns.get("$['json1']['a']");
|
199
|
-
assertEquals(
|
257
|
+
assertEquals(3, jsonColumns.size());
|
200
258
|
String[] keys = jsonColumns.keySet().toArray(new String[0]);
|
201
259
|
JsonColumn[] values = jsonColumns.values().toArray(new JsonColumn[0]);
|
202
260
|
assertEquals("$['json1']['a']['default']", keys[0]);
|
203
261
|
assertEquals("$['json1']['a']['default']", values[0].getPath());
|
204
262
|
assertEquals("$['json1']['a']['copy']", keys[1]);
|
205
263
|
assertEquals("$['json1']['a']['copy']", values[1].getPath());
|
264
|
+
assertEquals("$['json1']['a']['copy_array']", keys[2]);
|
265
|
+
assertEquals("$['json1']['a']['copy_array']", values[2].getPath());
|
206
266
|
}
|
207
267
|
|
208
268
|
{
|
@@ -216,7 +276,7 @@ public class TestJsonVisitor
|
|
216
276
|
}
|
217
277
|
|
218
278
|
@Test
|
219
|
-
public void
|
279
|
+
public void buildJsonSchema()
|
220
280
|
{
|
221
281
|
PluginTask task = taskFromYamlString(
|
222
282
|
"type: column",
|
@@ -269,7 +329,6 @@ public class TestJsonVisitor
|
|
269
329
|
PluginTask task = taskFromYamlString(
|
270
330
|
"type: column",
|
271
331
|
"add_columns:",
|
272
|
-
" - {name: $.json1.k3, type: json, default: \"{}\"}",
|
273
332
|
" - {name: $.json1.k3.k3, type: string, default: v}",
|
274
333
|
" - {name: $.json1.k4, src: $.json1.k2}");
|
275
334
|
Schema inputSchema = Schema.builder()
|
@@ -297,8 +356,7 @@ public class TestJsonVisitor
|
|
297
356
|
"type: column",
|
298
357
|
"columns:",
|
299
358
|
" - {name: $.json1.k1}",
|
300
|
-
" - {name: $.json1.k2.k2}",
|
301
|
-
" - {name: $.json1.k3, type: json, default: \"{}\"}",
|
359
|
+
" - {name: $.json1.k2.k2}",
|
302
360
|
" - {name: $.json1.k3.k3, type: string, default: v}",
|
303
361
|
" - {name: $.json1.k4, src: $.json1.k2}");
|
304
362
|
Schema inputSchema = Schema.builder()
|
@@ -316,7 +374,7 @@ public class TestJsonVisitor
|
|
316
374
|
k2, ValueFactory.newMap(k2, v));
|
317
375
|
|
318
376
|
MapValue visited = subject.visit("$['json1']", map).asMapValue();
|
319
|
-
assertEquals("{\"k1\":{\"k1\":\"v\"},\"k3\":{\"k3\":\"v\"},\"k4\":{\"k2\":\"v\"}}", visited.toString());
|
377
|
+
assertEquals("{\"k1\":{\"k1\":\"v\"},\"k2\":{\"k2\":\"v\"},\"k3\":{\"k3\":\"v\"},\"k4\":{\"k2\":\"v\"}}", visited.toString());
|
320
378
|
}
|
321
379
|
|
322
380
|
@Test
|
@@ -352,8 +410,6 @@ public class TestJsonVisitor
|
|
352
410
|
"type: column",
|
353
411
|
"add_columns:",
|
354
412
|
" - {name: \"$.json1.k1[1]\", src: \"$.json1.k1[0]\"}",
|
355
|
-
" - {name: \"$.json1.k3\", type: json, default: \"[]\"}",
|
356
|
-
" - {name: \"$.json1.k3[0]\", type: json, default: \"{}\"}",
|
357
413
|
" - {name: \"$.json1.k3[0].k3\", type: string, default: v}");
|
358
414
|
Schema inputSchema = Schema.builder()
|
359
415
|
.add("json1", JSON)
|
@@ -379,11 +435,8 @@ public class TestJsonVisitor
|
|
379
435
|
PluginTask task = taskFromYamlString(
|
380
436
|
"type: column",
|
381
437
|
"columns:",
|
382
|
-
" - {name: \"$.json1.k1\"}",
|
383
438
|
" - {name: \"$.json1.k1[1]\", src: \"$.json1.k1[0]\"}",
|
384
|
-
" - {name: \"$.json1.k2[0]\"}",
|
385
|
-
" - {name: \"$.json1.k3\", type: json, default: \"[]\"}",
|
386
|
-
" - {name: \"$.json1.k3[0]\", type: json, default: \"{}\"}",
|
439
|
+
" - {name: \"$.json1.k2[0]\"}",
|
387
440
|
" - {name: \"$.json1.k3[0].k3\", type: string, default: v}");
|
388
441
|
Schema inputSchema = Schema.builder()
|
389
442
|
.add("json1", JSON)
|
@@ -400,7 +453,7 @@ public class TestJsonVisitor
|
|
400
453
|
k2, ValueFactory.newArray(v, v));
|
401
454
|
|
402
455
|
MapValue visited = subject.visit("$['json1']", map).asMapValue();
|
403
|
-
assertEquals("{\"k1\":[{\"k1\":\"v\"}],\"k3\":[{\"k3\":\"v\"}]}", visited.toString());
|
456
|
+
assertEquals("{\"k1\":[{\"k1\":\"v\"}],\"k2\":[\"v\"],\"k3\":[{\"k3\":\"v\"}]}", visited.toString());
|
404
457
|
}
|
405
458
|
|
406
459
|
@Test
|
@@ -435,7 +488,6 @@ public class TestJsonVisitor
|
|
435
488
|
PluginTask task = taskFromYamlString(
|
436
489
|
"type: column",
|
437
490
|
"add_columns:",
|
438
|
-
" - {name: \"$['json1']['k3']\", type: json, default: \"{}\"}",
|
439
491
|
" - {name: \"$['json1']['k3']['k3']\", type: string, default: v}",
|
440
492
|
" - {name: \"$['json1']['k4']\", src: \"$['json1']['k2']\"}");
|
441
493
|
Schema inputSchema = Schema.builder()
|
@@ -464,7 +516,6 @@ public class TestJsonVisitor
|
|
464
516
|
"columns:",
|
465
517
|
" - {name: \"$['json1']['k1']\"}",
|
466
518
|
" - {name: \"$['json1']['k2']['k2']\"}",
|
467
|
-
" - {name: \"$['json1']['k3']\", type: json, default: \"{}\"}",
|
468
519
|
" - {name: \"$['json1']['k3']['k3']\", type: string, default: v}",
|
469
520
|
" - {name: \"$['json1']['k4']\", src: \"$['json1']['k2']\"}");
|
470
521
|
Schema inputSchema = Schema.builder()
|
@@ -481,7 +532,7 @@ public class TestJsonVisitor
|
|
481
532
|
k2, ValueFactory.newMap(k2, v));
|
482
533
|
|
483
534
|
MapValue visited = subject.visit("$['json1']", map).asMapValue();
|
484
|
-
assertEquals("{\"k1\":{\"k1\":\"v\"},\"k3\":{\"k3\":\"v\"},\"k4\":{\"k2\":\"v\"}}", visited.toString());
|
535
|
+
assertEquals("{\"k1\":{\"k1\":\"v\"},\"k2\":{\"k2\":\"v\"},\"k3\":{\"k3\":\"v\"},\"k4\":{\"k2\":\"v\"}}", visited.toString());
|
485
536
|
}
|
486
537
|
|
487
538
|
@Test
|
@@ -517,8 +568,6 @@ public class TestJsonVisitor
|
|
517
568
|
"type: column",
|
518
569
|
"add_columns:",
|
519
570
|
" - {name: \"$['json1']['k1'][1]\", src: \"$['json1']['k1'][0]\"}",
|
520
|
-
" - {name: \"$['json1']['k3']\", type: json, default: \"[]\"}",
|
521
|
-
" - {name: \"$['json1']['k3'][0]\", type: json, default: \"{}\"}",
|
522
571
|
" - {name: \"$['json1']['k3'][0]['k3']\", type: string, default: v}");
|
523
572
|
Schema inputSchema = Schema.builder()
|
524
573
|
.add("json1", JSON)
|
@@ -544,11 +593,8 @@ public class TestJsonVisitor
|
|
544
593
|
PluginTask task = taskFromYamlString(
|
545
594
|
"type: column",
|
546
595
|
"columns:",
|
547
|
-
" - {name: \"$['json1']['k1']\"}",
|
548
596
|
" - {name: \"$['json1']['k1'][1]\", src: \"$['json1']['k1'][0]\"}",
|
549
597
|
" - {name: \"$['json1']['k2'][0]\"}",
|
550
|
-
" - {name: \"$['json1']['k3']\", type: json, default: \"[]\"}",
|
551
|
-
" - {name: \"$['json1']['k3'][0]\", type: json, default: \"{}\"}",
|
552
598
|
" - {name: \"$['json1']['k3'][0]['k3']\", type: string, default: v}");
|
553
599
|
Schema inputSchema = Schema.builder()
|
554
600
|
.add("json1", JSON)
|
@@ -564,7 +610,7 @@ public class TestJsonVisitor
|
|
564
610
|
k2, ValueFactory.newArray(v, v));
|
565
611
|
|
566
612
|
MapValue visited = subject.visit("$['json1']", map).asMapValue();
|
567
|
-
assertEquals("{\"k1\":[{\"k1\":\"v\"}],\"k3\":[{\"k3\":\"v\"}]}", visited.toString());
|
613
|
+
assertEquals("{\"k1\":[{\"k1\":\"v\"}],\"k2\":[\"v\"],\"k3\":[{\"k3\":\"v\"}]}", visited.toString());
|
568
614
|
}
|
569
615
|
|
570
616
|
// Because the dot notation is converted to single quotes by default,
|
@@ -652,13 +698,14 @@ public class TestJsonVisitor
|
|
652
698
|
assertEquals("{\"k____1\":[{\"k____1\":\"v\"}],\"k_2\":{\"k_2\":\"v\"}}", visited.toString());
|
653
699
|
}
|
654
700
|
|
701
|
+
/*
|
655
702
|
@Test
|
656
703
|
public void visit_withColumnNameIncludingSingleQuotes()
|
657
704
|
{
|
658
705
|
PluginTask task = taskFromYamlString(
|
659
706
|
"type: column",
|
660
707
|
"columns:",
|
661
|
-
" - {name: \"$[
|
708
|
+
" - {name: \"$['\\\\'json1']['k1']\"}");
|
662
709
|
Schema inputSchema = Schema.builder()
|
663
710
|
.add("'json1", JSON)
|
664
711
|
.build();
|
@@ -672,9 +719,10 @@ public class TestJsonVisitor
|
|
672
719
|
MapValue visited = subject.visit("$['\\'json1']", map).asMapValue();
|
673
720
|
assertEquals("{\"k1\":\"v\"}", visited.toString());
|
674
721
|
}
|
722
|
+
*/
|
675
723
|
|
676
724
|
@Test(expected = ConfigException.class)
|
677
|
-
public void
|
725
|
+
public void configException_MultiProperties() {
|
678
726
|
PluginTask task = taskFromYamlString(
|
679
727
|
"type: column",
|
680
728
|
"columns:",
|
@@ -687,7 +735,7 @@ public class TestJsonVisitor
|
|
687
735
|
|
688
736
|
// It is recognized multi properties if the square brackets does not close properly
|
689
737
|
@Test(expected = ConfigException.class)
|
690
|
-
public void
|
738
|
+
public void configException_PropertyIsNotSeparatedByCommas()
|
691
739
|
{
|
692
740
|
PluginTask task = taskFromYamlString(
|
693
741
|
"type: column",
|
@@ -700,7 +748,7 @@ public class TestJsonVisitor
|
|
700
748
|
}
|
701
749
|
|
702
750
|
@Test(expected = ConfigException.class)
|
703
|
-
public void
|
751
|
+
public void configException_FunctionPathToken()
|
704
752
|
{
|
705
753
|
PluginTask task = taskFromYamlString(
|
706
754
|
"type: column",
|
@@ -713,7 +761,7 @@ public class TestJsonVisitor
|
|
713
761
|
}
|
714
762
|
|
715
763
|
@Test(expected = ConfigException.class)
|
716
|
-
public void
|
764
|
+
public void configException_PredicatePathToken()
|
717
765
|
{
|
718
766
|
PluginTask task = taskFromYamlString(
|
719
767
|
"type: column",
|
@@ -726,7 +774,7 @@ public class TestJsonVisitor
|
|
726
774
|
}
|
727
775
|
|
728
776
|
@Test(expected = ConfigException.class)
|
729
|
-
public void
|
777
|
+
public void configException_ScanPathToken()
|
730
778
|
{
|
731
779
|
PluginTask task = taskFromYamlString(
|
732
780
|
"type: column",
|
@@ -739,7 +787,7 @@ public class TestJsonVisitor
|
|
739
787
|
}
|
740
788
|
|
741
789
|
@Test(expected = ConfigException.class)
|
742
|
-
public void
|
790
|
+
public void configException_MultiIndexOperation()
|
743
791
|
{
|
744
792
|
PluginTask task = taskFromYamlString(
|
745
793
|
"type: column",
|
@@ -752,7 +800,7 @@ public class TestJsonVisitor
|
|
752
800
|
}
|
753
801
|
|
754
802
|
@Test(expected = ConfigException.class)
|
755
|
-
public void
|
803
|
+
public void configException_IndexOperationAtMiddlePosition()
|
756
804
|
{
|
757
805
|
PluginTask task = taskFromYamlString(
|
758
806
|
"type: column",
|
@@ -765,7 +813,7 @@ public class TestJsonVisitor
|
|
765
813
|
}
|
766
814
|
|
767
815
|
@Test(expected = ConfigException.class)
|
768
|
-
public void
|
816
|
+
public void configException_ArraySliceOperation()
|
769
817
|
{
|
770
818
|
PluginTask task = taskFromYamlString(
|
771
819
|
"type: column",
|
@@ -778,7 +826,7 @@ public class TestJsonVisitor
|
|
778
826
|
}
|
779
827
|
|
780
828
|
@Test(expected = ConfigException.class)
|
781
|
-
public void
|
829
|
+
public void configException_MArraySliceOperationAtMiddlePosition()
|
782
830
|
{
|
783
831
|
PluginTask task = taskFromYamlString(
|
784
832
|
"type: column",
|
@@ -794,7 +842,7 @@ public class TestJsonVisitor
|
|
794
842
|
public ExpectedException thrown = ExpectedException.none();
|
795
843
|
|
796
844
|
@Test
|
797
|
-
public void
|
845
|
+
public void configException_PathCompileError()
|
798
846
|
{
|
799
847
|
PluginTask task = taskFromYamlString(
|
800
848
|
"type: column",
|
@@ -808,4 +856,4 @@ public class TestJsonVisitor
|
|
808
856
|
|
809
857
|
jsonVisitor(task, inputSchema);
|
810
858
|
}
|
811
|
-
}
|
859
|
+
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-column
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.0.pre2
|
4
|
+
version: 0.6.0.pre3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naotoshi Seo
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-10-
|
11
|
+
date: 2016-10-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -52,9 +52,13 @@ files:
|
|
52
52
|
- README.md
|
53
53
|
- build.gradle
|
54
54
|
- config/checkstyle/checkstyle.xml
|
55
|
+
- example/add_columns.txt
|
55
56
|
- example/add_columns.yml
|
57
|
+
- example/bracket_notations.txt
|
56
58
|
- example/bracket_notations.yml
|
59
|
+
- example/columns.txt
|
57
60
|
- example/columns.yml
|
61
|
+
- example/drop_columns.txt
|
58
62
|
- example/drop_columns.yml
|
59
63
|
- example/edgecase.tsv
|
60
64
|
- example/edgecase.yml
|
@@ -69,8 +73,8 @@ files:
|
|
69
73
|
- src/main/java/org/embulk/filter/column/ColumnFilterPlugin.java
|
70
74
|
- src/main/java/org/embulk/filter/column/ColumnVisitorImpl.java
|
71
75
|
- src/main/java/org/embulk/filter/column/JsonColumn.java
|
76
|
+
- src/main/java/org/embulk/filter/column/JsonPathUtil.java
|
72
77
|
- src/main/java/org/embulk/filter/column/JsonVisitor.java
|
73
|
-
- src/main/java/org/embulk/filter/column/PathTokenUtil.java
|
74
78
|
- src/test/java/org/embulk/filter/column/TestColumnFilterPlugin.java
|
75
79
|
- src/test/java/org/embulk/filter/column/TestColumnVisitorImpl.java
|
76
80
|
- src/test/java/org/embulk/filter/column/TestJsonColumn.java
|
@@ -78,9 +82,9 @@ files:
|
|
78
82
|
- classpath/accessors-smart-1.1.jar
|
79
83
|
- classpath/asm-5.0.3.jar
|
80
84
|
- classpath/commons-lang3-3.4.jar
|
81
|
-
- classpath/embulk-filter-column-0.6.0.pre2.jar
|
85
|
+
- classpath/embulk-filter-column-0.6.0.pre3.jar
|
82
86
|
- classpath/json-smart-2.2.1.jar
|
83
|
-
- classpath/JsonPathCompiler-0.0.
|
87
|
+
- classpath/JsonPathCompiler-0.0.11.jar
|
84
88
|
- classpath/slf4j-api-1.7.21.jar
|
85
89
|
homepage: https://github.com/sonots/embulk-filter-column
|
86
90
|
licenses:
|
@@ -1,39 +0,0 @@
|
|
1
|
-
package org.embulk.filter.column;
|
2
|
-
|
3
|
-
import com.dena.analytics.jsonpathcompiler.expressions.path.ArrayIndexOperation;
|
4
|
-
import com.dena.analytics.jsonpathcompiler.expressions.path.ArrayPathToken;
|
5
|
-
import com.dena.analytics.jsonpathcompiler.expressions.path.FunctionPathToken;
|
6
|
-
import com.dena.analytics.jsonpathcompiler.expressions.path.PathToken;
|
7
|
-
import com.dena.analytics.jsonpathcompiler.expressions.path.PredicatePathToken;
|
8
|
-
import com.dena.analytics.jsonpathcompiler.expressions.path.ScanPathToken;
|
9
|
-
import org.embulk.config.ConfigException;
|
10
|
-
|
11
|
-
public class PathTokenUtil
|
12
|
-
{
|
13
|
-
public static void assertSupportedPathToken(PathToken pathToken, String path)
|
14
|
-
{
|
15
|
-
if (pathToken instanceof ArrayPathToken) {
|
16
|
-
ArrayIndexOperation arrayIndexOperation = ((ArrayPathToken) pathToken).getArrayIndexOperation();
|
17
|
-
assertSupportedArrayPathToken(arrayIndexOperation, path);
|
18
|
-
}
|
19
|
-
else if (pathToken instanceof ScanPathToken) {
|
20
|
-
throw new ConfigException(String.format("scan path token is not supported \"%s\"", path));
|
21
|
-
}
|
22
|
-
else if (pathToken instanceof FunctionPathToken) {
|
23
|
-
throw new ConfigException(String.format("function path token is not supported \"%s\"", path));
|
24
|
-
}
|
25
|
-
else if (pathToken instanceof PredicatePathToken) {
|
26
|
-
throw new ConfigException(String.format("predicate path token is not supported \"%s\"", path));
|
27
|
-
}
|
28
|
-
}
|
29
|
-
|
30
|
-
public static void assertSupportedArrayPathToken(ArrayIndexOperation arrayIndexOperation, String path)
|
31
|
-
{
|
32
|
-
if (arrayIndexOperation == null) {
|
33
|
-
throw new ConfigException(String.format("Array Slice Operation is not supported \"%s\"", path));
|
34
|
-
}
|
35
|
-
else if (!arrayIndexOperation.isSingleIndexOperation()) {
|
36
|
-
throw new ConfigException(String.format("Multi Array Indexes is not supported \"%s\"", path));
|
37
|
-
}
|
38
|
-
}
|
39
|
-
}
|