embulk-filter-column 0.6.0.pre2 → 0.6.0.pre3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +2 -1
- data/README.md +2 -14
- data/build.gradle +2 -2
- data/example/add_columns.txt +11 -0
- data/example/add_columns.yml +1 -1
- data/example/bracket_notations.txt +11 -0
- data/example/columns.txt +11 -0
- data/example/drop_columns.txt +11 -0
- data/src/main/java/org/embulk/filter/column/ColumnFilterPlugin.java +1 -1
- data/src/main/java/org/embulk/filter/column/ColumnVisitorImpl.java +1 -1
- data/src/main/java/org/embulk/filter/column/JsonColumn.java +13 -16
- data/src/main/java/org/embulk/filter/column/JsonPathUtil.java +76 -0
- data/src/main/java/org/embulk/filter/column/JsonVisitor.java +173 -71
- data/src/test/java/org/embulk/filter/column/TestJsonColumn.java +8 -8
- data/src/test/java/org/embulk/filter/column/TestJsonVisitor.java +98 -50
- metadata +9 -5
- data/src/main/java/org/embulk/filter/column/PathTokenUtil.java +0 -39
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d26c69b6a7bfb4a6c4d68c6967301a0dc32562f5
|
4
|
+
data.tar.gz: c674640721b35540a9aba9aef1ad76c5cb5adc96
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9cc5e8165d85afed48b65302d0baa9f54e35b1ae5438f390b5b5970d8342e14cf72b30f137ca901229578c3493b23a4c3a6286ea076501bc926eaed97491f70f
|
7
|
+
data.tar.gz: 867fa85522091768287f5b4ade0b7e17947899e1f6ccbeeb017d5115fb0468d418c62712af308a80580afe1be6bb5a2f3ac73c7c1274b11d4c674dd8c8635a66
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -133,27 +133,15 @@ NOTE:
|
|
133
133
|
|
134
134
|
NOTE:
|
135
135
|
|
136
|
-
|
136
|
+
Renaming or copying JSON paths with the `src` option is only partially supported so far. The parent JSON path must be the same, as in:
|
137
137
|
|
138
138
|
```
|
139
|
-
- (name: $.payload.foo}
|
140
|
-
- {name: $.payload.foo.bar}
|
141
|
-
```
|
142
|
-
|
143
|
-
NOTE:
|
144
|
-
|
145
|
-
`src` (to rename or copy columns) is only partially supported yet. The upper json path must be same like:
|
146
|
-
|
147
|
-
```
|
148
|
-
- {name: $.payload.foo}
|
149
139
|
- {name: $.payload.foo.dest, src: $.payload.foo.src}
|
150
140
|
```
|
151
141
|
|
152
|
-
|
142
|
+
That is, the example below does not work yet, because the parent paths differ (`$.payload.foo` and `$.payload.bar`):
|
153
143
|
|
154
144
|
```
|
155
|
-
- {name: $.payload.foo}
|
156
|
-
- {name: $.payload.bar}
|
157
145
|
- {name: $.payload.foo.dest, src: $.payload.bar.src}
|
158
146
|
```
|
159
147
|
|
data/build.gradle
CHANGED
@@ -15,14 +15,14 @@ configurations {
|
|
15
15
|
provided
|
16
16
|
}
|
17
17
|
|
18
|
-
version = "0.6.0.
|
18
|
+
version = "0.6.0.pre3"
|
19
19
|
sourceCompatibility = 1.7
|
20
20
|
targetCompatibility = 1.7
|
21
21
|
|
22
22
|
dependencies {
|
23
23
|
compile "org.embulk:embulk-core:0.8.+"
|
24
24
|
provided "org.embulk:embulk-core:0.8.+"
|
25
|
-
compile
|
25
|
+
compile "io.github.medjed:JsonPathCompiler:0.0.+"
|
26
26
|
|
27
27
|
testCompile "junit:junit:4.+"
|
28
28
|
testCompile "org.embulk:embulk-core:0.8.+:tests"
|
@@ -0,0 +1,11 @@
|
|
1
|
+
2016-10-20 02:29:31.933 +0900: Embulk v0.8.6
|
2
|
+
2016-10-20 02:29:32.859 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/column from a load path
|
3
|
+
2016-10-20 02:29:32.876 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'example.csv'
|
4
|
+
2016-10-20 02:29:32.882 +0900 [INFO] (0001:preview): Loading files [example/example.csv]
|
5
|
+
+-------------------------+---------+---------------------------------------------+--------------+-------------------------------------------------------------+----------+-------------------------+-------------------------+-------------------+
|
6
|
+
| time:timestamp | id:long | name:string | score:double | json:json | foo:long | d:timestamp | t:timestamp | copy_score:double |
|
7
|
+
+-------------------------+---------+---------------------------------------------+--------------+-------------------------------------------------------------+----------+-------------------------+-------------------------+-------------------+
|
8
|
+
| 2015-07-13 00:00:00 UTC | 0 | Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY | 1370.0 | {"foo":"foo","bar":"bar","d":"2015-07-13","copy_foo":"foo"} | 1 | 2015-07-12 15:00:00 UTC | 2015-07-13 00:00:00 UTC | 1370.0 |
|
9
|
+
| 2015-07-13 00:00:00 UTC | 0 | Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY | 1370.0 | {"foo":"foo2","bar":1,"d":"2015-07-13","copy_foo":"foo2"} | 1 | 2015-07-12 15:00:00 UTC | 2015-07-13 00:00:00 UTC | 1370.0 |
|
10
|
+
| | | | 9170.0 | | 1 | 2015-07-12 15:00:00 UTC | 2015-07-13 00:00:00 UTC | 9170.0 |
|
11
|
+
+-------------------------+---------+---------------------------------------------+--------------+-------------------------------------------------------------+----------+-------------------------+-------------------------+-------------------+
|
data/example/add_columns.yml
CHANGED
@@ -23,7 +23,7 @@ filters:
|
|
23
23
|
- {name: d, type: timestamp, default: "2015-07-13"}
|
24
24
|
- {name: t, type: timestamp, default: "2015-07-13 00:00:00", timezone: "UTC", format: "%Y-%m-%d %H:%M:%S"}
|
25
25
|
- {name: copy_score, src: score}
|
26
|
-
- {name: $.json.
|
26
|
+
- {name: $.json.bar, type: long, default: 1}
|
27
27
|
- {name: $.json.d, type: string, default: "2015-07-13"}
|
28
28
|
- {name: $.json.copy_foo, src: $.json.foo}
|
29
29
|
out:
|
@@ -0,0 +1,11 @@
|
|
1
|
+
2016-10-20 02:29:47.367 +0900: Embulk v0.8.6
|
2
|
+
2016-10-20 02:29:48.254 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/column from a load path
|
3
|
+
2016-10-20 02:29:48.270 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'example.csv'
|
4
|
+
2016-10-20 02:29:48.274 +0900 [INFO] (0001:preview): Loading files [example/example.csv]
|
5
|
+
+-------------------------+---------------------------------------------+----------+---------+-------------------+---------------------------------------------------------------------------------------------------------+
|
6
|
+
| time:timestamp | name:string | foo:long | id:long | copy_score:double | json:json |
|
7
|
+
+-------------------------+---------------------------------------------+----------+---------+-------------------+---------------------------------------------------------------------------------------------------------+
|
8
|
+
| 2015-07-13 00:00:00 UTC | Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY | 1 | 0 | 1370.0 | {"foo":"foo","copy_foo":"foo","array_a":[{"a":"a0"}],"array_b":[{"b":"b"},{"b":"b"}]} |
|
9
|
+
| 2015-07-13 00:00:00 UTC | Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY | 1 | 0 | 1370.0 | {"foo":"foo2","copy_foo":"foo2","array_a":[{"a":"a0"}],"array_b":[{"b":"b"},{"b":"b"}]} |
|
10
|
+
| 2015-07-13 00:00:00 UTC | foo | 1 | | 9170.0 | {"foo":"FOO","copy_foo":"FOO","array_a":[{"a":"default0"}],"array_b":[{"b":"default"},{"b":"default"}]} |
|
11
|
+
+-------------------------+---------------------------------------------+----------+---------+-------------------+---------------------------------------------------------------------------------------------------------+
|
data/example/columns.txt
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
2016-10-20 02:30:03.618 +0900: Embulk v0.8.6
|
2
|
+
2016-10-20 02:30:04.499 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/column from a load path
|
3
|
+
2016-10-20 02:30:04.516 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'example.csv'
|
4
|
+
2016-10-20 02:30:04.521 +0900 [INFO] (0001:preview): Loading files [example/example.csv]
|
5
|
+
+-------------------------+---------------------------------------------+----------+---------+-------------------+----------------------------------+
|
6
|
+
| time:timestamp | name:string | foo:long | id:long | copy_score:double | json:json |
|
7
|
+
+-------------------------+---------------------------------------------+----------+---------+-------------------+----------------------------------+
|
8
|
+
| 2015-07-13 00:00:00 UTC | Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY | 1 | 0 | 1370.0 | {"foo":"foo","copy_foo":"foo"} |
|
9
|
+
| 2015-07-13 00:00:00 UTC | Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY | 1 | 0 | 1370.0 | {"foo":"foo2","copy_foo":"foo2"} |
|
10
|
+
| 2015-07-13 00:00:00 UTC | foo | 1 | | 9170.0 | {"foo":"FOO","copy_foo":"FOO"} |
|
11
|
+
+-------------------------+---------------------------------------------+----------+---------+-------------------+----------------------------------+
|
@@ -0,0 +1,11 @@
|
|
1
|
+
2016-10-20 02:30:16.407 +0900: Embulk v0.8.6
|
2
|
+
2016-10-20 02:30:17.290 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/column from a load path
|
3
|
+
2016-10-20 02:30:17.305 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'example.csv'
|
4
|
+
2016-10-20 02:30:17.310 +0900 [INFO] (0001:preview): Loading files [example/example.csv]
|
5
|
+
+---------------------------------------------+--------------+---------------+
|
6
|
+
| name:string | score:double | json:json |
|
7
|
+
+---------------------------------------------+--------------+---------------+
|
8
|
+
| Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY | 1370.0 | {"bar":"bar"} |
|
9
|
+
| Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY | 1370.0 | {} |
|
10
|
+
| | 9170.0 | |
|
11
|
+
+---------------------------------------------+--------------+---------------+
|
@@ -27,7 +27,7 @@ import org.slf4j.Logger;
|
|
27
27
|
|
28
28
|
import java.util.List;
|
29
29
|
|
30
|
-
import
|
30
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.PathCompiler;
|
31
31
|
|
32
32
|
public class ColumnFilterPlugin implements FilterPlugin
|
33
33
|
{
|
@@ -1,6 +1,6 @@
|
|
1
1
|
package org.embulk.filter.column;
|
2
2
|
|
3
|
-
import
|
3
|
+
import io.github.medjed.jsonpathcompiler.expressions.Utils;
|
4
4
|
import com.google.common.base.Throwables;
|
5
5
|
|
6
6
|
import org.embulk.filter.column.ColumnFilterPlugin.ColumnConfig;
|
@@ -1,12 +1,12 @@
|
|
1
1
|
package org.embulk.filter.column;
|
2
2
|
|
3
|
-
import
|
4
|
-
import
|
5
|
-
import
|
6
|
-
import
|
7
|
-
import
|
8
|
-
import
|
9
|
-
import
|
3
|
+
import io.github.medjed.jsonpathcompiler.expressions.Path;
|
4
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.ArrayIndexOperation;
|
5
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.ArrayPathToken;
|
6
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.PathCompiler;
|
7
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.PathToken;
|
8
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.RootPathToken;
|
9
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.PropertyPathToken;
|
10
10
|
import org.embulk.config.ConfigException;
|
11
11
|
import org.embulk.spi.type.Type;
|
12
12
|
import org.msgpack.value.StringValue;
|
@@ -56,17 +56,14 @@ public class JsonColumn
|
|
56
56
|
this.pathValue = ValueFactory.newString(path);
|
57
57
|
this.parentPath = compiledPath.getParentPath();
|
58
58
|
|
59
|
-
|
60
|
-
throw new ConfigException(String.format("%s wrongly ends with [*], perhaps you can remove the [*]", path));
|
61
|
-
}
|
62
|
-
this.tailIndex = tailIndex(compiledRoot);
|
59
|
+
this.tailIndex = getTailIndex(compiledRoot);
|
63
60
|
this.parentPathValue = ValueFactory.newString(parentPath);
|
64
61
|
String tailName = getTailName(compiledRoot);
|
65
62
|
this.tailNameValue = tailName == null ? ValueFactory.newNil() : ValueFactory.newString(tailName);
|
66
63
|
|
67
64
|
this.srcValue = ValueFactory.newString(this.src);
|
68
65
|
this.srcParentPath = compiledSrc.getParentPath();
|
69
|
-
this.srcTailIndex =
|
66
|
+
this.srcTailIndex = getTailIndex(compiledSrcRoot);
|
70
67
|
this.srcParentPathValue = ValueFactory.newString(this.srcParentPath);
|
71
68
|
String srcTailName = getTailName(compiledSrcRoot);
|
72
69
|
this.srcTailNameValue = srcTailName == null ? ValueFactory.newNil() : ValueFactory.newString(srcTailName);
|
@@ -92,12 +89,12 @@ public class JsonColumn
|
|
92
89
|
}
|
93
90
|
}
|
94
91
|
|
95
|
-
private Long
|
92
|
+
private Long getTailIndex(RootPathToken root)
|
96
93
|
{
|
97
94
|
PathToken tail = root.getTail();
|
98
95
|
if (tail instanceof ArrayPathToken) {
|
99
96
|
ArrayIndexOperation arrayIndexOperation = ((ArrayPathToken) tail).getArrayIndexOperation();
|
100
|
-
|
97
|
+
JsonPathUtil.assertSupportedArrayPathToken(arrayIndexOperation, path);
|
101
98
|
return arrayIndexOperation.indexes().get(0).longValue();
|
102
99
|
}
|
103
100
|
else {
|
@@ -135,7 +132,7 @@ public class JsonColumn
|
|
135
132
|
return parentPath;
|
136
133
|
}
|
137
134
|
|
138
|
-
public Long
|
135
|
+
public Long getTailIndex()
|
139
136
|
{
|
140
137
|
return tailIndex;
|
141
138
|
}
|
@@ -186,7 +183,7 @@ public class JsonColumn
|
|
186
183
|
return ((RootPathToken) PathCompiler.compile(path).getRoot()).getTailPath();
|
187
184
|
}
|
188
185
|
|
189
|
-
public static Long
|
186
|
+
public static Long getTailIndex(String path)
|
190
187
|
{
|
191
188
|
Path compiledPath = PathCompiler.compile(path);
|
192
189
|
PathToken tail = ((RootPathToken) compiledPath.getRoot()).getTail();
|
@@ -0,0 +1,76 @@
|
|
1
|
+
package org.embulk.filter.column;
|
2
|
+
|
3
|
+
import io.github.medjed.jsonpathcompiler.InvalidPathException;
|
4
|
+
import io.github.medjed.jsonpathcompiler.expressions.Path;
|
5
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.ArrayIndexOperation;
|
6
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.ArrayPathToken;
|
7
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.FunctionPathToken;
|
8
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.PathCompiler;
|
9
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.PathToken;
|
10
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.PredicatePathToken;
|
11
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.RootPathToken;
|
12
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.ScanPathToken;
|
13
|
+
import org.embulk.config.ConfigException;
|
14
|
+
|
15
|
+
public class JsonPathUtil
|
16
|
+
{
|
17
|
+
public static void assertJsonPathFormat(String path)
|
18
|
+
{
|
19
|
+
Path compiledPath;
|
20
|
+
try {
|
21
|
+
compiledPath = PathCompiler.compile(path);
|
22
|
+
}
|
23
|
+
catch (InvalidPathException e) {
|
24
|
+
throw new ConfigException(String.format("jsonpath %s, %s", path, e.getMessage()));
|
25
|
+
}
|
26
|
+
PathToken pathToken = compiledPath.getRoot();
|
27
|
+
while (true) {
|
28
|
+
assertSupportedPathToken(pathToken, path);
|
29
|
+
if (pathToken.isLeaf()) {
|
30
|
+
break;
|
31
|
+
}
|
32
|
+
pathToken = pathToken.next();
|
33
|
+
}
|
34
|
+
}
|
35
|
+
|
36
|
+
public static void assertSupportedPathToken(PathToken pathToken, String path)
|
37
|
+
{
|
38
|
+
if (pathToken instanceof ArrayPathToken) {
|
39
|
+
ArrayIndexOperation arrayIndexOperation = ((ArrayPathToken) pathToken).getArrayIndexOperation();
|
40
|
+
assertSupportedArrayPathToken(arrayIndexOperation, path);
|
41
|
+
}
|
42
|
+
else if (pathToken instanceof ScanPathToken) {
|
43
|
+
throw new ConfigException(String.format("scan path token is not supported \"%s\"", path));
|
44
|
+
}
|
45
|
+
else if (pathToken instanceof FunctionPathToken) {
|
46
|
+
throw new ConfigException(String.format("function path token is not supported \"%s\"", path));
|
47
|
+
}
|
48
|
+
else if (pathToken instanceof PredicatePathToken) {
|
49
|
+
throw new ConfigException(String.format("predicate path token is not supported \"%s\"", path));
|
50
|
+
}
|
51
|
+
}
|
52
|
+
|
53
|
+
public static void assertSupportedArrayPathToken(ArrayIndexOperation arrayIndexOperation, String path)
|
54
|
+
{
|
55
|
+
if (arrayIndexOperation == null) {
|
56
|
+
throw new ConfigException(String.format("Array Slice Operation is not supported \"%s\"", path));
|
57
|
+
}
|
58
|
+
else if (!arrayIndexOperation.isSingleIndexOperation()) {
|
59
|
+
throw new ConfigException(String.format("Multi Array Indexes is not supported \"%s\"", path));
|
60
|
+
}
|
61
|
+
}
|
62
|
+
|
63
|
+
public static void assertDoNotEndsWithArrayWildcard(String path)
|
64
|
+
{
|
65
|
+
Path compiledPath;
|
66
|
+
try {
|
67
|
+
compiledPath = PathCompiler.compile(path);
|
68
|
+
}
|
69
|
+
catch (InvalidPathException e) {
|
70
|
+
throw new ConfigException(String.format("jsonpath %s, %s", path, e.getMessage()));
|
71
|
+
}
|
72
|
+
if (((RootPathToken) compiledPath.getRoot()).getTailPath().equals("[*]")) {
|
73
|
+
throw new ConfigException(String.format("%s wrongly ends with [*], perhaps you can remove the [*]", compiledPath.toString()));
|
74
|
+
}
|
75
|
+
}
|
76
|
+
}
|
@@ -1,9 +1,10 @@
|
|
1
1
|
package org.embulk.filter.column;
|
2
2
|
|
3
|
-
import
|
4
|
-
import
|
5
|
-
import
|
6
|
-
import
|
3
|
+
import io.github.medjed.jsonpathcompiler.InvalidPathException;
|
4
|
+
import io.github.medjed.jsonpathcompiler.expressions.Path;
|
5
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.ArrayPathToken;
|
6
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.PathCompiler;
|
7
|
+
import io.github.medjed.jsonpathcompiler.expressions.path.PathToken;
|
7
8
|
import org.embulk.config.ConfigException;
|
8
9
|
import org.embulk.filter.column.ColumnFilterPlugin.ColumnConfig;
|
9
10
|
import org.embulk.filter.column.ColumnFilterPlugin.PluginTask;
|
@@ -18,6 +19,7 @@ import org.embulk.spi.type.LongType;
|
|
18
19
|
import org.embulk.spi.type.StringType;
|
19
20
|
import org.embulk.spi.type.TimestampType;
|
20
21
|
import org.embulk.spi.type.Type;
|
22
|
+
import org.embulk.spi.type.Types;
|
21
23
|
import org.msgpack.value.ArrayValue;
|
22
24
|
import org.msgpack.value.MapValue;
|
23
25
|
import org.msgpack.value.Value;
|
@@ -38,9 +40,13 @@ public class JsonVisitor
|
|
38
40
|
final PluginTask task;
|
39
41
|
final Schema inputSchema;
|
40
42
|
final Schema outputSchema;
|
43
|
+
// jsonpath
|
41
44
|
final HashSet<String> shouldVisitSet = new HashSet<>();
|
45
|
+
// parent jsonpath => { jsonpath => json column }
|
42
46
|
final HashMap<String, LinkedHashMap<String, JsonColumn>> jsonColumns = new HashMap<>();
|
47
|
+
// parent jsonpath => { jsonpath => json column }
|
43
48
|
final HashMap<String, LinkedHashMap<String, JsonColumn>> jsonAddColumns = new HashMap<>();
|
49
|
+
// parent jsonpath => [ jsonpath ]
|
44
50
|
final HashMap<String, HashSet<String>> jsonDropColumns = new HashMap<>();
|
45
51
|
|
46
52
|
JsonVisitor(PluginTask task, Schema inputSchema, Schema outputSchema)
|
@@ -92,6 +98,18 @@ public class JsonVisitor
|
|
92
98
|
jsonColumns.get(parentPath).put(compiledPath.toString(), value);
|
93
99
|
}
|
94
100
|
|
101
|
+
private boolean jsonColumnsContainsKey(String path)
|
102
|
+
{
|
103
|
+
Path compiledPath = PathCompiler.compile(path);
|
104
|
+
String parentPath = compiledPath.getParentPath();
|
105
|
+
if (jsonColumns.containsKey(parentPath)) {
|
106
|
+
return jsonColumns.get(parentPath).containsKey(compiledPath.toString());
|
107
|
+
}
|
108
|
+
else {
|
109
|
+
return false;
|
110
|
+
}
|
111
|
+
}
|
112
|
+
|
95
113
|
private void jsonAddColumnsPut(String path, JsonColumn value)
|
96
114
|
{
|
97
115
|
Path compiledPath = PathCompiler.compile(path);
|
@@ -102,6 +120,18 @@ public class JsonVisitor
|
|
102
120
|
jsonAddColumns.get(parentPath).put(compiledPath.toString(), value);
|
103
121
|
}
|
104
122
|
|
123
|
+
private boolean jsonAddColumnsContainsKey(String path)
|
124
|
+
{
|
125
|
+
Path compiledPath = PathCompiler.compile(path);
|
126
|
+
String parentPath = compiledPath.getParentPath();
|
127
|
+
if (jsonAddColumns.containsKey(parentPath)) {
|
128
|
+
return jsonAddColumns.get(parentPath).containsKey(compiledPath.toString());
|
129
|
+
}
|
130
|
+
else {
|
131
|
+
return false;
|
132
|
+
}
|
133
|
+
}
|
134
|
+
|
105
135
|
private void jsonDropColumnsPut(String path)
|
106
136
|
{
|
107
137
|
Path compiledPath = PathCompiler.compile(path);
|
@@ -112,68 +142,98 @@ public class JsonVisitor
|
|
112
142
|
jsonDropColumns.get(parentPath).add(compiledPath.toString());
|
113
143
|
}
|
114
144
|
|
115
|
-
|
116
|
-
private void buildJsonSchema()
|
145
|
+
private void buildJsonColumns()
|
117
146
|
{
|
118
147
|
List<ColumnConfig> columns = task.getColumns();
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
148
|
+
for (ColumnConfig column : columns) {
|
149
|
+
String name = column.getName();
|
150
|
+
// skip NON json path notation to build output schema
|
151
|
+
if (! PathCompiler.isProbablyJsonPath(name)) {
|
152
|
+
continue;
|
153
|
+
}
|
154
|
+
JsonPathUtil.assertDoNotEndsWithArrayWildcard(name);
|
155
|
+
// automatically fill ancestor jsonpaths
|
156
|
+
for (JsonColumn ancestorJsonColumn : getAncestorJsonColumnList(name)) {
|
157
|
+
String ancestorJsonPath = ancestorJsonColumn.getPath();
|
158
|
+
if (!jsonColumnsContainsKey(ancestorJsonPath)) {
|
159
|
+
jsonColumnsPut(ancestorJsonPath, ancestorJsonColumn);
|
129
160
|
}
|
130
|
-
|
161
|
+
}
|
162
|
+
// leaf jsonpath
|
163
|
+
if (column.getSrc().isPresent()) {
|
164
|
+
String src = column.getSrc().get();
|
165
|
+
jsonColumnsPut(name, new JsonColumn(name, null, null, src));
|
166
|
+
}
|
167
|
+
else if (column.getType().isPresent() && column.getDefault().isPresent()) { // add column
|
168
|
+
Type type = column.getType().get();
|
169
|
+
Value defaultValue = getDefault(task, name, type, column);
|
170
|
+
jsonColumnsPut(name, new JsonColumn(name, type, defaultValue));
|
171
|
+
}
|
172
|
+
else {
|
173
|
+
Type type = column.getType().isPresent() ? column.getType().get() : null;
|
174
|
+
jsonColumnsPut(name, new JsonColumn(name, type));
|
131
175
|
}
|
132
176
|
}
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
Type type = column.getType().isPresent() ? column.getType().get() : null;
|
151
|
-
jsonColumnsPut(name, new JsonColumn(name, type));
|
177
|
+
}
|
178
|
+
|
179
|
+
private void buildJsonAddColumns()
|
180
|
+
{
|
181
|
+
List<ColumnConfig> addColumns = task.getAddColumns();
|
182
|
+
for (ColumnConfig column : addColumns) {
|
183
|
+
String name = column.getName();
|
184
|
+
// skip NON json path notation to build output schema
|
185
|
+
if (! PathCompiler.isProbablyJsonPath(name)) {
|
186
|
+
continue;
|
187
|
+
}
|
188
|
+
JsonPathUtil.assertDoNotEndsWithArrayWildcard(name);
|
189
|
+
// automatically fill ancestor jsonpaths
|
190
|
+
for (JsonColumn ancestorJsonColumn : getAncestorJsonColumnList(name)) {
|
191
|
+
String ancestorJsonPath = ancestorJsonColumn.getPath();
|
192
|
+
if (!jsonAddColumnsContainsKey(ancestorJsonPath)) {
|
193
|
+
jsonAddColumnsPut(ancestorJsonPath, ancestorJsonColumn);
|
152
194
|
}
|
153
195
|
}
|
196
|
+
// leaf jsonpath
|
197
|
+
if (column.getSrc().isPresent()) {
|
198
|
+
String src = column.getSrc().get();
|
199
|
+
jsonAddColumnsPut(name, new JsonColumn(name, null, null, src));
|
200
|
+
}
|
201
|
+
else if (column.getType().isPresent() && column.getDefault().isPresent()) { // add column
|
202
|
+
Type type = column.getType().get();
|
203
|
+
Value defaultValue = getDefault(task, name, type, column);
|
204
|
+
jsonAddColumnsPut(name, new JsonColumn(name, type, defaultValue));
|
205
|
+
}
|
206
|
+
else {
|
207
|
+
throw new SchemaConfigException(String.format("add_columns: Column '%s' does not have \"src\", or \"type\" and \"default\"", name));
|
208
|
+
}
|
154
209
|
}
|
210
|
+
}
|
155
211
|
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
if (column.getSrc().isPresent()) {
|
165
|
-
String src = column.getSrc().get();
|
166
|
-
jsonAddColumnsPut(name, new JsonColumn(name, null, null, src));
|
167
|
-
}
|
168
|
-
else if (column.getType().isPresent() && column.getDefault().isPresent()) { // add column
|
169
|
-
Type type = column.getType().get();
|
170
|
-
Value defaultValue = getDefault(task, name, type, column);
|
171
|
-
jsonAddColumnsPut(name, new JsonColumn(name, type, defaultValue));
|
172
|
-
}
|
173
|
-
else {
|
174
|
-
throw new SchemaConfigException(String.format("add_columns: Column '%s' does not have \"src\", or \"type\" and \"default\"", name));
|
175
|
-
}
|
212
|
+
private void buildJsonDropColumns()
|
213
|
+
{
|
214
|
+
List<ColumnConfig> dropColumns = task.getDropColumns();
|
215
|
+
for (ColumnConfig dropColumn : dropColumns) {
|
216
|
+
String name = dropColumn.getName();
|
217
|
+
// skip NON json path notation to build output schema
|
218
|
+
if (! PathCompiler.isProbablyJsonPath(name)) {
|
219
|
+
continue;
|
176
220
|
}
|
221
|
+
jsonDropColumnsPut(name);
|
222
|
+
}
|
223
|
+
}
|
224
|
+
|
225
|
+
// build jsonColumns, jsonAddColumns, and jsonDropColumns
|
226
|
+
private void buildJsonSchema()
|
227
|
+
{
|
228
|
+
if (task.getDropColumns().size() > 0) {
|
229
|
+
buildJsonDropColumns();
|
230
|
+
}
|
231
|
+
else if (task.getColumns().size() > 0) {
|
232
|
+
buildJsonColumns();
|
233
|
+
}
|
234
|
+
// Add columns to last. If you want to add to head or middle, you can use `columns` option
|
235
|
+
if (task.getAddColumns().size() > 0) {
|
236
|
+
buildJsonAddColumns();
|
177
237
|
}
|
178
238
|
}
|
179
239
|
|
@@ -189,23 +249,53 @@ public class JsonVisitor
|
|
189
249
|
if (!PathCompiler.isProbablyJsonPath(name)) {
|
190
250
|
continue;
|
191
251
|
}
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
} catch (InvalidPathException e) {
|
196
|
-
throw new ConfigException(String.format("path %s, %s", name, e.getMessage()));
|
252
|
+
JsonPathUtil.assertJsonPathFormat(name);
|
253
|
+
for (JsonColumn ancestorJsonColumn : getAncestorJsonColumnList(name)) {
|
254
|
+
this.shouldVisitSet.add(ancestorJsonColumn.getPath());
|
197
255
|
}
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
256
|
+
Path path = PathCompiler.compile(name);
|
257
|
+
this.shouldVisitSet.add(path.toString());
|
258
|
+
}
|
259
|
+
}
|
260
|
+
|
261
|
+
/*
|
262
|
+
* <pre>
|
263
|
+
* $['foo']['bar'][0]['baz']
|
264
|
+
* #=>
|
265
|
+
* name: $['foo'], type: json, default: {}
|
266
|
+
* name: $['foo']['bar'], type: json, default: []
|
267
|
+
* name: $['foo']['bar'][0], type: json, default: {}
|
268
|
+
* </pre>
|
269
|
+
*
|
270
|
+
* @return ancestors as an array
|
271
|
+
*/
|
272
|
+
public static ArrayList<JsonColumn> getAncestorJsonColumnList(String path)
|
273
|
+
{
|
274
|
+
ArrayList<JsonColumn> ancestorJsonColumnList = new ArrayList<>();
|
275
|
+
Path compiledPath;
|
276
|
+
try {
|
277
|
+
compiledPath = PathCompiler.compile(path);
|
278
|
+
}
|
279
|
+
catch (InvalidPathException e) {
|
280
|
+
throw new ConfigException(String.format("jsonpath %s, %s", path, e.getMessage()));
|
281
|
+
}
|
282
|
+
StringBuilder partialPath = new StringBuilder("$");
|
283
|
+
PathToken parts = compiledPath.getRoot();
|
284
|
+
parts = parts.next(); // skip "$"
|
285
|
+
while (! parts.isLeaf()) {
|
286
|
+
partialPath.append(parts.getPathFragment());
|
287
|
+
PathToken next = parts.next();
|
288
|
+
JsonColumn jsonColumn;
|
289
|
+
if (next instanceof ArrayPathToken) {
|
290
|
+
jsonColumn = new JsonColumn(partialPath.toString(), Types.JSON, ValueFactory.newArray(new Value[0], false));
|
291
|
+
}
|
292
|
+
else {
|
293
|
+
jsonColumn = new JsonColumn(partialPath.toString(), Types.JSON, ValueFactory.newMap(new Value[0]));
|
207
294
|
}
|
295
|
+
ancestorJsonColumnList.add(jsonColumn);
|
296
|
+
parts = next;
|
208
297
|
}
|
298
|
+
return ancestorJsonColumnList;
|
209
299
|
}
|
210
300
|
|
211
301
|
boolean shouldVisit(String jsonPath)
|
@@ -253,7 +343,7 @@ public class JsonVisitor
|
|
253
343
|
}
|
254
344
|
String newPath = jsonColumn.getPath();
|
255
345
|
Value visited = visit(newPath, v);
|
256
|
-
// int i = jsonColumn.
|
346
|
+
// int i = jsonColumn.getTailIndex().intValue();
|
257
347
|
// index is shifted, so j++ is used.
|
258
348
|
newValue.add(j++, visited == null ? ValueFactory.newNil() : visited);
|
259
349
|
}
|
@@ -267,6 +357,12 @@ public class JsonVisitor
|
|
267
357
|
}
|
268
358
|
if (this.jsonAddColumns.containsKey(rootPath)) {
|
269
359
|
for (JsonColumn jsonColumn : this.jsonAddColumns.get(rootPath).values()) {
|
360
|
+
int i = jsonColumn.getTailIndex().intValue();
|
361
|
+
if (i < size) {
|
362
|
+
// index for add_columns must be larger than size
|
363
|
+
// just skip because we can not raise ConfigException beforehand for flexible JSON
|
364
|
+
continue;
|
365
|
+
}
|
270
366
|
int src = jsonColumn.getSrcTailIndex().intValue();
|
271
367
|
Value v = (src < arrayValue.size() ? arrayValue.get(src) : null);
|
272
368
|
if (v == null) {
|
@@ -326,6 +422,12 @@ public class JsonVisitor
|
|
326
422
|
if (this.jsonAddColumns.containsKey(rootPath)) {
|
327
423
|
Map<Value, Value> map = mapValue.map();
|
328
424
|
for (JsonColumn jsonColumn : this.jsonAddColumns.get(rootPath).values()) {
|
425
|
+
Value k = jsonColumn.getTailNameValue();
|
426
|
+
if (map.containsKey(k)) {
|
427
|
+
// key must be different with already existing one for add_columns
|
428
|
+
// just skip because we can not raise ConfigException beforehand for flexible JSON
|
429
|
+
continue;
|
430
|
+
}
|
329
431
|
Value src = jsonColumn.getSrcTailNameValue();
|
330
432
|
Value v = map.get(src);
|
331
433
|
if (v == null) {
|
@@ -67,13 +67,13 @@ public class TestJsonColumn
|
|
67
67
|
}
|
68
68
|
|
69
69
|
@Test
|
70
|
-
public void
|
70
|
+
public void getTailIndex()
|
71
71
|
{
|
72
|
-
assertEquals(null, JsonColumn.
|
73
|
-
assertEquals(null, JsonColumn.
|
74
|
-
assertEquals(null, JsonColumn.
|
75
|
-
assertEquals(new Long(1), JsonColumn.
|
76
|
-
assertEquals(new Long(0), JsonColumn.
|
77
|
-
assertEquals(new Long(0), JsonColumn.
|
72
|
+
assertEquals(null, JsonColumn.getTailIndex("$['foo'].bar.baz"));
|
73
|
+
assertEquals(null, JsonColumn.getTailIndex("$.foo.bar"));
|
74
|
+
assertEquals(null, JsonColumn.getTailIndex("$.foo"));
|
75
|
+
assertEquals(new Long(1), JsonColumn.getTailIndex("$.foo[0][1]"));
|
76
|
+
assertEquals(new Long(0), JsonColumn.getTailIndex("$.foo[0]"));
|
77
|
+
assertEquals(new Long(0), JsonColumn.getTailIndex("$[0]"));
|
78
78
|
}
|
79
|
-
}
|
79
|
+
}
|
@@ -14,13 +14,14 @@ import org.junit.rules.ExpectedException;
|
|
14
14
|
import org.msgpack.value.MapValue;
|
15
15
|
import org.msgpack.value.Value;
|
16
16
|
import org.msgpack.value.ValueFactory;
|
17
|
-
import com.dena.analytics.jsonpathcompiler.InvalidPathException;
|
18
17
|
|
19
18
|
import static org.embulk.spi.type.Types.JSON;
|
20
19
|
import static org.junit.Assert.assertEquals;
|
21
20
|
import static org.junit.Assert.assertFalse;
|
22
21
|
import static org.junit.Assert.assertTrue;
|
23
22
|
|
23
|
+
import java.util.ArrayList;
|
24
|
+
import java.util.Arrays;
|
24
25
|
import java.util.HashMap;
|
25
26
|
import java.util.HashSet;
|
26
27
|
|
@@ -58,16 +59,32 @@ public class TestJsonVisitor
|
|
58
59
|
return new JsonVisitor(task, inputSchema, outputSchema);
|
59
60
|
}
|
60
61
|
|
61
|
-
@Test
|
62
|
-
public void
|
62
|
+
@Test
|
63
|
+
public void getAncestorJsonColumnList()
|
63
64
|
{
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
65
|
+
ArrayList<JsonColumn> subject;
|
66
|
+
|
67
|
+
subject = JsonVisitor.getAncestorJsonColumnList("$.json1.a.default");
|
68
|
+
assertEquals("$['json1']", subject.get(0).getPath());
|
69
|
+
assertTrue(subject.get(0).getDefaultValue().isMapValue());
|
70
|
+
assertEquals("$['json1']['a']", subject.get(1).getPath());
|
71
|
+
assertTrue(subject.get(1).getDefaultValue().isMapValue());
|
72
|
+
|
73
|
+
subject = JsonVisitor.getAncestorJsonColumnList("$.json1.a[0].default");
|
74
|
+
assertEquals("$['json1']", subject.get(0).getPath());
|
75
|
+
assertTrue(subject.get(0).getDefaultValue().isMapValue());
|
76
|
+
assertEquals("$['json1']['a']", subject.get(1).getPath());
|
77
|
+
assertTrue(subject.get(1).getDefaultValue().isArrayValue());
|
78
|
+
assertEquals("$['json1']['a'][0]", subject.get(2).getPath());
|
79
|
+
assertTrue(subject.get(2).getDefaultValue().isMapValue());
|
80
|
+
|
81
|
+
subject = JsonVisitor.getAncestorJsonColumnList("$.json1.a.default[0]");
|
82
|
+
assertEquals("$['json1']", subject.get(0).getPath());
|
83
|
+
assertTrue(subject.get(0).getDefaultValue().isMapValue());
|
84
|
+
assertEquals("$['json1']['a']", subject.get(1).getPath());
|
85
|
+
assertTrue(subject.get(1).getDefaultValue().isMapValue());
|
86
|
+
assertEquals("$['json1']['a']['default']", subject.get(2).getPath());
|
87
|
+
assertTrue(subject.get(2).getDefaultValue().isArrayValue());
|
71
88
|
}
|
72
89
|
|
73
90
|
@Test
|
@@ -103,7 +120,7 @@ public class TestJsonVisitor
|
|
103
120
|
}
|
104
121
|
|
105
122
|
@Test
|
106
|
-
public void
|
123
|
+
public void buildJsonDropColumns()
|
107
124
|
{
|
108
125
|
PluginTask task = taskFromYamlString(
|
109
126
|
"type: column",
|
@@ -135,8 +152,32 @@ public class TestJsonVisitor
|
|
135
152
|
}
|
136
153
|
}
|
137
154
|
|
155
|
+
@Test(expected = ConfigException.class)
|
156
|
+
public void configException_Columns()
|
157
|
+
{
|
158
|
+
PluginTask task = taskFromYamlString(
|
159
|
+
"type: column",
|
160
|
+
"columns:",
|
161
|
+
" - {name: \"$.json1.b.b[*]\"}");
|
162
|
+
Schema inputSchema = Schema.builder().build();
|
163
|
+
// b[*] should be written as b
|
164
|
+
jsonVisitor(task, inputSchema);
|
165
|
+
}
|
166
|
+
|
167
|
+
@Test(expected = ConfigException.class)
|
168
|
+
public void buildJsonAddColumns_ConfigException()
|
169
|
+
{
|
170
|
+
PluginTask task = taskFromYamlString(
|
171
|
+
"type: column",
|
172
|
+
"add_columns:",
|
173
|
+
" - {name: \"$.json1.b.b[*]\", type: json, default: []}");
|
174
|
+
Schema inputSchema = Schema.builder().build();
|
175
|
+
// b[*] should be written as b
|
176
|
+
jsonVisitor(task, inputSchema);
|
177
|
+
}
|
178
|
+
|
138
179
|
@Test
|
139
|
-
public void
|
180
|
+
public void buildJsonAddColumns()
|
140
181
|
{
|
141
182
|
PluginTask task = taskFromYamlString(
|
142
183
|
"type: column",
|
@@ -150,19 +191,22 @@ public class TestJsonVisitor
|
|
150
191
|
.build();
|
151
192
|
JsonVisitor subject = jsonVisitor(task, inputSchema);
|
152
193
|
|
153
|
-
|
194
|
+
assertTrue(subject.jsonAddColumns.containsKey("$"));
|
195
|
+
assertTrue(subject.jsonAddColumns.containsKey("$['json1']"));
|
154
196
|
assertTrue(subject.jsonAddColumns.containsKey("$['json1']['a']"));
|
155
197
|
assertTrue(subject.jsonAddColumns.containsKey("$['json1']['a']['copy_array']"));
|
156
198
|
|
157
199
|
{
|
158
200
|
HashMap<String, JsonColumn> jsonColumns = subject.jsonAddColumns.get("$['json1']['a']");
|
159
|
-
assertEquals(
|
201
|
+
assertEquals(3, jsonColumns.size());
|
160
202
|
String[] keys = jsonColumns.keySet().toArray(new String[0]);
|
161
203
|
JsonColumn[] values = jsonColumns.values().toArray(new JsonColumn[0]);
|
162
204
|
assertEquals("$['json1']['a']['default']", keys[0]);
|
163
205
|
assertEquals("$['json1']['a']['default']", values[0].getPath());
|
164
206
|
assertEquals("$['json1']['a']['copy']", keys[1]);
|
165
207
|
assertEquals("$['json1']['a']['copy']", values[1].getPath());
|
208
|
+
assertEquals("$['json1']['a']['copy_array']", keys[2]);
|
209
|
+
assertEquals("$['json1']['a']['copy_array']", values[2].getPath());
|
166
210
|
}
|
167
211
|
|
168
212
|
{
|
@@ -175,8 +219,20 @@ public class TestJsonVisitor
|
|
175
219
|
}
|
176
220
|
}
|
177
221
|
|
222
|
+
@Test(expected = ConfigException.class)
|
223
|
+
public void buildJsonColumns_ConfigException()
|
224
|
+
{
|
225
|
+
PluginTask task = taskFromYamlString(
|
226
|
+
"type: column",
|
227
|
+
"columns:",
|
228
|
+
" - {name: \"$.json1.b.b[*]\"}");
|
229
|
+
Schema inputSchema = Schema.builder().build();
|
230
|
+
// b[*] should be written as b
|
231
|
+
jsonVisitor(task, inputSchema);
|
232
|
+
}
|
233
|
+
|
178
234
|
@Test
|
179
|
-
public void
|
235
|
+
public void buildJsonColumns()
|
180
236
|
{
|
181
237
|
PluginTask task = taskFromYamlString(
|
182
238
|
"type: column",
|
@@ -190,19 +246,23 @@ public class TestJsonVisitor
|
|
190
246
|
.build();
|
191
247
|
JsonVisitor subject = jsonVisitor(task, inputSchema);
|
192
248
|
|
193
|
-
|
249
|
+
// 1st level keys are parents of jsonpath
|
250
|
+
assertTrue(subject.jsonColumns.containsKey("$"));
|
251
|
+
assertTrue(subject.jsonColumns.containsKey("$['json1']"));
|
194
252
|
assertTrue(subject.jsonColumns.containsKey("$['json1']['a']"));
|
195
253
|
assertTrue(subject.jsonColumns.containsKey("$['json1']['a']['copy_array']"));
|
196
254
|
|
197
255
|
{
|
198
256
|
HashMap<String, JsonColumn> jsonColumns = subject.jsonColumns.get("$['json1']['a']");
|
199
|
-
assertEquals(
|
257
|
+
assertEquals(3, jsonColumns.size());
|
200
258
|
String[] keys = jsonColumns.keySet().toArray(new String[0]);
|
201
259
|
JsonColumn[] values = jsonColumns.values().toArray(new JsonColumn[0]);
|
202
260
|
assertEquals("$['json1']['a']['default']", keys[0]);
|
203
261
|
assertEquals("$['json1']['a']['default']", values[0].getPath());
|
204
262
|
assertEquals("$['json1']['a']['copy']", keys[1]);
|
205
263
|
assertEquals("$['json1']['a']['copy']", values[1].getPath());
|
264
|
+
assertEquals("$['json1']['a']['copy_array']", keys[2]);
|
265
|
+
assertEquals("$['json1']['a']['copy_array']", values[2].getPath());
|
206
266
|
}
|
207
267
|
|
208
268
|
{
|
@@ -216,7 +276,7 @@ public class TestJsonVisitor
|
|
216
276
|
}
|
217
277
|
|
218
278
|
@Test
|
219
|
-
public void
|
279
|
+
public void buildJsonSchema()
|
220
280
|
{
|
221
281
|
PluginTask task = taskFromYamlString(
|
222
282
|
"type: column",
|
@@ -269,7 +329,6 @@ public class TestJsonVisitor
|
|
269
329
|
PluginTask task = taskFromYamlString(
|
270
330
|
"type: column",
|
271
331
|
"add_columns:",
|
272
|
-
" - {name: $.json1.k3, type: json, default: \"{}\"}",
|
273
332
|
" - {name: $.json1.k3.k3, type: string, default: v}",
|
274
333
|
" - {name: $.json1.k4, src: $.json1.k2}");
|
275
334
|
Schema inputSchema = Schema.builder()
|
@@ -297,8 +356,7 @@ public class TestJsonVisitor
|
|
297
356
|
"type: column",
|
298
357
|
"columns:",
|
299
358
|
" - {name: $.json1.k1}",
|
300
|
-
" - {name: $.json1.k2.k2}",
|
301
|
-
" - {name: $.json1.k3, type: json, default: \"{}\"}",
|
359
|
+
" - {name: $.json1.k2.k2}",
|
302
360
|
" - {name: $.json1.k3.k3, type: string, default: v}",
|
303
361
|
" - {name: $.json1.k4, src: $.json1.k2}");
|
304
362
|
Schema inputSchema = Schema.builder()
|
@@ -316,7 +374,7 @@ public class TestJsonVisitor
|
|
316
374
|
k2, ValueFactory.newMap(k2, v));
|
317
375
|
|
318
376
|
MapValue visited = subject.visit("$['json1']", map).asMapValue();
|
319
|
-
assertEquals("{\"k1\":{\"k1\":\"v\"},\"k3\":{\"k3\":\"v\"},\"k4\":{\"k2\":\"v\"}}", visited.toString());
|
377
|
+
assertEquals("{\"k1\":{\"k1\":\"v\"},\"k2\":{\"k2\":\"v\"},\"k3\":{\"k3\":\"v\"},\"k4\":{\"k2\":\"v\"}}", visited.toString());
|
320
378
|
}
|
321
379
|
|
322
380
|
@Test
|
@@ -352,8 +410,6 @@ public class TestJsonVisitor
|
|
352
410
|
"type: column",
|
353
411
|
"add_columns:",
|
354
412
|
" - {name: \"$.json1.k1[1]\", src: \"$.json1.k1[0]\"}",
|
355
|
-
" - {name: \"$.json1.k3\", type: json, default: \"[]\"}",
|
356
|
-
" - {name: \"$.json1.k3[0]\", type: json, default: \"{}\"}",
|
357
413
|
" - {name: \"$.json1.k3[0].k3\", type: string, default: v}");
|
358
414
|
Schema inputSchema = Schema.builder()
|
359
415
|
.add("json1", JSON)
|
@@ -379,11 +435,8 @@ public class TestJsonVisitor
|
|
379
435
|
PluginTask task = taskFromYamlString(
|
380
436
|
"type: column",
|
381
437
|
"columns:",
|
382
|
-
" - {name: \"$.json1.k1\"}",
|
383
438
|
" - {name: \"$.json1.k1[1]\", src: \"$.json1.k1[0]\"}",
|
384
|
-
" - {name: \"$.json1.k2[0]\"}",
|
385
|
-
" - {name: \"$.json1.k3\", type: json, default: \"[]\"}",
|
386
|
-
" - {name: \"$.json1.k3[0]\", type: json, default: \"{}\"}",
|
439
|
+
" - {name: \"$.json1.k2[0]\"}",
|
387
440
|
" - {name: \"$.json1.k3[0].k3\", type: string, default: v}");
|
388
441
|
Schema inputSchema = Schema.builder()
|
389
442
|
.add("json1", JSON)
|
@@ -400,7 +453,7 @@ public class TestJsonVisitor
|
|
400
453
|
k2, ValueFactory.newArray(v, v));
|
401
454
|
|
402
455
|
MapValue visited = subject.visit("$['json1']", map).asMapValue();
|
403
|
-
assertEquals("{\"k1\":[{\"k1\":\"v\"}],\"k3\":[{\"k3\":\"v\"}]}", visited.toString());
|
456
|
+
assertEquals("{\"k1\":[{\"k1\":\"v\"}],\"k2\":[\"v\"],\"k3\":[{\"k3\":\"v\"}]}", visited.toString());
|
404
457
|
}
|
405
458
|
|
406
459
|
@Test
|
@@ -435,7 +488,6 @@ public class TestJsonVisitor
|
|
435
488
|
PluginTask task = taskFromYamlString(
|
436
489
|
"type: column",
|
437
490
|
"add_columns:",
|
438
|
-
" - {name: \"$['json1']['k3']\", type: json, default: \"{}\"}",
|
439
491
|
" - {name: \"$['json1']['k3']['k3']\", type: string, default: v}",
|
440
492
|
" - {name: \"$['json1']['k4']\", src: \"$['json1']['k2']\"}");
|
441
493
|
Schema inputSchema = Schema.builder()
|
@@ -464,7 +516,6 @@ public class TestJsonVisitor
|
|
464
516
|
"columns:",
|
465
517
|
" - {name: \"$['json1']['k1']\"}",
|
466
518
|
" - {name: \"$['json1']['k2']['k2']\"}",
|
467
|
-
" - {name: \"$['json1']['k3']\", type: json, default: \"{}\"}",
|
468
519
|
" - {name: \"$['json1']['k3']['k3']\", type: string, default: v}",
|
469
520
|
" - {name: \"$['json1']['k4']\", src: \"$['json1']['k2']\"}");
|
470
521
|
Schema inputSchema = Schema.builder()
|
@@ -481,7 +532,7 @@ public class TestJsonVisitor
|
|
481
532
|
k2, ValueFactory.newMap(k2, v));
|
482
533
|
|
483
534
|
MapValue visited = subject.visit("$['json1']", map).asMapValue();
|
484
|
-
assertEquals("{\"k1\":{\"k1\":\"v\"},\"k3\":{\"k3\":\"v\"},\"k4\":{\"k2\":\"v\"}}", visited.toString());
|
535
|
+
assertEquals("{\"k1\":{\"k1\":\"v\"},\"k2\":{\"k2\":\"v\"},\"k3\":{\"k3\":\"v\"},\"k4\":{\"k2\":\"v\"}}", visited.toString());
|
485
536
|
}
|
486
537
|
|
487
538
|
@Test
|
@@ -517,8 +568,6 @@ public class TestJsonVisitor
|
|
517
568
|
"type: column",
|
518
569
|
"add_columns:",
|
519
570
|
" - {name: \"$['json1']['k1'][1]\", src: \"$['json1']['k1'][0]\"}",
|
520
|
-
" - {name: \"$['json1']['k3']\", type: json, default: \"[]\"}",
|
521
|
-
" - {name: \"$['json1']['k3'][0]\", type: json, default: \"{}\"}",
|
522
571
|
" - {name: \"$['json1']['k3'][0]['k3']\", type: string, default: v}");
|
523
572
|
Schema inputSchema = Schema.builder()
|
524
573
|
.add("json1", JSON)
|
@@ -544,11 +593,8 @@ public class TestJsonVisitor
|
|
544
593
|
PluginTask task = taskFromYamlString(
|
545
594
|
"type: column",
|
546
595
|
"columns:",
|
547
|
-
" - {name: \"$['json1']['k1']\"}",
|
548
596
|
" - {name: \"$['json1']['k1'][1]\", src: \"$['json1']['k1'][0]\"}",
|
549
597
|
" - {name: \"$['json1']['k2'][0]\"}",
|
550
|
-
" - {name: \"$['json1']['k3']\", type: json, default: \"[]\"}",
|
551
|
-
" - {name: \"$['json1']['k3'][0]\", type: json, default: \"{}\"}",
|
552
598
|
" - {name: \"$['json1']['k3'][0]['k3']\", type: string, default: v}");
|
553
599
|
Schema inputSchema = Schema.builder()
|
554
600
|
.add("json1", JSON)
|
@@ -564,7 +610,7 @@ public class TestJsonVisitor
|
|
564
610
|
k2, ValueFactory.newArray(v, v));
|
565
611
|
|
566
612
|
MapValue visited = subject.visit("$['json1']", map).asMapValue();
|
567
|
-
assertEquals("{\"k1\":[{\"k1\":\"v\"}],\"k3\":[{\"k3\":\"v\"}]}", visited.toString());
|
613
|
+
assertEquals("{\"k1\":[{\"k1\":\"v\"}],\"k2\":[\"v\"],\"k3\":[{\"k3\":\"v\"}]}", visited.toString());
|
568
614
|
}
|
569
615
|
|
570
616
|
// Because the dot notation is converted to single quotes by default,
|
@@ -652,13 +698,14 @@ public class TestJsonVisitor
|
|
652
698
|
assertEquals("{\"k____1\":[{\"k____1\":\"v\"}],\"k_2\":{\"k_2\":\"v\"}}", visited.toString());
|
653
699
|
}
|
654
700
|
|
701
|
+
/*
|
655
702
|
@Test
|
656
703
|
public void visit_withColumnNameIncludingSingleQuotes()
|
657
704
|
{
|
658
705
|
PluginTask task = taskFromYamlString(
|
659
706
|
"type: column",
|
660
707
|
"columns:",
|
661
|
-
" - {name: \"$[
|
708
|
+
" - {name: \"$['\\\\'json1']['k1']\"}");
|
662
709
|
Schema inputSchema = Schema.builder()
|
663
710
|
.add("'json1", JSON)
|
664
711
|
.build();
|
@@ -672,9 +719,10 @@ public class TestJsonVisitor
|
|
672
719
|
MapValue visited = subject.visit("$['\\'json1']", map).asMapValue();
|
673
720
|
assertEquals("{\"k1\":\"v\"}", visited.toString());
|
674
721
|
}
|
722
|
+
*/
|
675
723
|
|
676
724
|
@Test(expected = ConfigException.class)
|
677
|
-
public void
|
725
|
+
public void configException_MultiProperties() {
|
678
726
|
PluginTask task = taskFromYamlString(
|
679
727
|
"type: column",
|
680
728
|
"columns:",
|
@@ -687,7 +735,7 @@ public class TestJsonVisitor
|
|
687
735
|
|
688
736
|
// It is recognized as multi properties if the square brackets do not close properly
|
689
737
|
@Test(expected = ConfigException.class)
|
690
|
-
public void
|
738
|
+
public void configException_PropertyIsNotSeparatedByCommas()
|
691
739
|
{
|
692
740
|
PluginTask task = taskFromYamlString(
|
693
741
|
"type: column",
|
@@ -700,7 +748,7 @@ public class TestJsonVisitor
|
|
700
748
|
}
|
701
749
|
|
702
750
|
@Test(expected = ConfigException.class)
|
703
|
-
public void
|
751
|
+
public void configException_FunctionPathToken()
|
704
752
|
{
|
705
753
|
PluginTask task = taskFromYamlString(
|
706
754
|
"type: column",
|
@@ -713,7 +761,7 @@ public class TestJsonVisitor
|
|
713
761
|
}
|
714
762
|
|
715
763
|
@Test(expected = ConfigException.class)
|
716
|
-
public void
|
764
|
+
public void configException_PredicatePathToken()
|
717
765
|
{
|
718
766
|
PluginTask task = taskFromYamlString(
|
719
767
|
"type: column",
|
@@ -726,7 +774,7 @@ public class TestJsonVisitor
|
|
726
774
|
}
|
727
775
|
|
728
776
|
@Test(expected = ConfigException.class)
|
729
|
-
public void
|
777
|
+
public void configException_ScanPathToken()
|
730
778
|
{
|
731
779
|
PluginTask task = taskFromYamlString(
|
732
780
|
"type: column",
|
@@ -739,7 +787,7 @@ public class TestJsonVisitor
|
|
739
787
|
}
|
740
788
|
|
741
789
|
@Test(expected = ConfigException.class)
|
742
|
-
public void
|
790
|
+
public void configException_MultiIndexOperation()
|
743
791
|
{
|
744
792
|
PluginTask task = taskFromYamlString(
|
745
793
|
"type: column",
|
@@ -752,7 +800,7 @@ public class TestJsonVisitor
|
|
752
800
|
}
|
753
801
|
|
754
802
|
@Test(expected = ConfigException.class)
|
755
|
-
public void
|
803
|
+
public void configException_IndexOperationAtMiddlePosition()
|
756
804
|
{
|
757
805
|
PluginTask task = taskFromYamlString(
|
758
806
|
"type: column",
|
@@ -765,7 +813,7 @@ public class TestJsonVisitor
|
|
765
813
|
}
|
766
814
|
|
767
815
|
@Test(expected = ConfigException.class)
|
768
|
-
public void
|
816
|
+
public void configException_ArraySliceOperation()
|
769
817
|
{
|
770
818
|
PluginTask task = taskFromYamlString(
|
771
819
|
"type: column",
|
@@ -778,7 +826,7 @@ public class TestJsonVisitor
|
|
778
826
|
}
|
779
827
|
|
780
828
|
@Test(expected = ConfigException.class)
|
781
|
-
public void
|
829
|
+
public void configException_MArraySliceOperationAtMiddlePosition()
|
782
830
|
{
|
783
831
|
PluginTask task = taskFromYamlString(
|
784
832
|
"type: column",
|
@@ -794,7 +842,7 @@ public class TestJsonVisitor
|
|
794
842
|
public ExpectedException thrown = ExpectedException.none();
|
795
843
|
|
796
844
|
@Test
|
797
|
-
public void
|
845
|
+
public void configException_PathCompileError()
|
798
846
|
{
|
799
847
|
PluginTask task = taskFromYamlString(
|
800
848
|
"type: column",
|
@@ -808,4 +856,4 @@ public class TestJsonVisitor
|
|
808
856
|
|
809
857
|
jsonVisitor(task, inputSchema);
|
810
858
|
}
|
811
|
-
}
|
859
|
+
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-column
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.0.
|
4
|
+
version: 0.6.0.pre3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naotoshi Seo
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-10-
|
11
|
+
date: 2016-10-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -52,9 +52,13 @@ files:
|
|
52
52
|
- README.md
|
53
53
|
- build.gradle
|
54
54
|
- config/checkstyle/checkstyle.xml
|
55
|
+
- example/add_columns.txt
|
55
56
|
- example/add_columns.yml
|
57
|
+
- example/bracket_notations.txt
|
56
58
|
- example/bracket_notations.yml
|
59
|
+
- example/columns.txt
|
57
60
|
- example/columns.yml
|
61
|
+
- example/drop_columns.txt
|
58
62
|
- example/drop_columns.yml
|
59
63
|
- example/edgecase.tsv
|
60
64
|
- example/edgecase.yml
|
@@ -69,8 +73,8 @@ files:
|
|
69
73
|
- src/main/java/org/embulk/filter/column/ColumnFilterPlugin.java
|
70
74
|
- src/main/java/org/embulk/filter/column/ColumnVisitorImpl.java
|
71
75
|
- src/main/java/org/embulk/filter/column/JsonColumn.java
|
76
|
+
- src/main/java/org/embulk/filter/column/JsonPathUtil.java
|
72
77
|
- src/main/java/org/embulk/filter/column/JsonVisitor.java
|
73
|
-
- src/main/java/org/embulk/filter/column/PathTokenUtil.java
|
74
78
|
- src/test/java/org/embulk/filter/column/TestColumnFilterPlugin.java
|
75
79
|
- src/test/java/org/embulk/filter/column/TestColumnVisitorImpl.java
|
76
80
|
- src/test/java/org/embulk/filter/column/TestJsonColumn.java
|
@@ -78,9 +82,9 @@ files:
|
|
78
82
|
- classpath/accessors-smart-1.1.jar
|
79
83
|
- classpath/asm-5.0.3.jar
|
80
84
|
- classpath/commons-lang3-3.4.jar
|
81
|
-
- classpath/embulk-filter-column-0.6.0.
|
85
|
+
- classpath/embulk-filter-column-0.6.0.pre3.jar
|
82
86
|
- classpath/json-smart-2.2.1.jar
|
83
|
-
- classpath/JsonPathCompiler-0.0.
|
87
|
+
- classpath/JsonPathCompiler-0.0.11.jar
|
84
88
|
- classpath/slf4j-api-1.7.21.jar
|
85
89
|
homepage: https://github.com/sonots/embulk-filter-column
|
86
90
|
licenses:
|
@@ -1,39 +0,0 @@
|
|
1
|
-
package org.embulk.filter.column;
|
2
|
-
|
3
|
-
import com.dena.analytics.jsonpathcompiler.expressions.path.ArrayIndexOperation;
|
4
|
-
import com.dena.analytics.jsonpathcompiler.expressions.path.ArrayPathToken;
|
5
|
-
import com.dena.analytics.jsonpathcompiler.expressions.path.FunctionPathToken;
|
6
|
-
import com.dena.analytics.jsonpathcompiler.expressions.path.PathToken;
|
7
|
-
import com.dena.analytics.jsonpathcompiler.expressions.path.PredicatePathToken;
|
8
|
-
import com.dena.analytics.jsonpathcompiler.expressions.path.ScanPathToken;
|
9
|
-
import org.embulk.config.ConfigException;
|
10
|
-
|
11
|
-
public class PathTokenUtil
|
12
|
-
{
|
13
|
-
public static void assertSupportedPathToken(PathToken pathToken, String path)
|
14
|
-
{
|
15
|
-
if (pathToken instanceof ArrayPathToken) {
|
16
|
-
ArrayIndexOperation arrayIndexOperation = ((ArrayPathToken) pathToken).getArrayIndexOperation();
|
17
|
-
assertSupportedArrayPathToken(arrayIndexOperation, path);
|
18
|
-
}
|
19
|
-
else if (pathToken instanceof ScanPathToken) {
|
20
|
-
throw new ConfigException(String.format("scan path token is not supported \"%s\"", path));
|
21
|
-
}
|
22
|
-
else if (pathToken instanceof FunctionPathToken) {
|
23
|
-
throw new ConfigException(String.format("function path token is not supported \"%s\"", path));
|
24
|
-
}
|
25
|
-
else if (pathToken instanceof PredicatePathToken) {
|
26
|
-
throw new ConfigException(String.format("predicate path token is not supported \"%s\"", path));
|
27
|
-
}
|
28
|
-
}
|
29
|
-
|
30
|
-
public static void assertSupportedArrayPathToken(ArrayIndexOperation arrayIndexOperation, String path)
|
31
|
-
{
|
32
|
-
if (arrayIndexOperation == null) {
|
33
|
-
throw new ConfigException(String.format("Array Slice Operation is not supported \"%s\"", path));
|
34
|
-
}
|
35
|
-
else if (!arrayIndexOperation.isSingleIndexOperation()) {
|
36
|
-
throw new ConfigException(String.format("Multi Array Indexes is not supported \"%s\"", path));
|
37
|
-
}
|
38
|
-
}
|
39
|
-
}
|