embulk-filter-column 0.6.0.pre2 → 0.6.0.pre3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3b693b9f514309a94b2fea4e85714e6e0c9de0f1
4
- data.tar.gz: ed56dc5b557605436985615eacdbc71b700a7640
3
+ metadata.gz: d26c69b6a7bfb4a6c4d68c6967301a0dc32562f5
4
+ data.tar.gz: c674640721b35540a9aba9aef1ad76c5cb5adc96
5
5
  SHA512:
6
- metadata.gz: 5d1399f4e807811687c032a1738254bc2013d0a760a88343ec03d3bd3b36309cadb46f1c805859d68724a1bb40b0bc73e1908dbab939cb54e453f7a6db93b5b1
7
- data.tar.gz: 7937b001df59b540c8a10fcbd4e3c707565d5e9f9ca4e919d96b7403252d74b654ee8ca0b476fe53d504192ef9302b678f209bed30071e873af48f0a74d8ae2e
6
+ metadata.gz: 9cc5e8165d85afed48b65302d0baa9f54e35b1ae5438f390b5b5970d8342e14cf72b30f137ca901229578c3493b23a4c3a6286ea076501bc926eaed97491f70f
7
+ data.tar.gz: 867fa85522091768287f5b4ade0b7e17947899e1f6ccbeeb017d5115fb0468d418c62712af308a80580afe1be6bb5a2f3ac73c7c1274b11d4c674dd8c8635a66
data/CHANGELOG.md CHANGED
@@ -1,8 +1,9 @@
1
- # 0.6.0 (2016-10-01)
1
+ # 0.6.0 (2016-XX-XX)
2
2
 
3
3
  Enhancements:
4
4
 
5
5
  * Support bracket notation in json path (thanks to @kysnm)
6
+ * Autocomplete ancestor json paths
6
7
 
7
8
  # 0.5.4 (2016-08-05)
8
9
 
data/README.md CHANGED
@@ -133,27 +133,15 @@ NOTE:
133
133
 
134
134
  NOTE:
135
135
 
136
- To deeply visit json path such as `$.payload.foo.bar`, you have to write its upper paths together like:
136
+ Renaming or copying json paths with the `src` option is only partially supported so far. The source and destination must share the same parent json path, like:
137
137
 
138
138
  ```
139
- - (name: $.payload.foo}
140
- - {name: $.payload.foo.bar}
141
- ```
142
-
143
- NOTE:
144
-
145
- `src` (to rename or copy columns) is only partially supported yet. The upper json path must be same like:
146
-
147
- ```
148
- - {name: $.payload.foo}
149
139
  - {name: $.payload.foo.dest, src: $.payload.foo.src}
150
140
  ```
151
141
 
152
- Below does not work yet.
142
+ That is, the example below does not work yet, because the parent json paths differ (`$.payload.foo` and `$.payload.bar`):
153
143
 
154
144
  ```
155
- - {name: $.payload.foo}
156
- - {name: $.payload.bar}
157
145
  - {name: $.payload.foo.dest, src: $.payload.bar.src}
158
146
  ```
159
147
 
data/build.gradle CHANGED
@@ -15,14 +15,14 @@ configurations {
15
15
  provided
16
16
  }
17
17
 
18
- version = "0.6.0.pre2"
18
+ version = "0.6.0.pre3"
19
19
  sourceCompatibility = 1.7
20
20
  targetCompatibility = 1.7
21
21
 
22
22
  dependencies {
23
23
  compile "org.embulk:embulk-core:0.8.+"
24
24
  provided "org.embulk:embulk-core:0.8.+"
25
- compile 'com.dena.analytics:JsonPathCompiler:0.0.6'
25
+ compile "io.github.medjed:JsonPathCompiler:0.0.+"
26
26
 
27
27
  testCompile "junit:junit:4.+"
28
28
  testCompile "org.embulk:embulk-core:0.8.+:tests"
@@ -0,0 +1,11 @@
1
+ 2016-10-20 02:29:31.933 +0900: Embulk v0.8.6
2
+ 2016-10-20 02:29:32.859 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/column from a load path
3
+ 2016-10-20 02:29:32.876 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'example.csv'
4
+ 2016-10-20 02:29:32.882 +0900 [INFO] (0001:preview): Loading files [example/example.csv]
5
+ +-------------------------+---------+---------------------------------------------+--------------+-------------------------------------------------------------+----------+-------------------------+-------------------------+-------------------+
6
+ | time:timestamp | id:long | name:string | score:double | json:json | foo:long | d:timestamp | t:timestamp | copy_score:double |
7
+ +-------------------------+---------+---------------------------------------------+--------------+-------------------------------------------------------------+----------+-------------------------+-------------------------+-------------------+
8
+ | 2015-07-13 00:00:00 UTC | 0 | Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY | 1370.0 | {"foo":"foo","bar":"bar","d":"2015-07-13","copy_foo":"foo"} | 1 | 2015-07-12 15:00:00 UTC | 2015-07-13 00:00:00 UTC | 1370.0 |
9
+ | 2015-07-13 00:00:00 UTC | 0 | Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY | 1370.0 | {"foo":"foo2","bar":1,"d":"2015-07-13","copy_foo":"foo2"} | 1 | 2015-07-12 15:00:00 UTC | 2015-07-13 00:00:00 UTC | 1370.0 |
10
+ | | | | 9170.0 | | 1 | 2015-07-12 15:00:00 UTC | 2015-07-13 00:00:00 UTC | 9170.0 |
11
+ +-------------------------+---------+---------------------------------------------+--------------+-------------------------------------------------------------+----------+-------------------------+-------------------------+-------------------+
@@ -23,7 +23,7 @@ filters:
23
23
  - {name: d, type: timestamp, default: "2015-07-13"}
24
24
  - {name: t, type: timestamp, default: "2015-07-13 00:00:00", timezone: "UTC", format: "%Y-%m-%d %H:%M:%S"}
25
25
  - {name: copy_score, src: score}
26
- - {name: $.json.foo, type: long, default: 1}
26
+ - {name: $.json.bar, type: long, default: 1}
27
27
  - {name: $.json.d, type: string, default: "2015-07-13"}
28
28
  - {name: $.json.copy_foo, src: $.json.foo}
29
29
  out:
@@ -0,0 +1,11 @@
1
+ 2016-10-20 02:29:47.367 +0900: Embulk v0.8.6
2
+ 2016-10-20 02:29:48.254 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/column from a load path
3
+ 2016-10-20 02:29:48.270 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'example.csv'
4
+ 2016-10-20 02:29:48.274 +0900 [INFO] (0001:preview): Loading files [example/example.csv]
5
+ +-------------------------+---------------------------------------------+----------+---------+-------------------+---------------------------------------------------------------------------------------------------------+
6
+ | time:timestamp | name:string | foo:long | id:long | copy_score:double | json:json |
7
+ +-------------------------+---------------------------------------------+----------+---------+-------------------+---------------------------------------------------------------------------------------------------------+
8
+ | 2015-07-13 00:00:00 UTC | Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY | 1 | 0 | 1370.0 | {"foo":"foo","copy_foo":"foo","array_a":[{"a":"a0"}],"array_b":[{"b":"b"},{"b":"b"}]} |
9
+ | 2015-07-13 00:00:00 UTC | Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY | 1 | 0 | 1370.0 | {"foo":"foo2","copy_foo":"foo2","array_a":[{"a":"a0"}],"array_b":[{"b":"b"},{"b":"b"}]} |
10
+ | 2015-07-13 00:00:00 UTC | foo | 1 | | 9170.0 | {"foo":"FOO","copy_foo":"FOO","array_a":[{"a":"default0"}],"array_b":[{"b":"default"},{"b":"default"}]} |
11
+ +-------------------------+---------------------------------------------+----------+---------+-------------------+---------------------------------------------------------------------------------------------------------+
@@ -0,0 +1,11 @@
1
+ 2016-10-20 02:30:03.618 +0900: Embulk v0.8.6
2
+ 2016-10-20 02:30:04.499 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/column from a load path
3
+ 2016-10-20 02:30:04.516 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'example.csv'
4
+ 2016-10-20 02:30:04.521 +0900 [INFO] (0001:preview): Loading files [example/example.csv]
5
+ +-------------------------+---------------------------------------------+----------+---------+-------------------+----------------------------------+
6
+ | time:timestamp | name:string | foo:long | id:long | copy_score:double | json:json |
7
+ +-------------------------+---------------------------------------------+----------+---------+-------------------+----------------------------------+
8
+ | 2015-07-13 00:00:00 UTC | Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY | 1 | 0 | 1370.0 | {"foo":"foo","copy_foo":"foo"} |
9
+ | 2015-07-13 00:00:00 UTC | Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY | 1 | 0 | 1370.0 | {"foo":"foo2","copy_foo":"foo2"} |
10
+ | 2015-07-13 00:00:00 UTC | foo | 1 | | 9170.0 | {"foo":"FOO","copy_foo":"FOO"} |
11
+ +-------------------------+---------------------------------------------+----------+---------+-------------------+----------------------------------+
@@ -0,0 +1,11 @@
1
+ 2016-10-20 02:30:16.407 +0900: Embulk v0.8.6
2
+ 2016-10-20 02:30:17.290 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/column from a load path
3
+ 2016-10-20 02:30:17.305 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'example.csv'
4
+ 2016-10-20 02:30:17.310 +0900 [INFO] (0001:preview): Loading files [example/example.csv]
5
+ +---------------------------------------------+--------------+---------------+
6
+ | name:string | score:double | json:json |
7
+ +---------------------------------------------+--------------+---------------+
8
+ | Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY | 1370.0 | {"bar":"bar"} |
9
+ | Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY | 1370.0 | {} |
10
+ | | 9170.0 | |
11
+ +---------------------------------------------+--------------+---------------+
@@ -27,7 +27,7 @@ import org.slf4j.Logger;
27
27
 
28
28
  import java.util.List;
29
29
 
30
- import com.dena.analytics.jsonpathcompiler.expressions.path.PathCompiler;
30
+ import io.github.medjed.jsonpathcompiler.expressions.path.PathCompiler;
31
31
 
32
32
  public class ColumnFilterPlugin implements FilterPlugin
33
33
  {
@@ -1,6 +1,6 @@
1
1
  package org.embulk.filter.column;
2
2
 
3
- import com.dena.analytics.jsonpathcompiler.expressions.Utils;
3
+ import io.github.medjed.jsonpathcompiler.expressions.Utils;
4
4
  import com.google.common.base.Throwables;
5
5
 
6
6
  import org.embulk.filter.column.ColumnFilterPlugin.ColumnConfig;
@@ -1,12 +1,12 @@
1
1
  package org.embulk.filter.column;
2
2
 
3
- import com.dena.analytics.jsonpathcompiler.expressions.Path;
4
- import com.dena.analytics.jsonpathcompiler.expressions.path.ArrayIndexOperation;
5
- import com.dena.analytics.jsonpathcompiler.expressions.path.ArrayPathToken;
6
- import com.dena.analytics.jsonpathcompiler.expressions.path.PathCompiler;
7
- import com.dena.analytics.jsonpathcompiler.expressions.path.PathToken;
8
- import com.dena.analytics.jsonpathcompiler.expressions.path.RootPathToken;
9
- import com.dena.analytics.jsonpathcompiler.expressions.path.PropertyPathToken;
3
+ import io.github.medjed.jsonpathcompiler.expressions.Path;
4
+ import io.github.medjed.jsonpathcompiler.expressions.path.ArrayIndexOperation;
5
+ import io.github.medjed.jsonpathcompiler.expressions.path.ArrayPathToken;
6
+ import io.github.medjed.jsonpathcompiler.expressions.path.PathCompiler;
7
+ import io.github.medjed.jsonpathcompiler.expressions.path.PathToken;
8
+ import io.github.medjed.jsonpathcompiler.expressions.path.RootPathToken;
9
+ import io.github.medjed.jsonpathcompiler.expressions.path.PropertyPathToken;
10
10
  import org.embulk.config.ConfigException;
11
11
  import org.embulk.spi.type.Type;
12
12
  import org.msgpack.value.StringValue;
@@ -56,17 +56,14 @@ public class JsonColumn
56
56
  this.pathValue = ValueFactory.newString(path);
57
57
  this.parentPath = compiledPath.getParentPath();
58
58
 
59
- if (compiledRoot.getTailPath().equals("[*]")) {
60
- throw new ConfigException(String.format("%s wrongly ends with [*], perhaps you can remove the [*]", path));
61
- }
62
- this.tailIndex = tailIndex(compiledRoot);
59
+ this.tailIndex = getTailIndex(compiledRoot);
63
60
  this.parentPathValue = ValueFactory.newString(parentPath);
64
61
  String tailName = getTailName(compiledRoot);
65
62
  this.tailNameValue = tailName == null ? ValueFactory.newNil() : ValueFactory.newString(tailName);
66
63
 
67
64
  this.srcValue = ValueFactory.newString(this.src);
68
65
  this.srcParentPath = compiledSrc.getParentPath();
69
- this.srcTailIndex = tailIndex(compiledSrcRoot);
66
+ this.srcTailIndex = getTailIndex(compiledSrcRoot);
70
67
  this.srcParentPathValue = ValueFactory.newString(this.srcParentPath);
71
68
  String srcTailName = getTailName(compiledSrcRoot);
72
69
  this.srcTailNameValue = srcTailName == null ? ValueFactory.newNil() : ValueFactory.newString(srcTailName);
@@ -92,12 +89,12 @@ public class JsonColumn
92
89
  }
93
90
  }
94
91
 
95
- private Long tailIndex(RootPathToken root)
92
+ private Long getTailIndex(RootPathToken root)
96
93
  {
97
94
  PathToken tail = root.getTail();
98
95
  if (tail instanceof ArrayPathToken) {
99
96
  ArrayIndexOperation arrayIndexOperation = ((ArrayPathToken) tail).getArrayIndexOperation();
100
- PathTokenUtil.assertSupportedArrayPathToken(arrayIndexOperation, path);
97
+ JsonPathUtil.assertSupportedArrayPathToken(arrayIndexOperation, path);
101
98
  return arrayIndexOperation.indexes().get(0).longValue();
102
99
  }
103
100
  else {
@@ -135,7 +132,7 @@ public class JsonColumn
135
132
  return parentPath;
136
133
  }
137
134
 
138
- public Long tailIndex()
135
+ public Long getTailIndex()
139
136
  {
140
137
  return tailIndex;
141
138
  }
@@ -186,7 +183,7 @@ public class JsonColumn
186
183
  return ((RootPathToken) PathCompiler.compile(path).getRoot()).getTailPath();
187
184
  }
188
185
 
189
- public static Long tailIndex(String path)
186
+ public static Long getTailIndex(String path)
190
187
  {
191
188
  Path compiledPath = PathCompiler.compile(path);
192
189
  PathToken tail = ((RootPathToken) compiledPath.getRoot()).getTail();
@@ -0,0 +1,76 @@
1
+ package org.embulk.filter.column;
2
+
3
+ import io.github.medjed.jsonpathcompiler.InvalidPathException;
4
+ import io.github.medjed.jsonpathcompiler.expressions.Path;
5
+ import io.github.medjed.jsonpathcompiler.expressions.path.ArrayIndexOperation;
6
+ import io.github.medjed.jsonpathcompiler.expressions.path.ArrayPathToken;
7
+ import io.github.medjed.jsonpathcompiler.expressions.path.FunctionPathToken;
8
+ import io.github.medjed.jsonpathcompiler.expressions.path.PathCompiler;
9
+ import io.github.medjed.jsonpathcompiler.expressions.path.PathToken;
10
+ import io.github.medjed.jsonpathcompiler.expressions.path.PredicatePathToken;
11
+ import io.github.medjed.jsonpathcompiler.expressions.path.RootPathToken;
12
+ import io.github.medjed.jsonpathcompiler.expressions.path.ScanPathToken;
13
+ import org.embulk.config.ConfigException;
14
+
15
+ public class JsonPathUtil
16
+ {
17
+ public static void assertJsonPathFormat(String path)
18
+ {
19
+ Path compiledPath;
20
+ try {
21
+ compiledPath = PathCompiler.compile(path);
22
+ }
23
+ catch (InvalidPathException e) {
24
+ throw new ConfigException(String.format("jsonpath %s, %s", path, e.getMessage()));
25
+ }
26
+ PathToken pathToken = compiledPath.getRoot();
27
+ while (true) {
28
+ assertSupportedPathToken(pathToken, path);
29
+ if (pathToken.isLeaf()) {
30
+ break;
31
+ }
32
+ pathToken = pathToken.next();
33
+ }
34
+ }
35
+
36
+ public static void assertSupportedPathToken(PathToken pathToken, String path)
37
+ {
38
+ if (pathToken instanceof ArrayPathToken) {
39
+ ArrayIndexOperation arrayIndexOperation = ((ArrayPathToken) pathToken).getArrayIndexOperation();
40
+ assertSupportedArrayPathToken(arrayIndexOperation, path);
41
+ }
42
+ else if (pathToken instanceof ScanPathToken) {
43
+ throw new ConfigException(String.format("scan path token is not supported \"%s\"", path));
44
+ }
45
+ else if (pathToken instanceof FunctionPathToken) {
46
+ throw new ConfigException(String.format("function path token is not supported \"%s\"", path));
47
+ }
48
+ else if (pathToken instanceof PredicatePathToken) {
49
+ throw new ConfigException(String.format("predicate path token is not supported \"%s\"", path));
50
+ }
51
+ }
52
+
53
+ public static void assertSupportedArrayPathToken(ArrayIndexOperation arrayIndexOperation, String path)
54
+ {
55
+ if (arrayIndexOperation == null) {
56
+ throw new ConfigException(String.format("Array Slice Operation is not supported \"%s\"", path));
57
+ }
58
+ else if (!arrayIndexOperation.isSingleIndexOperation()) {
59
+ throw new ConfigException(String.format("Multi Array Indexes is not supported \"%s\"", path));
60
+ }
61
+ }
62
+
63
+ public static void assertDoNotEndsWithArrayWildcard(String path)
64
+ {
65
+ Path compiledPath;
66
+ try {
67
+ compiledPath = PathCompiler.compile(path);
68
+ }
69
+ catch (InvalidPathException e) {
70
+ throw new ConfigException(String.format("jsonpath %s, %s", path, e.getMessage()));
71
+ }
72
+ if (((RootPathToken) compiledPath.getRoot()).getTailPath().equals("[*]")) {
73
+ throw new ConfigException(String.format("%s wrongly ends with [*], perhaps you can remove the [*]", compiledPath.toString()));
74
+ }
75
+ }
76
+ }
@@ -1,9 +1,10 @@
1
1
  package org.embulk.filter.column;
2
2
 
3
- import com.dena.analytics.jsonpathcompiler.InvalidPathException;
4
- import com.dena.analytics.jsonpathcompiler.expressions.Path;
5
- import com.dena.analytics.jsonpathcompiler.expressions.path.PathCompiler;
6
- import com.dena.analytics.jsonpathcompiler.expressions.path.PathToken;
3
+ import io.github.medjed.jsonpathcompiler.InvalidPathException;
4
+ import io.github.medjed.jsonpathcompiler.expressions.Path;
5
+ import io.github.medjed.jsonpathcompiler.expressions.path.ArrayPathToken;
6
+ import io.github.medjed.jsonpathcompiler.expressions.path.PathCompiler;
7
+ import io.github.medjed.jsonpathcompiler.expressions.path.PathToken;
7
8
  import org.embulk.config.ConfigException;
8
9
  import org.embulk.filter.column.ColumnFilterPlugin.ColumnConfig;
9
10
  import org.embulk.filter.column.ColumnFilterPlugin.PluginTask;
@@ -18,6 +19,7 @@ import org.embulk.spi.type.LongType;
18
19
  import org.embulk.spi.type.StringType;
19
20
  import org.embulk.spi.type.TimestampType;
20
21
  import org.embulk.spi.type.Type;
22
+ import org.embulk.spi.type.Types;
21
23
  import org.msgpack.value.ArrayValue;
22
24
  import org.msgpack.value.MapValue;
23
25
  import org.msgpack.value.Value;
@@ -38,9 +40,13 @@ public class JsonVisitor
38
40
  final PluginTask task;
39
41
  final Schema inputSchema;
40
42
  final Schema outputSchema;
43
+ // jsonpath
41
44
  final HashSet<String> shouldVisitSet = new HashSet<>();
45
+ // parent jsonpath => { jsonpath => json column }
42
46
  final HashMap<String, LinkedHashMap<String, JsonColumn>> jsonColumns = new HashMap<>();
47
+ // parent jsonpath => { jsonpath => json column }
43
48
  final HashMap<String, LinkedHashMap<String, JsonColumn>> jsonAddColumns = new HashMap<>();
49
+ // parent jsonpath => [ jsonpath ]
44
50
  final HashMap<String, HashSet<String>> jsonDropColumns = new HashMap<>();
45
51
 
46
52
  JsonVisitor(PluginTask task, Schema inputSchema, Schema outputSchema)
@@ -92,6 +98,18 @@ public class JsonVisitor
92
98
  jsonColumns.get(parentPath).put(compiledPath.toString(), value);
93
99
  }
94
100
 
101
+ private boolean jsonColumnsContainsKey(String path)
102
+ {
103
+ Path compiledPath = PathCompiler.compile(path);
104
+ String parentPath = compiledPath.getParentPath();
105
+ if (jsonColumns.containsKey(parentPath)) {
106
+ return jsonColumns.get(parentPath).containsKey(compiledPath.toString());
107
+ }
108
+ else {
109
+ return false;
110
+ }
111
+ }
112
+
95
113
  private void jsonAddColumnsPut(String path, JsonColumn value)
96
114
  {
97
115
  Path compiledPath = PathCompiler.compile(path);
@@ -102,6 +120,18 @@ public class JsonVisitor
102
120
  jsonAddColumns.get(parentPath).put(compiledPath.toString(), value);
103
121
  }
104
122
 
123
+ private boolean jsonAddColumnsContainsKey(String path)
124
+ {
125
+ Path compiledPath = PathCompiler.compile(path);
126
+ String parentPath = compiledPath.getParentPath();
127
+ if (jsonAddColumns.containsKey(parentPath)) {
128
+ return jsonAddColumns.get(parentPath).containsKey(compiledPath.toString());
129
+ }
130
+ else {
131
+ return false;
132
+ }
133
+ }
134
+
105
135
  private void jsonDropColumnsPut(String path)
106
136
  {
107
137
  Path compiledPath = PathCompiler.compile(path);
@@ -112,68 +142,98 @@ public class JsonVisitor
112
142
  jsonDropColumns.get(parentPath).add(compiledPath.toString());
113
143
  }
114
144
 
115
- // build jsonColumns, jsonAddColumns, and jsonDropColumns
116
- private void buildJsonSchema()
145
+ private void buildJsonColumns()
117
146
  {
118
147
  List<ColumnConfig> columns = task.getColumns();
119
- List<ColumnConfig> addColumns = task.getAddColumns();
120
- List<ColumnConfig> dropColumns = task.getDropColumns();
121
-
122
- int i = 0;
123
- if (dropColumns.size() > 0) {
124
- for (ColumnConfig dropColumn : dropColumns) {
125
- String name = dropColumn.getName();
126
- // skip NON json path notation to build output schema
127
- if (! PathCompiler.isProbablyJsonPath(name)) {
128
- continue;
148
+ for (ColumnConfig column : columns) {
149
+ String name = column.getName();
150
+ // skip NON json path notation to build output schema
151
+ if (! PathCompiler.isProbablyJsonPath(name)) {
152
+ continue;
153
+ }
154
+ JsonPathUtil.assertDoNotEndsWithArrayWildcard(name);
155
+ // automatically fill ancestor jsonpaths
156
+ for (JsonColumn ancestorJsonColumn : getAncestorJsonColumnList(name)) {
157
+ String ancestorJsonPath = ancestorJsonColumn.getPath();
158
+ if (!jsonColumnsContainsKey(ancestorJsonPath)) {
159
+ jsonColumnsPut(ancestorJsonPath, ancestorJsonColumn);
129
160
  }
130
- jsonDropColumnsPut(name);
161
+ }
162
+ // leaf jsonpath
163
+ if (column.getSrc().isPresent()) {
164
+ String src = column.getSrc().get();
165
+ jsonColumnsPut(name, new JsonColumn(name, null, null, src));
166
+ }
167
+ else if (column.getType().isPresent() && column.getDefault().isPresent()) { // add column
168
+ Type type = column.getType().get();
169
+ Value defaultValue = getDefault(task, name, type, column);
170
+ jsonColumnsPut(name, new JsonColumn(name, type, defaultValue));
171
+ }
172
+ else {
173
+ Type type = column.getType().isPresent() ? column.getType().get() : null;
174
+ jsonColumnsPut(name, new JsonColumn(name, type));
131
175
  }
132
176
  }
133
- else if (columns.size() > 0) {
134
- for (ColumnConfig column : columns) {
135
- String name = column.getName();
136
- // skip NON json path notation to build output schema
137
- if (! PathCompiler.isProbablyJsonPath(name)) {
138
- continue;
139
- }
140
- if (column.getSrc().isPresent()) {
141
- String src = column.getSrc().get();
142
- jsonColumnsPut(name, new JsonColumn(name, null, null, src));
143
- }
144
- else if (column.getType().isPresent() && column.getDefault().isPresent()) { // add column
145
- Type type = column.getType().get();
146
- Value defaultValue = getDefault(task, name, type, column);
147
- jsonColumnsPut(name, new JsonColumn(name, type, defaultValue));
148
- }
149
- else {
150
- Type type = column.getType().isPresent() ? column.getType().get() : null;
151
- jsonColumnsPut(name, new JsonColumn(name, type));
177
+ }
178
+
179
+ private void buildJsonAddColumns()
180
+ {
181
+ List<ColumnConfig> addColumns = task.getAddColumns();
182
+ for (ColumnConfig column : addColumns) {
183
+ String name = column.getName();
184
+ // skip NON json path notation to build output schema
185
+ if (! PathCompiler.isProbablyJsonPath(name)) {
186
+ continue;
187
+ }
188
+ JsonPathUtil.assertDoNotEndsWithArrayWildcard(name);
189
+ // automatically fill ancestor jsonpaths
190
+ for (JsonColumn ancestorJsonColumn : getAncestorJsonColumnList(name)) {
191
+ String ancestorJsonPath = ancestorJsonColumn.getPath();
192
+ if (!jsonAddColumnsContainsKey(ancestorJsonPath)) {
193
+ jsonAddColumnsPut(ancestorJsonPath, ancestorJsonColumn);
152
194
  }
153
195
  }
196
+ // leaf jsonpath
197
+ if (column.getSrc().isPresent()) {
198
+ String src = column.getSrc().get();
199
+ jsonAddColumnsPut(name, new JsonColumn(name, null, null, src));
200
+ }
201
+ else if (column.getType().isPresent() && column.getDefault().isPresent()) { // add column
202
+ Type type = column.getType().get();
203
+ Value defaultValue = getDefault(task, name, type, column);
204
+ jsonAddColumnsPut(name, new JsonColumn(name, type, defaultValue));
205
+ }
206
+ else {
207
+ throw new SchemaConfigException(String.format("add_columns: Column '%s' does not have \"src\", or \"type\" and \"default\"", name));
208
+ }
154
209
  }
210
+ }
155
211
 
156
- // Add columns to last. If you want to add to head or middle, you can use `columns` option
157
- if (addColumns.size() > 0) {
158
- for (ColumnConfig column : addColumns) {
159
- String name = column.getName();
160
- // skip NON json path notation to build output schema
161
- if (! PathCompiler.isProbablyJsonPath(name)) {
162
- continue;
163
- }
164
- if (column.getSrc().isPresent()) {
165
- String src = column.getSrc().get();
166
- jsonAddColumnsPut(name, new JsonColumn(name, null, null, src));
167
- }
168
- else if (column.getType().isPresent() && column.getDefault().isPresent()) { // add column
169
- Type type = column.getType().get();
170
- Value defaultValue = getDefault(task, name, type, column);
171
- jsonAddColumnsPut(name, new JsonColumn(name, type, defaultValue));
172
- }
173
- else {
174
- throw new SchemaConfigException(String.format("add_columns: Column '%s' does not have \"src\", or \"type\" and \"default\"", name));
175
- }
212
+ private void buildJsonDropColumns()
213
+ {
214
+ List<ColumnConfig> dropColumns = task.getDropColumns();
215
+ for (ColumnConfig dropColumn : dropColumns) {
216
+ String name = dropColumn.getName();
217
+ // skip NON json path notation to build output schema
218
+ if (! PathCompiler.isProbablyJsonPath(name)) {
219
+ continue;
176
220
  }
221
+ jsonDropColumnsPut(name);
222
+ }
223
+ }
224
+
225
+ // build jsonColumns, jsonAddColumns, and jsonDropColumns
226
+ private void buildJsonSchema()
227
+ {
228
+ if (task.getDropColumns().size() > 0) {
229
+ buildJsonDropColumns();
230
+ }
231
+ else if (task.getColumns().size() > 0) {
232
+ buildJsonColumns();
233
+ }
234
+ // Add columns to last. If you want to add to head or middle, you can use `columns` option
235
+ if (task.getAddColumns().size() > 0) {
236
+ buildJsonAddColumns();
177
237
  }
178
238
  }
179
239
 
@@ -189,23 +249,53 @@ public class JsonVisitor
189
249
  if (!PathCompiler.isProbablyJsonPath(name)) {
190
250
  continue;
191
251
  }
192
- Path path;
193
- try {
194
- path = PathCompiler.compile(name);
195
- } catch (InvalidPathException e) {
196
- throw new ConfigException(String.format("path %s, %s", name, e.getMessage()));
252
+ JsonPathUtil.assertJsonPathFormat(name);
253
+ for (JsonColumn ancestorJsonColumn : getAncestorJsonColumnList(name)) {
254
+ this.shouldVisitSet.add(ancestorJsonColumn.getPath());
197
255
  }
198
- PathToken parts = path.getRoot();
199
- int count = parts.getTokenCount();
200
- StringBuilder partialPath = new StringBuilder("$");
201
- // skip "$"
202
- for (int i = 1; i < count; i++) {
203
- parts = parts.next();
204
- PathTokenUtil.assertSupportedPathToken(parts, name);
205
- partialPath.append(parts.getPathFragment().toString());
206
- this.shouldVisitSet.add(partialPath.toString());
256
+ Path path = PathCompiler.compile(name);
257
+ this.shouldVisitSet.add(path.toString());
258
+ }
259
+ }
260
+
261
+ /*
262
+ * <pre>
263
+ * $['foo']['bar'][0]['baz']
264
+ * #=>
265
+ * name: $['foo'], type: json, default: {}
266
+ * name: $['foo']['bar'], type: json, default: []
267
+ * name: $['foo']['bar'][0], type: json, default: {}
268
+ * </pre>
269
+ *
270
+ * @return ancestors as an array
271
+ */
272
+ public static ArrayList<JsonColumn> getAncestorJsonColumnList(String path)
273
+ {
274
+ ArrayList<JsonColumn> ancestorJsonColumnList = new ArrayList<>();
275
+ Path compiledPath;
276
+ try {
277
+ compiledPath = PathCompiler.compile(path);
278
+ }
279
+ catch (InvalidPathException e) {
280
+ throw new ConfigException(String.format("jsonpath %s, %s", path, e.getMessage()));
281
+ }
282
+ StringBuilder partialPath = new StringBuilder("$");
283
+ PathToken parts = compiledPath.getRoot();
284
+ parts = parts.next(); // skip "$"
285
+ while (! parts.isLeaf()) {
286
+ partialPath.append(parts.getPathFragment());
287
+ PathToken next = parts.next();
288
+ JsonColumn jsonColumn;
289
+ if (next instanceof ArrayPathToken) {
290
+ jsonColumn = new JsonColumn(partialPath.toString(), Types.JSON, ValueFactory.newArray(new Value[0], false));
291
+ }
292
+ else {
293
+ jsonColumn = new JsonColumn(partialPath.toString(), Types.JSON, ValueFactory.newMap(new Value[0]));
207
294
  }
295
+ ancestorJsonColumnList.add(jsonColumn);
296
+ parts = next;
208
297
  }
298
+ return ancestorJsonColumnList;
209
299
  }
210
300
 
211
301
  boolean shouldVisit(String jsonPath)
@@ -253,7 +343,7 @@ public class JsonVisitor
253
343
  }
254
344
  String newPath = jsonColumn.getPath();
255
345
  Value visited = visit(newPath, v);
256
- // int i = jsonColumn.tailIndex().intValue();
346
+ // int i = jsonColumn.getTailIndex().intValue();
257
347
  // index is shifted, so j++ is used.
258
348
  newValue.add(j++, visited == null ? ValueFactory.newNil() : visited);
259
349
  }
@@ -267,6 +357,12 @@ public class JsonVisitor
267
357
  }
268
358
  if (this.jsonAddColumns.containsKey(rootPath)) {
269
359
  for (JsonColumn jsonColumn : this.jsonAddColumns.get(rootPath).values()) {
360
+ int i = jsonColumn.getTailIndex().intValue();
361
+ if (i < size) {
362
+ // index for add_columns must be larger than size
363
+ // just skip because we can not raise ConfigException beforehand for flexible JSON
364
+ continue;
365
+ }
270
366
  int src = jsonColumn.getSrcTailIndex().intValue();
271
367
  Value v = (src < arrayValue.size() ? arrayValue.get(src) : null);
272
368
  if (v == null) {
@@ -326,6 +422,12 @@ public class JsonVisitor
326
422
  if (this.jsonAddColumns.containsKey(rootPath)) {
327
423
  Map<Value, Value> map = mapValue.map();
328
424
  for (JsonColumn jsonColumn : this.jsonAddColumns.get(rootPath).values()) {
425
+ Value k = jsonColumn.getTailNameValue();
426
+ if (map.containsKey(k)) {
427
+ // key must be different with already existing one for add_columns
428
+ // just skip because we can not raise ConfigException beforehand for flexible JSON
429
+ continue;
430
+ }
329
431
  Value src = jsonColumn.getSrcTailNameValue();
330
432
  Value v = map.get(src);
331
433
  if (v == null) {
@@ -67,13 +67,13 @@ public class TestJsonColumn
67
67
  }
68
68
 
69
69
  @Test
70
- public void tailIndex()
70
+ public void getTailIndex()
71
71
  {
72
- assertEquals(null, JsonColumn.tailIndex("$['foo'].bar.baz"));
73
- assertEquals(null, JsonColumn.tailIndex("$.foo.bar"));
74
- assertEquals(null, JsonColumn.tailIndex("$.foo"));
75
- assertEquals(new Long(1), JsonColumn.tailIndex("$.foo[0][1]"));
76
- assertEquals(new Long(0), JsonColumn.tailIndex("$.foo[0]"));
77
- assertEquals(new Long(0), JsonColumn.tailIndex("$[0]"));
72
+ assertEquals(null, JsonColumn.getTailIndex("$['foo'].bar.baz"));
73
+ assertEquals(null, JsonColumn.getTailIndex("$.foo.bar"));
74
+ assertEquals(null, JsonColumn.getTailIndex("$.foo"));
75
+ assertEquals(new Long(1), JsonColumn.getTailIndex("$.foo[0][1]"));
76
+ assertEquals(new Long(0), JsonColumn.getTailIndex("$.foo[0]"));
77
+ assertEquals(new Long(0), JsonColumn.getTailIndex("$[0]"));
78
78
  }
79
- }
79
+ }
@@ -14,13 +14,14 @@ import org.junit.rules.ExpectedException;
14
14
  import org.msgpack.value.MapValue;
15
15
  import org.msgpack.value.Value;
16
16
  import org.msgpack.value.ValueFactory;
17
- import com.dena.analytics.jsonpathcompiler.InvalidPathException;
18
17
 
19
18
  import static org.embulk.spi.type.Types.JSON;
20
19
  import static org.junit.Assert.assertEquals;
21
20
  import static org.junit.Assert.assertFalse;
22
21
  import static org.junit.Assert.assertTrue;
23
22
 
23
+ import java.util.ArrayList;
24
+ import java.util.Arrays;
24
25
  import java.util.HashMap;
25
26
  import java.util.HashSet;
26
27
 
@@ -58,16 +59,32 @@ public class TestJsonVisitor
58
59
  return new JsonVisitor(task, inputSchema, outputSchema);
59
60
  }
60
61
 
61
- @Test(expected = ConfigException.class)
62
- public void configException_Columns()
62
+ @Test
63
+ public void getAncestorJsonColumnList()
63
64
  {
64
- PluginTask task = taskFromYamlString(
65
- "type: column",
66
- "columns:",
67
- " - {name: \"$.json1.b.b[*]\"}");
68
- Schema inputSchema = Schema.builder().build();
69
- // b[*] should be written as b
70
- jsonVisitor(task, inputSchema);
65
+ ArrayList<JsonColumn> subject;
66
+
67
+ subject = JsonVisitor.getAncestorJsonColumnList("$.json1.a.default");
68
+ assertEquals("$['json1']", subject.get(0).getPath());
69
+ assertTrue(subject.get(0).getDefaultValue().isMapValue());
70
+ assertEquals("$['json1']['a']", subject.get(1).getPath());
71
+ assertTrue(subject.get(1).getDefaultValue().isMapValue());
72
+
73
+ subject = JsonVisitor.getAncestorJsonColumnList("$.json1.a[0].default");
74
+ assertEquals("$['json1']", subject.get(0).getPath());
75
+ assertTrue(subject.get(0).getDefaultValue().isMapValue());
76
+ assertEquals("$['json1']['a']", subject.get(1).getPath());
77
+ assertTrue(subject.get(1).getDefaultValue().isArrayValue());
78
+ assertEquals("$['json1']['a'][0]", subject.get(2).getPath());
79
+ assertTrue(subject.get(2).getDefaultValue().isMapValue());
80
+
81
+ subject = JsonVisitor.getAncestorJsonColumnList("$.json1.a.default[0]");
82
+ assertEquals("$['json1']", subject.get(0).getPath());
83
+ assertTrue(subject.get(0).getDefaultValue().isMapValue());
84
+ assertEquals("$['json1']['a']", subject.get(1).getPath());
85
+ assertTrue(subject.get(1).getDefaultValue().isMapValue());
86
+ assertEquals("$['json1']['a']['default']", subject.get(2).getPath());
87
+ assertTrue(subject.get(2).getDefaultValue().isArrayValue());
71
88
  }
72
89
 
73
90
  @Test
@@ -103,7 +120,7 @@ public class TestJsonVisitor
103
120
  }
104
121
 
105
122
  @Test
106
- public void buildJsonSchema_DropColumns()
123
+ public void buildJsonDropColumns()
107
124
  {
108
125
  PluginTask task = taskFromYamlString(
109
126
  "type: column",
@@ -135,8 +152,32 @@ public class TestJsonVisitor
135
152
  }
136
153
  }
137
154
 
155
+ @Test(expected = ConfigException.class)
156
+ public void configException_Columns()
157
+ {
158
+ PluginTask task = taskFromYamlString(
159
+ "type: column",
160
+ "columns:",
161
+ " - {name: \"$.json1.b.b[*]\"}");
162
+ Schema inputSchema = Schema.builder().build();
163
+ // b[*] should be written as b
164
+ jsonVisitor(task, inputSchema);
165
+ }
166
+
167
+ @Test(expected = ConfigException.class)
168
+ public void buildJsonAddColumns_ConfigException()
169
+ {
170
+ PluginTask task = taskFromYamlString(
171
+ "type: column",
172
+ "add_columns:",
173
+ " - {name: \"$.json1.b.b[*]\", type: json, default: []}");
174
+ Schema inputSchema = Schema.builder().build();
175
+ // b[*] should be written as b
176
+ jsonVisitor(task, inputSchema);
177
+ }
178
+
138
179
  @Test
139
- public void buildJsonSchema_AddColumns()
180
+ public void buildJsonAddColumns()
140
181
  {
141
182
  PluginTask task = taskFromYamlString(
142
183
  "type: column",
@@ -150,19 +191,22 @@ public class TestJsonVisitor
150
191
  .build();
151
192
  JsonVisitor subject = jsonVisitor(task, inputSchema);
152
193
 
153
- assertFalse(subject.jsonAddColumns.containsKey("$['json1']"));
194
+ assertTrue(subject.jsonAddColumns.containsKey("$"));
195
+ assertTrue(subject.jsonAddColumns.containsKey("$['json1']"));
154
196
  assertTrue(subject.jsonAddColumns.containsKey("$['json1']['a']"));
155
197
  assertTrue(subject.jsonAddColumns.containsKey("$['json1']['a']['copy_array']"));
156
198
 
157
199
  {
158
200
  HashMap<String, JsonColumn> jsonColumns = subject.jsonAddColumns.get("$['json1']['a']");
159
- assertEquals(2, jsonColumns.size());
201
+ assertEquals(3, jsonColumns.size());
160
202
  String[] keys = jsonColumns.keySet().toArray(new String[0]);
161
203
  JsonColumn[] values = jsonColumns.values().toArray(new JsonColumn[0]);
162
204
  assertEquals("$['json1']['a']['default']", keys[0]);
163
205
  assertEquals("$['json1']['a']['default']", values[0].getPath());
164
206
  assertEquals("$['json1']['a']['copy']", keys[1]);
165
207
  assertEquals("$['json1']['a']['copy']", values[1].getPath());
208
+ assertEquals("$['json1']['a']['copy_array']", keys[2]);
209
+ assertEquals("$['json1']['a']['copy_array']", values[2].getPath());
166
210
  }
167
211
 
168
212
  {
@@ -175,8 +219,20 @@ public class TestJsonVisitor
175
219
  }
176
220
  }
177
221
 
222
+ @Test(expected = ConfigException.class)
223
+ public void buildJsonColumns_ConfigException()
224
+ {
225
+ PluginTask task = taskFromYamlString(
226
+ "type: column",
227
+ "columns:",
228
+ " - {name: \"$.json1.b.b[*]\"}");
229
+ Schema inputSchema = Schema.builder().build();
230
+ // b[*] should be written as b
231
+ jsonVisitor(task, inputSchema);
232
+ }
233
+
178
234
  @Test
179
- public void buildJsonSchema_Columns()
235
+ public void buildJsonColumns()
180
236
  {
181
237
  PluginTask task = taskFromYamlString(
182
238
  "type: column",
@@ -190,19 +246,23 @@ public class TestJsonVisitor
190
246
  .build();
191
247
  JsonVisitor subject = jsonVisitor(task, inputSchema);
192
248
 
193
- assertFalse(subject.jsonColumns.containsKey("$['json1']"));
249
+ // 1st level keys are parents of jsonpath
250
+ assertTrue(subject.jsonColumns.containsKey("$"));
251
+ assertTrue(subject.jsonColumns.containsKey("$['json1']"));
194
252
  assertTrue(subject.jsonColumns.containsKey("$['json1']['a']"));
195
253
  assertTrue(subject.jsonColumns.containsKey("$['json1']['a']['copy_array']"));
196
254
 
197
255
  {
198
256
  HashMap<String, JsonColumn> jsonColumns = subject.jsonColumns.get("$['json1']['a']");
199
- assertEquals(2, jsonColumns.size());
257
+ assertEquals(3, jsonColumns.size());
200
258
  String[] keys = jsonColumns.keySet().toArray(new String[0]);
201
259
  JsonColumn[] values = jsonColumns.values().toArray(new JsonColumn[0]);
202
260
  assertEquals("$['json1']['a']['default']", keys[0]);
203
261
  assertEquals("$['json1']['a']['default']", values[0].getPath());
204
262
  assertEquals("$['json1']['a']['copy']", keys[1]);
205
263
  assertEquals("$['json1']['a']['copy']", values[1].getPath());
264
+ assertEquals("$['json1']['a']['copy_array']", keys[2]);
265
+ assertEquals("$['json1']['a']['copy_array']", values[2].getPath());
206
266
  }
207
267
 
208
268
  {
@@ -216,7 +276,7 @@ public class TestJsonVisitor
216
276
  }
217
277
 
218
278
  @Test
219
- public void buildJsonSchema_Mix()
279
+ public void buildJsonSchema()
220
280
  {
221
281
  PluginTask task = taskFromYamlString(
222
282
  "type: column",
@@ -269,7 +329,6 @@ public class TestJsonVisitor
269
329
  PluginTask task = taskFromYamlString(
270
330
  "type: column",
271
331
  "add_columns:",
272
- " - {name: $.json1.k3, type: json, default: \"{}\"}",
273
332
  " - {name: $.json1.k3.k3, type: string, default: v}",
274
333
  " - {name: $.json1.k4, src: $.json1.k2}");
275
334
  Schema inputSchema = Schema.builder()
@@ -297,8 +356,7 @@ public class TestJsonVisitor
297
356
  "type: column",
298
357
  "columns:",
299
358
  " - {name: $.json1.k1}",
300
- " - {name: $.json1.k2.k2}", // $.json1.k2 must be specified now, or $.json.k2 will be removed entirely
301
- " - {name: $.json1.k3, type: json, default: \"{}\"}",
359
+ " - {name: $.json1.k2.k2}",
302
360
  " - {name: $.json1.k3.k3, type: string, default: v}",
303
361
  " - {name: $.json1.k4, src: $.json1.k2}");
304
362
  Schema inputSchema = Schema.builder()
@@ -316,7 +374,7 @@ public class TestJsonVisitor
316
374
  k2, ValueFactory.newMap(k2, v));
317
375
 
318
376
  MapValue visited = subject.visit("$['json1']", map).asMapValue();
319
- assertEquals("{\"k1\":{\"k1\":\"v\"},\"k3\":{\"k3\":\"v\"},\"k4\":{\"k2\":\"v\"}}", visited.toString());
377
+ assertEquals("{\"k1\":{\"k1\":\"v\"},\"k2\":{\"k2\":\"v\"},\"k3\":{\"k3\":\"v\"},\"k4\":{\"k2\":\"v\"}}", visited.toString());
320
378
  }
321
379
 
322
380
  @Test
@@ -352,8 +410,6 @@ public class TestJsonVisitor
352
410
  "type: column",
353
411
  "add_columns:",
354
412
  " - {name: \"$.json1.k1[1]\", src: \"$.json1.k1[0]\"}",
355
- " - {name: \"$.json1.k3\", type: json, default: \"[]\"}",
356
- " - {name: \"$.json1.k3[0]\", type: json, default: \"{}\"}",
357
413
  " - {name: \"$.json1.k3[0].k3\", type: string, default: v}");
358
414
  Schema inputSchema = Schema.builder()
359
415
  .add("json1", JSON)
@@ -379,11 +435,8 @@ public class TestJsonVisitor
379
435
  PluginTask task = taskFromYamlString(
380
436
  "type: column",
381
437
  "columns:",
382
- " - {name: \"$.json1.k1\"}",
383
438
  " - {name: \"$.json1.k1[1]\", src: \"$.json1.k1[0]\"}",
384
- " - {name: \"$.json1.k2[0]\"}", // $.json1.k2 must be specified now, or $.json.k2 will be removed entirely
385
- " - {name: \"$.json1.k3\", type: json, default: \"[]\"}",
386
- " - {name: \"$.json1.k3[0]\", type: json, default: \"{}\"}",
439
+ " - {name: \"$.json1.k2[0]\"}",
387
440
  " - {name: \"$.json1.k3[0].k3\", type: string, default: v}");
388
441
  Schema inputSchema = Schema.builder()
389
442
  .add("json1", JSON)
@@ -400,7 +453,7 @@ public class TestJsonVisitor
400
453
  k2, ValueFactory.newArray(v, v));
401
454
 
402
455
  MapValue visited = subject.visit("$['json1']", map).asMapValue();
403
- assertEquals("{\"k1\":[{\"k1\":\"v\"}],\"k3\":[{\"k3\":\"v\"}]}", visited.toString());
456
+ assertEquals("{\"k1\":[{\"k1\":\"v\"}],\"k2\":[\"v\"],\"k3\":[{\"k3\":\"v\"}]}", visited.toString());
404
457
  }
405
458
 
406
459
  @Test
@@ -435,7 +488,6 @@ public class TestJsonVisitor
435
488
  PluginTask task = taskFromYamlString(
436
489
  "type: column",
437
490
  "add_columns:",
438
- " - {name: \"$['json1']['k3']\", type: json, default: \"{}\"}",
439
491
  " - {name: \"$['json1']['k3']['k3']\", type: string, default: v}",
440
492
  " - {name: \"$['json1']['k4']\", src: \"$['json1']['k2']\"}");
441
493
  Schema inputSchema = Schema.builder()
@@ -464,7 +516,6 @@ public class TestJsonVisitor
464
516
  "columns:",
465
517
  " - {name: \"$['json1']['k1']\"}",
466
518
  " - {name: \"$['json1']['k2']['k2']\"}",
467
- " - {name: \"$['json1']['k3']\", type: json, default: \"{}\"}",
468
519
  " - {name: \"$['json1']['k3']['k3']\", type: string, default: v}",
469
520
  " - {name: \"$['json1']['k4']\", src: \"$['json1']['k2']\"}");
470
521
  Schema inputSchema = Schema.builder()
@@ -481,7 +532,7 @@ public class TestJsonVisitor
481
532
  k2, ValueFactory.newMap(k2, v));
482
533
 
483
534
  MapValue visited = subject.visit("$['json1']", map).asMapValue();
484
- assertEquals("{\"k1\":{\"k1\":\"v\"},\"k3\":{\"k3\":\"v\"},\"k4\":{\"k2\":\"v\"}}", visited.toString());
535
+ assertEquals("{\"k1\":{\"k1\":\"v\"},\"k2\":{\"k2\":\"v\"},\"k3\":{\"k3\":\"v\"},\"k4\":{\"k2\":\"v\"}}", visited.toString());
485
536
  }
486
537
 
487
538
  @Test
@@ -517,8 +568,6 @@ public class TestJsonVisitor
517
568
  "type: column",
518
569
  "add_columns:",
519
570
  " - {name: \"$['json1']['k1'][1]\", src: \"$['json1']['k1'][0]\"}",
520
- " - {name: \"$['json1']['k3']\", type: json, default: \"[]\"}",
521
- " - {name: \"$['json1']['k3'][0]\", type: json, default: \"{}\"}",
522
571
  " - {name: \"$['json1']['k3'][0]['k3']\", type: string, default: v}");
523
572
  Schema inputSchema = Schema.builder()
524
573
  .add("json1", JSON)
@@ -544,11 +593,8 @@ public class TestJsonVisitor
544
593
  PluginTask task = taskFromYamlString(
545
594
  "type: column",
546
595
  "columns:",
547
- " - {name: \"$['json1']['k1']\"}",
548
596
  " - {name: \"$['json1']['k1'][1]\", src: \"$['json1']['k1'][0]\"}",
549
597
  " - {name: \"$['json1']['k2'][0]\"}",
550
- " - {name: \"$['json1']['k3']\", type: json, default: \"[]\"}",
551
- " - {name: \"$['json1']['k3'][0]\", type: json, default: \"{}\"}",
552
598
  " - {name: \"$['json1']['k3'][0]['k3']\", type: string, default: v}");
553
599
  Schema inputSchema = Schema.builder()
554
600
  .add("json1", JSON)
@@ -564,7 +610,7 @@ public class TestJsonVisitor
564
610
  k2, ValueFactory.newArray(v, v));
565
611
 
566
612
  MapValue visited = subject.visit("$['json1']", map).asMapValue();
567
- assertEquals("{\"k1\":[{\"k1\":\"v\"}],\"k3\":[{\"k3\":\"v\"}]}", visited.toString());
613
+ assertEquals("{\"k1\":[{\"k1\":\"v\"}],\"k2\":[\"v\"],\"k3\":[{\"k3\":\"v\"}]}", visited.toString());
568
614
  }
569
615
 
570
616
  // Because the dot notation is converted to single quotes by default,
@@ -652,13 +698,14 @@ public class TestJsonVisitor
652
698
  assertEquals("{\"k____1\":[{\"k____1\":\"v\"}],\"k_2\":{\"k_2\":\"v\"}}", visited.toString());
653
699
  }
654
700
 
701
+ /*
655
702
  @Test
656
703
  public void visit_withColumnNameIncludingSingleQuotes()
657
704
  {
658
705
  PluginTask task = taskFromYamlString(
659
706
  "type: column",
660
707
  "columns:",
661
- " - {name: \"$[\\\"'json1\\\"]['k1']\"}");
708
+ " - {name: \"$['\\\\'json1']['k1']\"}");
662
709
  Schema inputSchema = Schema.builder()
663
710
  .add("'json1", JSON)
664
711
  .build();
@@ -672,9 +719,10 @@ public class TestJsonVisitor
672
719
  MapValue visited = subject.visit("$['\\'json1']", map).asMapValue();
673
720
  assertEquals("{\"k1\":\"v\"}", visited.toString());
674
721
  }
722
+ */
675
723
 
676
724
  @Test(expected = ConfigException.class)
677
- public void constructor_mustBeRaisedConfigExceptionWithMultiProperties() {
725
+ public void configException_MultiProperties() {
678
726
  PluginTask task = taskFromYamlString(
679
727
  "type: column",
680
728
  "columns:",
@@ -687,7 +735,7 @@ public class TestJsonVisitor
687
735
 
688
736
  // It is recognized multi properties if the square brackets does not close properly
689
737
  @Test(expected = ConfigException.class)
690
- public void constructor_mustBeRaisedInvalidPathExceptionWithPropertyIsNotSeparatedByCommas()
738
+ public void configException_PropertyIsNotSeparatedByCommas()
691
739
  {
692
740
  PluginTask task = taskFromYamlString(
693
741
  "type: column",
@@ -700,7 +748,7 @@ public class TestJsonVisitor
700
748
  }
701
749
 
702
750
  @Test(expected = ConfigException.class)
703
- public void constructor_mustBeRaisedConfigExceptionWithFunctionPathToken()
751
+ public void configException_FunctionPathToken()
704
752
  {
705
753
  PluginTask task = taskFromYamlString(
706
754
  "type: column",
@@ -713,7 +761,7 @@ public class TestJsonVisitor
713
761
  }
714
762
 
715
763
  @Test(expected = ConfigException.class)
716
- public void constructor_mustBeRaisedConfigExceptionWithPredicatePathToken()
764
+ public void configException_PredicatePathToken()
717
765
  {
718
766
  PluginTask task = taskFromYamlString(
719
767
  "type: column",
@@ -726,7 +774,7 @@ public class TestJsonVisitor
726
774
  }
727
775
 
728
776
  @Test(expected = ConfigException.class)
729
- public void constructor_mustBeRaisedConfigExceptionWithScanPathToken()
777
+ public void configException_ScanPathToken()
730
778
  {
731
779
  PluginTask task = taskFromYamlString(
732
780
  "type: column",
@@ -739,7 +787,7 @@ public class TestJsonVisitor
739
787
  }
740
788
 
741
789
  @Test(expected = ConfigException.class)
742
- public void constructor_mustBeRaisedConfigExceptionWithMultiIndexOperation()
790
+ public void configException_MultiIndexOperation()
743
791
  {
744
792
  PluginTask task = taskFromYamlString(
745
793
  "type: column",
@@ -752,7 +800,7 @@ public class TestJsonVisitor
752
800
  }
753
801
 
754
802
  @Test(expected = ConfigException.class)
755
- public void constructor_mustBeRaisedConfigExceptionWithMultiIndexOperationAtMiddlePosition()
803
+ public void configException_IndexOperationAtMiddlePosition()
756
804
  {
757
805
  PluginTask task = taskFromYamlString(
758
806
  "type: column",
@@ -765,7 +813,7 @@ public class TestJsonVisitor
765
813
  }
766
814
 
767
815
  @Test(expected = ConfigException.class)
768
- public void constructor_mustBeRaisedConfigExceptionWithMArraySliceOperation()
816
+ public void configException_ArraySliceOperation()
769
817
  {
770
818
  PluginTask task = taskFromYamlString(
771
819
  "type: column",
@@ -778,7 +826,7 @@ public class TestJsonVisitor
778
826
  }
779
827
 
780
828
  @Test(expected = ConfigException.class)
781
- public void constructor_mustBeRaisedConfigExceptionWithMArraySliceOperationAtMiddlePosition()
829
+ public void configException_MArraySliceOperationAtMiddlePosition()
782
830
  {
783
831
  PluginTask task = taskFromYamlString(
784
832
  "type: column",
@@ -794,7 +842,7 @@ public class TestJsonVisitor
794
842
  public ExpectedException thrown = ExpectedException.none();
795
843
 
796
844
  @Test
797
- public void constructor_mustBeRaisedConfigExceptionEffectively()
845
+ public void configException_PathCompileError()
798
846
  {
799
847
  PluginTask task = taskFromYamlString(
800
848
  "type: column",
@@ -808,4 +856,4 @@ public class TestJsonVisitor
808
856
 
809
857
  jsonVisitor(task, inputSchema);
810
858
  }
811
- }
859
+ }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-column
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0.pre2
4
+ version: 0.6.0.pre3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naotoshi Seo
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-10-17 00:00:00.000000000 Z
11
+ date: 2016-10-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -52,9 +52,13 @@ files:
52
52
  - README.md
53
53
  - build.gradle
54
54
  - config/checkstyle/checkstyle.xml
55
+ - example/add_columns.txt
55
56
  - example/add_columns.yml
57
+ - example/bracket_notations.txt
56
58
  - example/bracket_notations.yml
59
+ - example/columns.txt
57
60
  - example/columns.yml
61
+ - example/drop_columns.txt
58
62
  - example/drop_columns.yml
59
63
  - example/edgecase.tsv
60
64
  - example/edgecase.yml
@@ -69,8 +73,8 @@ files:
69
73
  - src/main/java/org/embulk/filter/column/ColumnFilterPlugin.java
70
74
  - src/main/java/org/embulk/filter/column/ColumnVisitorImpl.java
71
75
  - src/main/java/org/embulk/filter/column/JsonColumn.java
76
+ - src/main/java/org/embulk/filter/column/JsonPathUtil.java
72
77
  - src/main/java/org/embulk/filter/column/JsonVisitor.java
73
- - src/main/java/org/embulk/filter/column/PathTokenUtil.java
74
78
  - src/test/java/org/embulk/filter/column/TestColumnFilterPlugin.java
75
79
  - src/test/java/org/embulk/filter/column/TestColumnVisitorImpl.java
76
80
  - src/test/java/org/embulk/filter/column/TestJsonColumn.java
@@ -78,9 +82,9 @@ files:
78
82
  - classpath/accessors-smart-1.1.jar
79
83
  - classpath/asm-5.0.3.jar
80
84
  - classpath/commons-lang3-3.4.jar
81
- - classpath/embulk-filter-column-0.6.0.pre2.jar
85
+ - classpath/embulk-filter-column-0.6.0.pre3.jar
82
86
  - classpath/json-smart-2.2.1.jar
83
- - classpath/JsonPathCompiler-0.0.6.jar
87
+ - classpath/JsonPathCompiler-0.0.11.jar
84
88
  - classpath/slf4j-api-1.7.21.jar
85
89
  homepage: https://github.com/sonots/embulk-filter-column
86
90
  licenses:
@@ -1,39 +0,0 @@
1
- package org.embulk.filter.column;
2
-
3
- import com.dena.analytics.jsonpathcompiler.expressions.path.ArrayIndexOperation;
4
- import com.dena.analytics.jsonpathcompiler.expressions.path.ArrayPathToken;
5
- import com.dena.analytics.jsonpathcompiler.expressions.path.FunctionPathToken;
6
- import com.dena.analytics.jsonpathcompiler.expressions.path.PathToken;
7
- import com.dena.analytics.jsonpathcompiler.expressions.path.PredicatePathToken;
8
- import com.dena.analytics.jsonpathcompiler.expressions.path.ScanPathToken;
9
- import org.embulk.config.ConfigException;
10
-
11
- public class PathTokenUtil
12
- {
13
- public static void assertSupportedPathToken(PathToken pathToken, String path)
14
- {
15
- if (pathToken instanceof ArrayPathToken) {
16
- ArrayIndexOperation arrayIndexOperation = ((ArrayPathToken) pathToken).getArrayIndexOperation();
17
- assertSupportedArrayPathToken(arrayIndexOperation, path);
18
- }
19
- else if (pathToken instanceof ScanPathToken) {
20
- throw new ConfigException(String.format("scan path token is not supported \"%s\"", path));
21
- }
22
- else if (pathToken instanceof FunctionPathToken) {
23
- throw new ConfigException(String.format("function path token is not supported \"%s\"", path));
24
- }
25
- else if (pathToken instanceof PredicatePathToken) {
26
- throw new ConfigException(String.format("predicate path token is not supported \"%s\"", path));
27
- }
28
- }
29
-
30
- public static void assertSupportedArrayPathToken(ArrayIndexOperation arrayIndexOperation, String path)
31
- {
32
- if (arrayIndexOperation == null) {
33
- throw new ConfigException(String.format("Array Slice Operation is not supported \"%s\"", path));
34
- }
35
- else if (!arrayIndexOperation.isSingleIndexOperation()) {
36
- throw new ConfigException(String.format("Multi Array Indexes is not supported \"%s\"", path));
37
- }
38
- }
39
- }