embulk-filter-column 0.6.0.pre2 → 0.6.0.pre3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3b693b9f514309a94b2fea4e85714e6e0c9de0f1
4
- data.tar.gz: ed56dc5b557605436985615eacdbc71b700a7640
3
+ metadata.gz: d26c69b6a7bfb4a6c4d68c6967301a0dc32562f5
4
+ data.tar.gz: c674640721b35540a9aba9aef1ad76c5cb5adc96
5
5
  SHA512:
6
- metadata.gz: 5d1399f4e807811687c032a1738254bc2013d0a760a88343ec03d3bd3b36309cadb46f1c805859d68724a1bb40b0bc73e1908dbab939cb54e453f7a6db93b5b1
7
- data.tar.gz: 7937b001df59b540c8a10fcbd4e3c707565d5e9f9ca4e919d96b7403252d74b654ee8ca0b476fe53d504192ef9302b678f209bed30071e873af48f0a74d8ae2e
6
+ metadata.gz: 9cc5e8165d85afed48b65302d0baa9f54e35b1ae5438f390b5b5970d8342e14cf72b30f137ca901229578c3493b23a4c3a6286ea076501bc926eaed97491f70f
7
+ data.tar.gz: 867fa85522091768287f5b4ade0b7e17947899e1f6ccbeeb017d5115fb0468d418c62712af308a80580afe1be6bb5a2f3ac73c7c1274b11d4c674dd8c8635a66
data/CHANGELOG.md CHANGED
@@ -1,8 +1,9 @@
1
- # 0.6.0 (2016-10-01)
1
+ # 0.6.0 (2016-XX-XX)
2
2
 
3
3
  Enhancements:
4
4
 
5
5
  * Support bracket notation in json path (thanks to @kysnm)
6
+ * Autocomplete ancestor json paths
6
7
 
7
8
  # 0.5.4 (2016-08-05)
8
9
 
data/README.md CHANGED
@@ -133,27 +133,15 @@ NOTE:
133
133
 
134
134
  NOTE:
135
135
 
136
- To deeply visit json path such as `$.payload.foo.bar`, you have to write its upper paths together like:
136
+ Renaming or copying json paths with the `src` option is only partially supported so far. The source and destination must share the same parent json path, like:
137
137
 
138
138
  ```
139
- - (name: $.payload.foo}
140
- - {name: $.payload.foo.bar}
141
- ```
142
-
143
- NOTE:
144
-
145
- `src` (to rename or copy columns) is only partially supported yet. The upper json path must be same like:
146
-
147
- ```
148
- - {name: $.payload.foo}
149
139
  - {name: $.payload.foo.dest, src: $.payload.foo.src}
150
140
  ```
151
141
 
152
- Below does not work yet.
142
+ That is, the example below does not work yet, because the parent json paths differ (`$.payload.foo` and `$.payload.bar`):
153
143
 
154
144
  ```
155
- - {name: $.payload.foo}
156
- - {name: $.payload.bar}
157
145
  - {name: $.payload.foo.dest, src: $.payload.bar.src}
158
146
  ```
159
147
 
data/build.gradle CHANGED
@@ -15,14 +15,14 @@ configurations {
15
15
  provided
16
16
  }
17
17
 
18
- version = "0.6.0.pre2"
18
+ version = "0.6.0.pre3"
19
19
  sourceCompatibility = 1.7
20
20
  targetCompatibility = 1.7
21
21
 
22
22
  dependencies {
23
23
  compile "org.embulk:embulk-core:0.8.+"
24
24
  provided "org.embulk:embulk-core:0.8.+"
25
- compile 'com.dena.analytics:JsonPathCompiler:0.0.6'
25
+ compile "io.github.medjed:JsonPathCompiler:0.0.+"
26
26
 
27
27
  testCompile "junit:junit:4.+"
28
28
  testCompile "org.embulk:embulk-core:0.8.+:tests"
@@ -0,0 +1,11 @@
1
+ 2016-10-20 02:29:31.933 +0900: Embulk v0.8.6
2
+ 2016-10-20 02:29:32.859 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/column from a load path
3
+ 2016-10-20 02:29:32.876 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'example.csv'
4
+ 2016-10-20 02:29:32.882 +0900 [INFO] (0001:preview): Loading files [example/example.csv]
5
+ +-------------------------+---------+---------------------------------------------+--------------+-------------------------------------------------------------+----------+-------------------------+-------------------------+-------------------+
6
+ | time:timestamp | id:long | name:string | score:double | json:json | foo:long | d:timestamp | t:timestamp | copy_score:double |
7
+ +-------------------------+---------+---------------------------------------------+--------------+-------------------------------------------------------------+----------+-------------------------+-------------------------+-------------------+
8
+ | 2015-07-13 00:00:00 UTC | 0 | Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY | 1370.0 | {"foo":"foo","bar":"bar","d":"2015-07-13","copy_foo":"foo"} | 1 | 2015-07-12 15:00:00 UTC | 2015-07-13 00:00:00 UTC | 1370.0 |
9
+ | 2015-07-13 00:00:00 UTC | 0 | Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY | 1370.0 | {"foo":"foo2","bar":1,"d":"2015-07-13","copy_foo":"foo2"} | 1 | 2015-07-12 15:00:00 UTC | 2015-07-13 00:00:00 UTC | 1370.0 |
10
+ | | | | 9170.0 | | 1 | 2015-07-12 15:00:00 UTC | 2015-07-13 00:00:00 UTC | 9170.0 |
11
+ +-------------------------+---------+---------------------------------------------+--------------+-------------------------------------------------------------+----------+-------------------------+-------------------------+-------------------+
@@ -23,7 +23,7 @@ filters:
23
23
  - {name: d, type: timestamp, default: "2015-07-13"}
24
24
  - {name: t, type: timestamp, default: "2015-07-13 00:00:00", timezone: "UTC", format: "%Y-%m-%d %H:%M:%S"}
25
25
  - {name: copy_score, src: score}
26
- - {name: $.json.foo, type: long, default: 1}
26
+ - {name: $.json.bar, type: long, default: 1}
27
27
  - {name: $.json.d, type: string, default: "2015-07-13"}
28
28
  - {name: $.json.copy_foo, src: $.json.foo}
29
29
  out:
@@ -0,0 +1,11 @@
1
+ 2016-10-20 02:29:47.367 +0900: Embulk v0.8.6
2
+ 2016-10-20 02:29:48.254 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/column from a load path
3
+ 2016-10-20 02:29:48.270 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'example.csv'
4
+ 2016-10-20 02:29:48.274 +0900 [INFO] (0001:preview): Loading files [example/example.csv]
5
+ +-------------------------+---------------------------------------------+----------+---------+-------------------+---------------------------------------------------------------------------------------------------------+
6
+ | time:timestamp | name:string | foo:long | id:long | copy_score:double | json:json |
7
+ +-------------------------+---------------------------------------------+----------+---------+-------------------+---------------------------------------------------------------------------------------------------------+
8
+ | 2015-07-13 00:00:00 UTC | Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY | 1 | 0 | 1370.0 | {"foo":"foo","copy_foo":"foo","array_a":[{"a":"a0"}],"array_b":[{"b":"b"},{"b":"b"}]} |
9
+ | 2015-07-13 00:00:00 UTC | Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY | 1 | 0 | 1370.0 | {"foo":"foo2","copy_foo":"foo2","array_a":[{"a":"a0"}],"array_b":[{"b":"b"},{"b":"b"}]} |
10
+ | 2015-07-13 00:00:00 UTC | foo | 1 | | 9170.0 | {"foo":"FOO","copy_foo":"FOO","array_a":[{"a":"default0"}],"array_b":[{"b":"default"},{"b":"default"}]} |
11
+ +-------------------------+---------------------------------------------+----------+---------+-------------------+---------------------------------------------------------------------------------------------------------+
@@ -0,0 +1,11 @@
1
+ 2016-10-20 02:30:03.618 +0900: Embulk v0.8.6
2
+ 2016-10-20 02:30:04.499 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/column from a load path
3
+ 2016-10-20 02:30:04.516 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'example.csv'
4
+ 2016-10-20 02:30:04.521 +0900 [INFO] (0001:preview): Loading files [example/example.csv]
5
+ +-------------------------+---------------------------------------------+----------+---------+-------------------+----------------------------------+
6
+ | time:timestamp | name:string | foo:long | id:long | copy_score:double | json:json |
7
+ +-------------------------+---------------------------------------------+----------+---------+-------------------+----------------------------------+
8
+ | 2015-07-13 00:00:00 UTC | Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY | 1 | 0 | 1370.0 | {"foo":"foo","copy_foo":"foo"} |
9
+ | 2015-07-13 00:00:00 UTC | Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY | 1 | 0 | 1370.0 | {"foo":"foo2","copy_foo":"foo2"} |
10
+ | 2015-07-13 00:00:00 UTC | foo | 1 | | 9170.0 | {"foo":"FOO","copy_foo":"FOO"} |
11
+ +-------------------------+---------------------------------------------+----------+---------+-------------------+----------------------------------+
@@ -0,0 +1,11 @@
1
+ 2016-10-20 02:30:16.407 +0900: Embulk v0.8.6
2
+ 2016-10-20 02:30:17.290 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/column from a load path
3
+ 2016-10-20 02:30:17.305 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'example.csv'
4
+ 2016-10-20 02:30:17.310 +0900 [INFO] (0001:preview): Loading files [example/example.csv]
5
+ +---------------------------------------------+--------------+---------------+
6
+ | name:string | score:double | json:json |
7
+ +---------------------------------------------+--------------+---------------+
8
+ | Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY | 1370.0 | {"bar":"bar"} |
9
+ | Vqjht6YEUBsMPXmoW1iOGFROZF27pBzz0TUkOKeDXEY | 1370.0 | {} |
10
+ | | 9170.0 | |
11
+ +---------------------------------------------+--------------+---------------+
@@ -27,7 +27,7 @@ import org.slf4j.Logger;
27
27
 
28
28
  import java.util.List;
29
29
 
30
- import com.dena.analytics.jsonpathcompiler.expressions.path.PathCompiler;
30
+ import io.github.medjed.jsonpathcompiler.expressions.path.PathCompiler;
31
31
 
32
32
  public class ColumnFilterPlugin implements FilterPlugin
33
33
  {
@@ -1,6 +1,6 @@
1
1
  package org.embulk.filter.column;
2
2
 
3
- import com.dena.analytics.jsonpathcompiler.expressions.Utils;
3
+ import io.github.medjed.jsonpathcompiler.expressions.Utils;
4
4
  import com.google.common.base.Throwables;
5
5
 
6
6
  import org.embulk.filter.column.ColumnFilterPlugin.ColumnConfig;
@@ -1,12 +1,12 @@
1
1
  package org.embulk.filter.column;
2
2
 
3
- import com.dena.analytics.jsonpathcompiler.expressions.Path;
4
- import com.dena.analytics.jsonpathcompiler.expressions.path.ArrayIndexOperation;
5
- import com.dena.analytics.jsonpathcompiler.expressions.path.ArrayPathToken;
6
- import com.dena.analytics.jsonpathcompiler.expressions.path.PathCompiler;
7
- import com.dena.analytics.jsonpathcompiler.expressions.path.PathToken;
8
- import com.dena.analytics.jsonpathcompiler.expressions.path.RootPathToken;
9
- import com.dena.analytics.jsonpathcompiler.expressions.path.PropertyPathToken;
3
+ import io.github.medjed.jsonpathcompiler.expressions.Path;
4
+ import io.github.medjed.jsonpathcompiler.expressions.path.ArrayIndexOperation;
5
+ import io.github.medjed.jsonpathcompiler.expressions.path.ArrayPathToken;
6
+ import io.github.medjed.jsonpathcompiler.expressions.path.PathCompiler;
7
+ import io.github.medjed.jsonpathcompiler.expressions.path.PathToken;
8
+ import io.github.medjed.jsonpathcompiler.expressions.path.RootPathToken;
9
+ import io.github.medjed.jsonpathcompiler.expressions.path.PropertyPathToken;
10
10
  import org.embulk.config.ConfigException;
11
11
  import org.embulk.spi.type.Type;
12
12
  import org.msgpack.value.StringValue;
@@ -56,17 +56,14 @@ public class JsonColumn
56
56
  this.pathValue = ValueFactory.newString(path);
57
57
  this.parentPath = compiledPath.getParentPath();
58
58
 
59
- if (compiledRoot.getTailPath().equals("[*]")) {
60
- throw new ConfigException(String.format("%s wrongly ends with [*], perhaps you can remove the [*]", path));
61
- }
62
- this.tailIndex = tailIndex(compiledRoot);
59
+ this.tailIndex = getTailIndex(compiledRoot);
63
60
  this.parentPathValue = ValueFactory.newString(parentPath);
64
61
  String tailName = getTailName(compiledRoot);
65
62
  this.tailNameValue = tailName == null ? ValueFactory.newNil() : ValueFactory.newString(tailName);
66
63
 
67
64
  this.srcValue = ValueFactory.newString(this.src);
68
65
  this.srcParentPath = compiledSrc.getParentPath();
69
- this.srcTailIndex = tailIndex(compiledSrcRoot);
66
+ this.srcTailIndex = getTailIndex(compiledSrcRoot);
70
67
  this.srcParentPathValue = ValueFactory.newString(this.srcParentPath);
71
68
  String srcTailName = getTailName(compiledSrcRoot);
72
69
  this.srcTailNameValue = srcTailName == null ? ValueFactory.newNil() : ValueFactory.newString(srcTailName);
@@ -92,12 +89,12 @@ public class JsonColumn
92
89
  }
93
90
  }
94
91
 
95
- private Long tailIndex(RootPathToken root)
92
+ private Long getTailIndex(RootPathToken root)
96
93
  {
97
94
  PathToken tail = root.getTail();
98
95
  if (tail instanceof ArrayPathToken) {
99
96
  ArrayIndexOperation arrayIndexOperation = ((ArrayPathToken) tail).getArrayIndexOperation();
100
- PathTokenUtil.assertSupportedArrayPathToken(arrayIndexOperation, path);
97
+ JsonPathUtil.assertSupportedArrayPathToken(arrayIndexOperation, path);
101
98
  return arrayIndexOperation.indexes().get(0).longValue();
102
99
  }
103
100
  else {
@@ -135,7 +132,7 @@ public class JsonColumn
135
132
  return parentPath;
136
133
  }
137
134
 
138
- public Long tailIndex()
135
+ public Long getTailIndex()
139
136
  {
140
137
  return tailIndex;
141
138
  }
@@ -186,7 +183,7 @@ public class JsonColumn
186
183
  return ((RootPathToken) PathCompiler.compile(path).getRoot()).getTailPath();
187
184
  }
188
185
 
189
- public static Long tailIndex(String path)
186
+ public static Long getTailIndex(String path)
190
187
  {
191
188
  Path compiledPath = PathCompiler.compile(path);
192
189
  PathToken tail = ((RootPathToken) compiledPath.getRoot()).getTail();
@@ -0,0 +1,76 @@
1
+ package org.embulk.filter.column;
2
+
3
+ import io.github.medjed.jsonpathcompiler.InvalidPathException;
4
+ import io.github.medjed.jsonpathcompiler.expressions.Path;
5
+ import io.github.medjed.jsonpathcompiler.expressions.path.ArrayIndexOperation;
6
+ import io.github.medjed.jsonpathcompiler.expressions.path.ArrayPathToken;
7
+ import io.github.medjed.jsonpathcompiler.expressions.path.FunctionPathToken;
8
+ import io.github.medjed.jsonpathcompiler.expressions.path.PathCompiler;
9
+ import io.github.medjed.jsonpathcompiler.expressions.path.PathToken;
10
+ import io.github.medjed.jsonpathcompiler.expressions.path.PredicatePathToken;
11
+ import io.github.medjed.jsonpathcompiler.expressions.path.RootPathToken;
12
+ import io.github.medjed.jsonpathcompiler.expressions.path.ScanPathToken;
13
+ import org.embulk.config.ConfigException;
14
+
15
+ public class JsonPathUtil
16
+ {
17
+ public static void assertJsonPathFormat(String path)
18
+ {
19
+ Path compiledPath;
20
+ try {
21
+ compiledPath = PathCompiler.compile(path);
22
+ }
23
+ catch (InvalidPathException e) {
24
+ throw new ConfigException(String.format("jsonpath %s, %s", path, e.getMessage()));
25
+ }
26
+ PathToken pathToken = compiledPath.getRoot();
27
+ while (true) {
28
+ assertSupportedPathToken(pathToken, path);
29
+ if (pathToken.isLeaf()) {
30
+ break;
31
+ }
32
+ pathToken = pathToken.next();
33
+ }
34
+ }
35
+
36
+ public static void assertSupportedPathToken(PathToken pathToken, String path)
37
+ {
38
+ if (pathToken instanceof ArrayPathToken) {
39
+ ArrayIndexOperation arrayIndexOperation = ((ArrayPathToken) pathToken).getArrayIndexOperation();
40
+ assertSupportedArrayPathToken(arrayIndexOperation, path);
41
+ }
42
+ else if (pathToken instanceof ScanPathToken) {
43
+ throw new ConfigException(String.format("scan path token is not supported \"%s\"", path));
44
+ }
45
+ else if (pathToken instanceof FunctionPathToken) {
46
+ throw new ConfigException(String.format("function path token is not supported \"%s\"", path));
47
+ }
48
+ else if (pathToken instanceof PredicatePathToken) {
49
+ throw new ConfigException(String.format("predicate path token is not supported \"%s\"", path));
50
+ }
51
+ }
52
+
53
+ public static void assertSupportedArrayPathToken(ArrayIndexOperation arrayIndexOperation, String path)
54
+ {
55
+ if (arrayIndexOperation == null) {
56
+ throw new ConfigException(String.format("Array Slice Operation is not supported \"%s\"", path));
57
+ }
58
+ else if (!arrayIndexOperation.isSingleIndexOperation()) {
59
+ throw new ConfigException(String.format("Multi Array Indexes is not supported \"%s\"", path));
60
+ }
61
+ }
62
+
63
+ public static void assertDoNotEndsWithArrayWildcard(String path)
64
+ {
65
+ Path compiledPath;
66
+ try {
67
+ compiledPath = PathCompiler.compile(path);
68
+ }
69
+ catch (InvalidPathException e) {
70
+ throw new ConfigException(String.format("jsonpath %s, %s", path, e.getMessage()));
71
+ }
72
+ if (((RootPathToken) compiledPath.getRoot()).getTailPath().equals("[*]")) {
73
+ throw new ConfigException(String.format("%s wrongly ends with [*], perhaps you can remove the [*]", compiledPath.toString()));
74
+ }
75
+ }
76
+ }
@@ -1,9 +1,10 @@
1
1
  package org.embulk.filter.column;
2
2
 
3
- import com.dena.analytics.jsonpathcompiler.InvalidPathException;
4
- import com.dena.analytics.jsonpathcompiler.expressions.Path;
5
- import com.dena.analytics.jsonpathcompiler.expressions.path.PathCompiler;
6
- import com.dena.analytics.jsonpathcompiler.expressions.path.PathToken;
3
+ import io.github.medjed.jsonpathcompiler.InvalidPathException;
4
+ import io.github.medjed.jsonpathcompiler.expressions.Path;
5
+ import io.github.medjed.jsonpathcompiler.expressions.path.ArrayPathToken;
6
+ import io.github.medjed.jsonpathcompiler.expressions.path.PathCompiler;
7
+ import io.github.medjed.jsonpathcompiler.expressions.path.PathToken;
7
8
  import org.embulk.config.ConfigException;
8
9
  import org.embulk.filter.column.ColumnFilterPlugin.ColumnConfig;
9
10
  import org.embulk.filter.column.ColumnFilterPlugin.PluginTask;
@@ -18,6 +19,7 @@ import org.embulk.spi.type.LongType;
18
19
  import org.embulk.spi.type.StringType;
19
20
  import org.embulk.spi.type.TimestampType;
20
21
  import org.embulk.spi.type.Type;
22
+ import org.embulk.spi.type.Types;
21
23
  import org.msgpack.value.ArrayValue;
22
24
  import org.msgpack.value.MapValue;
23
25
  import org.msgpack.value.Value;
@@ -38,9 +40,13 @@ public class JsonVisitor
38
40
  final PluginTask task;
39
41
  final Schema inputSchema;
40
42
  final Schema outputSchema;
43
+ // jsonpath
41
44
  final HashSet<String> shouldVisitSet = new HashSet<>();
45
+ // parent jsonpath => { jsonpath => json column }
42
46
  final HashMap<String, LinkedHashMap<String, JsonColumn>> jsonColumns = new HashMap<>();
47
+ // parent jsonpath => { jsonpath => json column }
43
48
  final HashMap<String, LinkedHashMap<String, JsonColumn>> jsonAddColumns = new HashMap<>();
49
+ // parent jsonpath => [ jsonpath ]
44
50
  final HashMap<String, HashSet<String>> jsonDropColumns = new HashMap<>();
45
51
 
46
52
  JsonVisitor(PluginTask task, Schema inputSchema, Schema outputSchema)
@@ -92,6 +98,18 @@ public class JsonVisitor
92
98
  jsonColumns.get(parentPath).put(compiledPath.toString(), value);
93
99
  }
94
100
 
101
+ private boolean jsonColumnsContainsKey(String path)
102
+ {
103
+ Path compiledPath = PathCompiler.compile(path);
104
+ String parentPath = compiledPath.getParentPath();
105
+ if (jsonColumns.containsKey(parentPath)) {
106
+ return jsonColumns.get(parentPath).containsKey(compiledPath.toString());
107
+ }
108
+ else {
109
+ return false;
110
+ }
111
+ }
112
+
95
113
  private void jsonAddColumnsPut(String path, JsonColumn value)
96
114
  {
97
115
  Path compiledPath = PathCompiler.compile(path);
@@ -102,6 +120,18 @@ public class JsonVisitor
102
120
  jsonAddColumns.get(parentPath).put(compiledPath.toString(), value);
103
121
  }
104
122
 
123
+ private boolean jsonAddColumnsContainsKey(String path)
124
+ {
125
+ Path compiledPath = PathCompiler.compile(path);
126
+ String parentPath = compiledPath.getParentPath();
127
+ if (jsonAddColumns.containsKey(parentPath)) {
128
+ return jsonAddColumns.get(parentPath).containsKey(compiledPath.toString());
129
+ }
130
+ else {
131
+ return false;
132
+ }
133
+ }
134
+
105
135
  private void jsonDropColumnsPut(String path)
106
136
  {
107
137
  Path compiledPath = PathCompiler.compile(path);
@@ -112,68 +142,98 @@ public class JsonVisitor
112
142
  jsonDropColumns.get(parentPath).add(compiledPath.toString());
113
143
  }
114
144
 
115
- // build jsonColumns, jsonAddColumns, and jsonDropColumns
116
- private void buildJsonSchema()
145
+ private void buildJsonColumns()
117
146
  {
118
147
  List<ColumnConfig> columns = task.getColumns();
119
- List<ColumnConfig> addColumns = task.getAddColumns();
120
- List<ColumnConfig> dropColumns = task.getDropColumns();
121
-
122
- int i = 0;
123
- if (dropColumns.size() > 0) {
124
- for (ColumnConfig dropColumn : dropColumns) {
125
- String name = dropColumn.getName();
126
- // skip NON json path notation to build output schema
127
- if (! PathCompiler.isProbablyJsonPath(name)) {
128
- continue;
148
+ for (ColumnConfig column : columns) {
149
+ String name = column.getName();
150
+ // skip NON json path notation to build output schema
151
+ if (! PathCompiler.isProbablyJsonPath(name)) {
152
+ continue;
153
+ }
154
+ JsonPathUtil.assertDoNotEndsWithArrayWildcard(name);
155
+ // automatically fill ancestor jsonpaths
156
+ for (JsonColumn ancestorJsonColumn : getAncestorJsonColumnList(name)) {
157
+ String ancestorJsonPath = ancestorJsonColumn.getPath();
158
+ if (!jsonColumnsContainsKey(ancestorJsonPath)) {
159
+ jsonColumnsPut(ancestorJsonPath, ancestorJsonColumn);
129
160
  }
130
- jsonDropColumnsPut(name);
161
+ }
162
+ // leaf jsonpath
163
+ if (column.getSrc().isPresent()) {
164
+ String src = column.getSrc().get();
165
+ jsonColumnsPut(name, new JsonColumn(name, null, null, src));
166
+ }
167
+ else if (column.getType().isPresent() && column.getDefault().isPresent()) { // add column
168
+ Type type = column.getType().get();
169
+ Value defaultValue = getDefault(task, name, type, column);
170
+ jsonColumnsPut(name, new JsonColumn(name, type, defaultValue));
171
+ }
172
+ else {
173
+ Type type = column.getType().isPresent() ? column.getType().get() : null;
174
+ jsonColumnsPut(name, new JsonColumn(name, type));
131
175
  }
132
176
  }
133
- else if (columns.size() > 0) {
134
- for (ColumnConfig column : columns) {
135
- String name = column.getName();
136
- // skip NON json path notation to build output schema
137
- if (! PathCompiler.isProbablyJsonPath(name)) {
138
- continue;
139
- }
140
- if (column.getSrc().isPresent()) {
141
- String src = column.getSrc().get();
142
- jsonColumnsPut(name, new JsonColumn(name, null, null, src));
143
- }
144
- else if (column.getType().isPresent() && column.getDefault().isPresent()) { // add column
145
- Type type = column.getType().get();
146
- Value defaultValue = getDefault(task, name, type, column);
147
- jsonColumnsPut(name, new JsonColumn(name, type, defaultValue));
148
- }
149
- else {
150
- Type type = column.getType().isPresent() ? column.getType().get() : null;
151
- jsonColumnsPut(name, new JsonColumn(name, type));
177
+ }
178
+
179
+ private void buildJsonAddColumns()
180
+ {
181
+ List<ColumnConfig> addColumns = task.getAddColumns();
182
+ for (ColumnConfig column : addColumns) {
183
+ String name = column.getName();
184
+ // skip NON json path notation to build output schema
185
+ if (! PathCompiler.isProbablyJsonPath(name)) {
186
+ continue;
187
+ }
188
+ JsonPathUtil.assertDoNotEndsWithArrayWildcard(name);
189
+ // automatically fill ancestor jsonpaths
190
+ for (JsonColumn ancestorJsonColumn : getAncestorJsonColumnList(name)) {
191
+ String ancestorJsonPath = ancestorJsonColumn.getPath();
192
+ if (!jsonAddColumnsContainsKey(ancestorJsonPath)) {
193
+ jsonAddColumnsPut(ancestorJsonPath, ancestorJsonColumn);
152
194
  }
153
195
  }
196
+ // leaf jsonpath
197
+ if (column.getSrc().isPresent()) {
198
+ String src = column.getSrc().get();
199
+ jsonAddColumnsPut(name, new JsonColumn(name, null, null, src));
200
+ }
201
+ else if (column.getType().isPresent() && column.getDefault().isPresent()) { // add column
202
+ Type type = column.getType().get();
203
+ Value defaultValue = getDefault(task, name, type, column);
204
+ jsonAddColumnsPut(name, new JsonColumn(name, type, defaultValue));
205
+ }
206
+ else {
207
+ throw new SchemaConfigException(String.format("add_columns: Column '%s' does not have \"src\", or \"type\" and \"default\"", name));
208
+ }
154
209
  }
210
+ }
155
211
 
156
- // Add columns to last. If you want to add to head or middle, you can use `columns` option
157
- if (addColumns.size() > 0) {
158
- for (ColumnConfig column : addColumns) {
159
- String name = column.getName();
160
- // skip NON json path notation to build output schema
161
- if (! PathCompiler.isProbablyJsonPath(name)) {
162
- continue;
163
- }
164
- if (column.getSrc().isPresent()) {
165
- String src = column.getSrc().get();
166
- jsonAddColumnsPut(name, new JsonColumn(name, null, null, src));
167
- }
168
- else if (column.getType().isPresent() && column.getDefault().isPresent()) { // add column
169
- Type type = column.getType().get();
170
- Value defaultValue = getDefault(task, name, type, column);
171
- jsonAddColumnsPut(name, new JsonColumn(name, type, defaultValue));
172
- }
173
- else {
174
- throw new SchemaConfigException(String.format("add_columns: Column '%s' does not have \"src\", or \"type\" and \"default\"", name));
175
- }
212
+ private void buildJsonDropColumns()
213
+ {
214
+ List<ColumnConfig> dropColumns = task.getDropColumns();
215
+ for (ColumnConfig dropColumn : dropColumns) {
216
+ String name = dropColumn.getName();
217
+ // skip NON json path notation to build output schema
218
+ if (! PathCompiler.isProbablyJsonPath(name)) {
219
+ continue;
176
220
  }
221
+ jsonDropColumnsPut(name);
222
+ }
223
+ }
224
+
225
+ // build jsonColumns, jsonAddColumns, and jsonDropColumns
226
+ private void buildJsonSchema()
227
+ {
228
+ if (task.getDropColumns().size() > 0) {
229
+ buildJsonDropColumns();
230
+ }
231
+ else if (task.getColumns().size() > 0) {
232
+ buildJsonColumns();
233
+ }
234
+ // Add columns to last. If you want to add to head or middle, you can use `columns` option
235
+ if (task.getAddColumns().size() > 0) {
236
+ buildJsonAddColumns();
177
237
  }
178
238
  }
179
239
 
@@ -189,23 +249,53 @@ public class JsonVisitor
189
249
  if (!PathCompiler.isProbablyJsonPath(name)) {
190
250
  continue;
191
251
  }
192
- Path path;
193
- try {
194
- path = PathCompiler.compile(name);
195
- } catch (InvalidPathException e) {
196
- throw new ConfigException(String.format("path %s, %s", name, e.getMessage()));
252
+ JsonPathUtil.assertJsonPathFormat(name);
253
+ for (JsonColumn ancestorJsonColumn : getAncestorJsonColumnList(name)) {
254
+ this.shouldVisitSet.add(ancestorJsonColumn.getPath());
197
255
  }
198
- PathToken parts = path.getRoot();
199
- int count = parts.getTokenCount();
200
- StringBuilder partialPath = new StringBuilder("$");
201
- // skip "$"
202
- for (int i = 1; i < count; i++) {
203
- parts = parts.next();
204
- PathTokenUtil.assertSupportedPathToken(parts, name);
205
- partialPath.append(parts.getPathFragment().toString());
206
- this.shouldVisitSet.add(partialPath.toString());
256
+ Path path = PathCompiler.compile(name);
257
+ this.shouldVisitSet.add(path.toString());
258
+ }
259
+ }
260
+
261
+ /*
262
+ * <pre>
263
+ * $['foo']['bar'][0]['baz']
264
+ * #=>
265
+ * name: $['foo'], type: json, default: {}
266
+ * name: $['foo']['bar'], type: json, default: []
267
+ * name: $['foo']['bar'][0], type: json, default: {}
268
+ * </pre>
269
+ *
270
+ * @return ancestors as an array
271
+ */
272
+ public static ArrayList<JsonColumn> getAncestorJsonColumnList(String path)
273
+ {
274
+ ArrayList<JsonColumn> ancestorJsonColumnList = new ArrayList<>();
275
+ Path compiledPath;
276
+ try {
277
+ compiledPath = PathCompiler.compile(path);
278
+ }
279
+ catch (InvalidPathException e) {
280
+ throw new ConfigException(String.format("jsonpath %s, %s", path, e.getMessage()));
281
+ }
282
+ StringBuilder partialPath = new StringBuilder("$");
283
+ PathToken parts = compiledPath.getRoot();
284
+ parts = parts.next(); // skip "$"
285
+ while (! parts.isLeaf()) {
286
+ partialPath.append(parts.getPathFragment());
287
+ PathToken next = parts.next();
288
+ JsonColumn jsonColumn;
289
+ if (next instanceof ArrayPathToken) {
290
+ jsonColumn = new JsonColumn(partialPath.toString(), Types.JSON, ValueFactory.newArray(new Value[0], false));
291
+ }
292
+ else {
293
+ jsonColumn = new JsonColumn(partialPath.toString(), Types.JSON, ValueFactory.newMap(new Value[0]));
207
294
  }
295
+ ancestorJsonColumnList.add(jsonColumn);
296
+ parts = next;
208
297
  }
298
+ return ancestorJsonColumnList;
209
299
  }
210
300
 
211
301
  boolean shouldVisit(String jsonPath)
@@ -253,7 +343,7 @@ public class JsonVisitor
253
343
  }
254
344
  String newPath = jsonColumn.getPath();
255
345
  Value visited = visit(newPath, v);
256
- // int i = jsonColumn.tailIndex().intValue();
346
+ // int i = jsonColumn.getTailIndex().intValue();
257
347
  // index is shifted, so j++ is used.
258
348
  newValue.add(j++, visited == null ? ValueFactory.newNil() : visited);
259
349
  }
@@ -267,6 +357,12 @@ public class JsonVisitor
267
357
  }
268
358
  if (this.jsonAddColumns.containsKey(rootPath)) {
269
359
  for (JsonColumn jsonColumn : this.jsonAddColumns.get(rootPath).values()) {
360
+ int i = jsonColumn.getTailIndex().intValue();
361
+ if (i < size) {
362
+ // index for add_columns must be larger than size
363
+ // just skip because we can not raise ConfigException beforehand for flexible JSON
364
+ continue;
365
+ }
270
366
  int src = jsonColumn.getSrcTailIndex().intValue();
271
367
  Value v = (src < arrayValue.size() ? arrayValue.get(src) : null);
272
368
  if (v == null) {
@@ -326,6 +422,12 @@ public class JsonVisitor
326
422
  if (this.jsonAddColumns.containsKey(rootPath)) {
327
423
  Map<Value, Value> map = mapValue.map();
328
424
  for (JsonColumn jsonColumn : this.jsonAddColumns.get(rootPath).values()) {
425
+ Value k = jsonColumn.getTailNameValue();
426
+ if (map.containsKey(k)) {
427
+ // key must be different with already existing one for add_columns
428
+ // just skip because we can not raise ConfigException beforehand for flexible JSON
429
+ continue;
430
+ }
329
431
  Value src = jsonColumn.getSrcTailNameValue();
330
432
  Value v = map.get(src);
331
433
  if (v == null) {
@@ -67,13 +67,13 @@ public class TestJsonColumn
67
67
  }
68
68
 
69
69
  @Test
70
- public void tailIndex()
70
+ public void getTailIndex()
71
71
  {
72
- assertEquals(null, JsonColumn.tailIndex("$['foo'].bar.baz"));
73
- assertEquals(null, JsonColumn.tailIndex("$.foo.bar"));
74
- assertEquals(null, JsonColumn.tailIndex("$.foo"));
75
- assertEquals(new Long(1), JsonColumn.tailIndex("$.foo[0][1]"));
76
- assertEquals(new Long(0), JsonColumn.tailIndex("$.foo[0]"));
77
- assertEquals(new Long(0), JsonColumn.tailIndex("$[0]"));
72
+ assertEquals(null, JsonColumn.getTailIndex("$['foo'].bar.baz"));
73
+ assertEquals(null, JsonColumn.getTailIndex("$.foo.bar"));
74
+ assertEquals(null, JsonColumn.getTailIndex("$.foo"));
75
+ assertEquals(new Long(1), JsonColumn.getTailIndex("$.foo[0][1]"));
76
+ assertEquals(new Long(0), JsonColumn.getTailIndex("$.foo[0]"));
77
+ assertEquals(new Long(0), JsonColumn.getTailIndex("$[0]"));
78
78
  }
79
- }
79
+ }
@@ -14,13 +14,14 @@ import org.junit.rules.ExpectedException;
14
14
  import org.msgpack.value.MapValue;
15
15
  import org.msgpack.value.Value;
16
16
  import org.msgpack.value.ValueFactory;
17
- import com.dena.analytics.jsonpathcompiler.InvalidPathException;
18
17
 
19
18
  import static org.embulk.spi.type.Types.JSON;
20
19
  import static org.junit.Assert.assertEquals;
21
20
  import static org.junit.Assert.assertFalse;
22
21
  import static org.junit.Assert.assertTrue;
23
22
 
23
+ import java.util.ArrayList;
24
+ import java.util.Arrays;
24
25
  import java.util.HashMap;
25
26
  import java.util.HashSet;
26
27
 
@@ -58,16 +59,32 @@ public class TestJsonVisitor
58
59
  return new JsonVisitor(task, inputSchema, outputSchema);
59
60
  }
60
61
 
61
- @Test(expected = ConfigException.class)
62
- public void configException_Columns()
62
+ @Test
63
+ public void getAncestorJsonColumnList()
63
64
  {
64
- PluginTask task = taskFromYamlString(
65
- "type: column",
66
- "columns:",
67
- " - {name: \"$.json1.b.b[*]\"}");
68
- Schema inputSchema = Schema.builder().build();
69
- // b[*] should be written as b
70
- jsonVisitor(task, inputSchema);
65
+ ArrayList<JsonColumn> subject;
66
+
67
+ subject = JsonVisitor.getAncestorJsonColumnList("$.json1.a.default");
68
+ assertEquals("$['json1']", subject.get(0).getPath());
69
+ assertTrue(subject.get(0).getDefaultValue().isMapValue());
70
+ assertEquals("$['json1']['a']", subject.get(1).getPath());
71
+ assertTrue(subject.get(1).getDefaultValue().isMapValue());
72
+
73
+ subject = JsonVisitor.getAncestorJsonColumnList("$.json1.a[0].default");
74
+ assertEquals("$['json1']", subject.get(0).getPath());
75
+ assertTrue(subject.get(0).getDefaultValue().isMapValue());
76
+ assertEquals("$['json1']['a']", subject.get(1).getPath());
77
+ assertTrue(subject.get(1).getDefaultValue().isArrayValue());
78
+ assertEquals("$['json1']['a'][0]", subject.get(2).getPath());
79
+ assertTrue(subject.get(2).getDefaultValue().isMapValue());
80
+
81
+ subject = JsonVisitor.getAncestorJsonColumnList("$.json1.a.default[0]");
82
+ assertEquals("$['json1']", subject.get(0).getPath());
83
+ assertTrue(subject.get(0).getDefaultValue().isMapValue());
84
+ assertEquals("$['json1']['a']", subject.get(1).getPath());
85
+ assertTrue(subject.get(1).getDefaultValue().isMapValue());
86
+ assertEquals("$['json1']['a']['default']", subject.get(2).getPath());
87
+ assertTrue(subject.get(2).getDefaultValue().isArrayValue());
71
88
  }
72
89
 
73
90
  @Test
@@ -103,7 +120,7 @@ public class TestJsonVisitor
103
120
  }
104
121
 
105
122
  @Test
106
- public void buildJsonSchema_DropColumns()
123
+ public void buildJsonDropColumns()
107
124
  {
108
125
  PluginTask task = taskFromYamlString(
109
126
  "type: column",
@@ -135,8 +152,32 @@ public class TestJsonVisitor
135
152
  }
136
153
  }
137
154
 
155
+ @Test(expected = ConfigException.class)
156
+ public void configException_Columns()
157
+ {
158
+ PluginTask task = taskFromYamlString(
159
+ "type: column",
160
+ "columns:",
161
+ " - {name: \"$.json1.b.b[*]\"}");
162
+ Schema inputSchema = Schema.builder().build();
163
+ // b[*] should be written as b
164
+ jsonVisitor(task, inputSchema);
165
+ }
166
+
167
+ @Test(expected = ConfigException.class)
168
+ public void buildJsonAddColumns_ConfigException()
169
+ {
170
+ PluginTask task = taskFromYamlString(
171
+ "type: column",
172
+ "add_columns:",
173
+ " - {name: \"$.json1.b.b[*]\", type: json, default: []}");
174
+ Schema inputSchema = Schema.builder().build();
175
+ // b[*] should be written as b
176
+ jsonVisitor(task, inputSchema);
177
+ }
178
+
138
179
  @Test
139
- public void buildJsonSchema_AddColumns()
180
+ public void buildJsonAddColumns()
140
181
  {
141
182
  PluginTask task = taskFromYamlString(
142
183
  "type: column",
@@ -150,19 +191,22 @@ public class TestJsonVisitor
150
191
  .build();
151
192
  JsonVisitor subject = jsonVisitor(task, inputSchema);
152
193
 
153
- assertFalse(subject.jsonAddColumns.containsKey("$['json1']"));
194
+ assertTrue(subject.jsonAddColumns.containsKey("$"));
195
+ assertTrue(subject.jsonAddColumns.containsKey("$['json1']"));
154
196
  assertTrue(subject.jsonAddColumns.containsKey("$['json1']['a']"));
155
197
  assertTrue(subject.jsonAddColumns.containsKey("$['json1']['a']['copy_array']"));
156
198
 
157
199
  {
158
200
  HashMap<String, JsonColumn> jsonColumns = subject.jsonAddColumns.get("$['json1']['a']");
159
- assertEquals(2, jsonColumns.size());
201
+ assertEquals(3, jsonColumns.size());
160
202
  String[] keys = jsonColumns.keySet().toArray(new String[0]);
161
203
  JsonColumn[] values = jsonColumns.values().toArray(new JsonColumn[0]);
162
204
  assertEquals("$['json1']['a']['default']", keys[0]);
163
205
  assertEquals("$['json1']['a']['default']", values[0].getPath());
164
206
  assertEquals("$['json1']['a']['copy']", keys[1]);
165
207
  assertEquals("$['json1']['a']['copy']", values[1].getPath());
208
+ assertEquals("$['json1']['a']['copy_array']", keys[2]);
209
+ assertEquals("$['json1']['a']['copy_array']", values[2].getPath());
166
210
  }
167
211
 
168
212
  {
@@ -175,8 +219,20 @@ public class TestJsonVisitor
175
219
  }
176
220
  }
177
221
 
222
+ @Test(expected = ConfigException.class)
223
+ public void buildJsonColumns_ConfigException()
224
+ {
225
+ PluginTask task = taskFromYamlString(
226
+ "type: column",
227
+ "columns:",
228
+ " - {name: \"$.json1.b.b[*]\"}");
229
+ Schema inputSchema = Schema.builder().build();
230
+ // b[*] should be written as b
231
+ jsonVisitor(task, inputSchema);
232
+ }
233
+
178
234
  @Test
179
- public void buildJsonSchema_Columns()
235
+ public void buildJsonColumns()
180
236
  {
181
237
  PluginTask task = taskFromYamlString(
182
238
  "type: column",
@@ -190,19 +246,23 @@ public class TestJsonVisitor
190
246
  .build();
191
247
  JsonVisitor subject = jsonVisitor(task, inputSchema);
192
248
 
193
- assertFalse(subject.jsonColumns.containsKey("$['json1']"));
249
+ // 1st level keys are parents of jsonpath
250
+ assertTrue(subject.jsonColumns.containsKey("$"));
251
+ assertTrue(subject.jsonColumns.containsKey("$['json1']"));
194
252
  assertTrue(subject.jsonColumns.containsKey("$['json1']['a']"));
195
253
  assertTrue(subject.jsonColumns.containsKey("$['json1']['a']['copy_array']"));
196
254
 
197
255
  {
198
256
  HashMap<String, JsonColumn> jsonColumns = subject.jsonColumns.get("$['json1']['a']");
199
- assertEquals(2, jsonColumns.size());
257
+ assertEquals(3, jsonColumns.size());
200
258
  String[] keys = jsonColumns.keySet().toArray(new String[0]);
201
259
  JsonColumn[] values = jsonColumns.values().toArray(new JsonColumn[0]);
202
260
  assertEquals("$['json1']['a']['default']", keys[0]);
203
261
  assertEquals("$['json1']['a']['default']", values[0].getPath());
204
262
  assertEquals("$['json1']['a']['copy']", keys[1]);
205
263
  assertEquals("$['json1']['a']['copy']", values[1].getPath());
264
+ assertEquals("$['json1']['a']['copy_array']", keys[2]);
265
+ assertEquals("$['json1']['a']['copy_array']", values[2].getPath());
206
266
  }
207
267
 
208
268
  {
@@ -216,7 +276,7 @@ public class TestJsonVisitor
216
276
  }
217
277
 
218
278
  @Test
219
- public void buildJsonSchema_Mix()
279
+ public void buildJsonSchema()
220
280
  {
221
281
  PluginTask task = taskFromYamlString(
222
282
  "type: column",
@@ -269,7 +329,6 @@ public class TestJsonVisitor
269
329
  PluginTask task = taskFromYamlString(
270
330
  "type: column",
271
331
  "add_columns:",
272
- " - {name: $.json1.k3, type: json, default: \"{}\"}",
273
332
  " - {name: $.json1.k3.k3, type: string, default: v}",
274
333
  " - {name: $.json1.k4, src: $.json1.k2}");
275
334
  Schema inputSchema = Schema.builder()
@@ -297,8 +356,7 @@ public class TestJsonVisitor
297
356
  "type: column",
298
357
  "columns:",
299
358
  " - {name: $.json1.k1}",
300
- " - {name: $.json1.k2.k2}", // $.json1.k2 must be specified now, or $.json.k2 will be removed entirely
301
- " - {name: $.json1.k3, type: json, default: \"{}\"}",
359
+ " - {name: $.json1.k2.k2}",
302
360
  " - {name: $.json1.k3.k3, type: string, default: v}",
303
361
  " - {name: $.json1.k4, src: $.json1.k2}");
304
362
  Schema inputSchema = Schema.builder()
@@ -316,7 +374,7 @@ public class TestJsonVisitor
316
374
  k2, ValueFactory.newMap(k2, v));
317
375
 
318
376
  MapValue visited = subject.visit("$['json1']", map).asMapValue();
319
- assertEquals("{\"k1\":{\"k1\":\"v\"},\"k3\":{\"k3\":\"v\"},\"k4\":{\"k2\":\"v\"}}", visited.toString());
377
+ assertEquals("{\"k1\":{\"k1\":\"v\"},\"k2\":{\"k2\":\"v\"},\"k3\":{\"k3\":\"v\"},\"k4\":{\"k2\":\"v\"}}", visited.toString());
320
378
  }
321
379
 
322
380
  @Test
@@ -352,8 +410,6 @@ public class TestJsonVisitor
352
410
  "type: column",
353
411
  "add_columns:",
354
412
  " - {name: \"$.json1.k1[1]\", src: \"$.json1.k1[0]\"}",
355
- " - {name: \"$.json1.k3\", type: json, default: \"[]\"}",
356
- " - {name: \"$.json1.k3[0]\", type: json, default: \"{}\"}",
357
413
  " - {name: \"$.json1.k3[0].k3\", type: string, default: v}");
358
414
  Schema inputSchema = Schema.builder()
359
415
  .add("json1", JSON)
@@ -379,11 +435,8 @@ public class TestJsonVisitor
379
435
  PluginTask task = taskFromYamlString(
380
436
  "type: column",
381
437
  "columns:",
382
- " - {name: \"$.json1.k1\"}",
383
438
  " - {name: \"$.json1.k1[1]\", src: \"$.json1.k1[0]\"}",
384
- " - {name: \"$.json1.k2[0]\"}", // $.json1.k2 must be specified now, or $.json.k2 will be removed entirely
385
- " - {name: \"$.json1.k3\", type: json, default: \"[]\"}",
386
- " - {name: \"$.json1.k3[0]\", type: json, default: \"{}\"}",
439
+ " - {name: \"$.json1.k2[0]\"}",
387
440
  " - {name: \"$.json1.k3[0].k3\", type: string, default: v}");
388
441
  Schema inputSchema = Schema.builder()
389
442
  .add("json1", JSON)
@@ -400,7 +453,7 @@ public class TestJsonVisitor
400
453
  k2, ValueFactory.newArray(v, v));
401
454
 
402
455
  MapValue visited = subject.visit("$['json1']", map).asMapValue();
403
- assertEquals("{\"k1\":[{\"k1\":\"v\"}],\"k3\":[{\"k3\":\"v\"}]}", visited.toString());
456
+ assertEquals("{\"k1\":[{\"k1\":\"v\"}],\"k2\":[\"v\"],\"k3\":[{\"k3\":\"v\"}]}", visited.toString());
404
457
  }
405
458
 
406
459
  @Test
@@ -435,7 +488,6 @@ public class TestJsonVisitor
435
488
  PluginTask task = taskFromYamlString(
436
489
  "type: column",
437
490
  "add_columns:",
438
- " - {name: \"$['json1']['k3']\", type: json, default: \"{}\"}",
439
491
  " - {name: \"$['json1']['k3']['k3']\", type: string, default: v}",
440
492
  " - {name: \"$['json1']['k4']\", src: \"$['json1']['k2']\"}");
441
493
  Schema inputSchema = Schema.builder()
@@ -464,7 +516,6 @@ public class TestJsonVisitor
464
516
  "columns:",
465
517
  " - {name: \"$['json1']['k1']\"}",
466
518
  " - {name: \"$['json1']['k2']['k2']\"}",
467
- " - {name: \"$['json1']['k3']\", type: json, default: \"{}\"}",
468
519
  " - {name: \"$['json1']['k3']['k3']\", type: string, default: v}",
469
520
  " - {name: \"$['json1']['k4']\", src: \"$['json1']['k2']\"}");
470
521
  Schema inputSchema = Schema.builder()
@@ -481,7 +532,7 @@ public class TestJsonVisitor
481
532
  k2, ValueFactory.newMap(k2, v));
482
533
 
483
534
  MapValue visited = subject.visit("$['json1']", map).asMapValue();
484
- assertEquals("{\"k1\":{\"k1\":\"v\"},\"k3\":{\"k3\":\"v\"},\"k4\":{\"k2\":\"v\"}}", visited.toString());
535
+ assertEquals("{\"k1\":{\"k1\":\"v\"},\"k2\":{\"k2\":\"v\"},\"k3\":{\"k3\":\"v\"},\"k4\":{\"k2\":\"v\"}}", visited.toString());
485
536
  }
486
537
 
487
538
  @Test
@@ -517,8 +568,6 @@ public class TestJsonVisitor
517
568
  "type: column",
518
569
  "add_columns:",
519
570
  " - {name: \"$['json1']['k1'][1]\", src: \"$['json1']['k1'][0]\"}",
520
- " - {name: \"$['json1']['k3']\", type: json, default: \"[]\"}",
521
- " - {name: \"$['json1']['k3'][0]\", type: json, default: \"{}\"}",
522
571
  " - {name: \"$['json1']['k3'][0]['k3']\", type: string, default: v}");
523
572
  Schema inputSchema = Schema.builder()
524
573
  .add("json1", JSON)
@@ -544,11 +593,8 @@ public class TestJsonVisitor
544
593
  PluginTask task = taskFromYamlString(
545
594
  "type: column",
546
595
  "columns:",
547
- " - {name: \"$['json1']['k1']\"}",
548
596
  " - {name: \"$['json1']['k1'][1]\", src: \"$['json1']['k1'][0]\"}",
549
597
  " - {name: \"$['json1']['k2'][0]\"}",
550
- " - {name: \"$['json1']['k3']\", type: json, default: \"[]\"}",
551
- " - {name: \"$['json1']['k3'][0]\", type: json, default: \"{}\"}",
552
598
  " - {name: \"$['json1']['k3'][0]['k3']\", type: string, default: v}");
553
599
  Schema inputSchema = Schema.builder()
554
600
  .add("json1", JSON)
@@ -564,7 +610,7 @@ public class TestJsonVisitor
564
610
  k2, ValueFactory.newArray(v, v));
565
611
 
566
612
  MapValue visited = subject.visit("$['json1']", map).asMapValue();
567
- assertEquals("{\"k1\":[{\"k1\":\"v\"}],\"k3\":[{\"k3\":\"v\"}]}", visited.toString());
613
+ assertEquals("{\"k1\":[{\"k1\":\"v\"}],\"k2\":[\"v\"],\"k3\":[{\"k3\":\"v\"}]}", visited.toString());
568
614
  }
569
615
 
570
616
  // Because the dot notation is converted to single quotes by default,
@@ -652,13 +698,14 @@ public class TestJsonVisitor
652
698
  assertEquals("{\"k____1\":[{\"k____1\":\"v\"}],\"k_2\":{\"k_2\":\"v\"}}", visited.toString());
653
699
  }
654
700
 
701
+ /*
655
702
  @Test
656
703
  public void visit_withColumnNameIncludingSingleQuotes()
657
704
  {
658
705
  PluginTask task = taskFromYamlString(
659
706
  "type: column",
660
707
  "columns:",
661
- " - {name: \"$[\\\"'json1\\\"]['k1']\"}");
708
+ " - {name: \"$['\\\\'json1']['k1']\"}");
662
709
  Schema inputSchema = Schema.builder()
663
710
  .add("'json1", JSON)
664
711
  .build();
@@ -672,9 +719,10 @@ public class TestJsonVisitor
672
719
  MapValue visited = subject.visit("$['\\'json1']", map).asMapValue();
673
720
  assertEquals("{\"k1\":\"v\"}", visited.toString());
674
721
  }
722
+ */
675
723
 
676
724
  @Test(expected = ConfigException.class)
677
- public void constructor_mustBeRaisedConfigExceptionWithMultiProperties() {
725
+ public void configException_MultiProperties() {
678
726
  PluginTask task = taskFromYamlString(
679
727
  "type: column",
680
728
  "columns:",
@@ -687,7 +735,7 @@ public class TestJsonVisitor
687
735
 
688
736
  // It is recognized multi properties if the square brackets does not close properly
689
737
  @Test(expected = ConfigException.class)
690
- public void constructor_mustBeRaisedInvalidPathExceptionWithPropertyIsNotSeparatedByCommas()
738
+ public void configException_PropertyIsNotSeparatedByCommas()
691
739
  {
692
740
  PluginTask task = taskFromYamlString(
693
741
  "type: column",
@@ -700,7 +748,7 @@ public class TestJsonVisitor
700
748
  }
701
749
 
702
750
  @Test(expected = ConfigException.class)
703
- public void constructor_mustBeRaisedConfigExceptionWithFunctionPathToken()
751
+ public void configException_FunctionPathToken()
704
752
  {
705
753
  PluginTask task = taskFromYamlString(
706
754
  "type: column",
@@ -713,7 +761,7 @@ public class TestJsonVisitor
713
761
  }
714
762
 
715
763
  @Test(expected = ConfigException.class)
716
- public void constructor_mustBeRaisedConfigExceptionWithPredicatePathToken()
764
+ public void configException_PredicatePathToken()
717
765
  {
718
766
  PluginTask task = taskFromYamlString(
719
767
  "type: column",
@@ -726,7 +774,7 @@ public class TestJsonVisitor
726
774
  }
727
775
 
728
776
  @Test(expected = ConfigException.class)
729
- public void constructor_mustBeRaisedConfigExceptionWithScanPathToken()
777
+ public void configException_ScanPathToken()
730
778
  {
731
779
  PluginTask task = taskFromYamlString(
732
780
  "type: column",
@@ -739,7 +787,7 @@ public class TestJsonVisitor
739
787
  }
740
788
 
741
789
  @Test(expected = ConfigException.class)
742
- public void constructor_mustBeRaisedConfigExceptionWithMultiIndexOperation()
790
+ public void configException_MultiIndexOperation()
743
791
  {
744
792
  PluginTask task = taskFromYamlString(
745
793
  "type: column",
@@ -752,7 +800,7 @@ public class TestJsonVisitor
752
800
  }
753
801
 
754
802
  @Test(expected = ConfigException.class)
755
- public void constructor_mustBeRaisedConfigExceptionWithMultiIndexOperationAtMiddlePosition()
803
+ public void configException_IndexOperationAtMiddlePosition()
756
804
  {
757
805
  PluginTask task = taskFromYamlString(
758
806
  "type: column",
@@ -765,7 +813,7 @@ public class TestJsonVisitor
765
813
  }
766
814
 
767
815
  @Test(expected = ConfigException.class)
768
- public void constructor_mustBeRaisedConfigExceptionWithMArraySliceOperation()
816
+ public void configException_ArraySliceOperation()
769
817
  {
770
818
  PluginTask task = taskFromYamlString(
771
819
  "type: column",
@@ -778,7 +826,7 @@ public class TestJsonVisitor
778
826
  }
779
827
 
780
828
  @Test(expected = ConfigException.class)
781
- public void constructor_mustBeRaisedConfigExceptionWithMArraySliceOperationAtMiddlePosition()
829
+ public void configException_MArraySliceOperationAtMiddlePosition()
782
830
  {
783
831
  PluginTask task = taskFromYamlString(
784
832
  "type: column",
@@ -794,7 +842,7 @@ public class TestJsonVisitor
794
842
  public ExpectedException thrown = ExpectedException.none();
795
843
 
796
844
  @Test
797
- public void constructor_mustBeRaisedConfigExceptionEffectively()
845
+ public void configException_PathCompileError()
798
846
  {
799
847
  PluginTask task = taskFromYamlString(
800
848
  "type: column",
@@ -808,4 +856,4 @@ public class TestJsonVisitor
808
856
 
809
857
  jsonVisitor(task, inputSchema);
810
858
  }
811
- }
859
+ }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-column
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0.pre2
4
+ version: 0.6.0.pre3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naotoshi Seo
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-10-17 00:00:00.000000000 Z
11
+ date: 2016-10-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -52,9 +52,13 @@ files:
52
52
  - README.md
53
53
  - build.gradle
54
54
  - config/checkstyle/checkstyle.xml
55
+ - example/add_columns.txt
55
56
  - example/add_columns.yml
57
+ - example/bracket_notations.txt
56
58
  - example/bracket_notations.yml
59
+ - example/columns.txt
57
60
  - example/columns.yml
61
+ - example/drop_columns.txt
58
62
  - example/drop_columns.yml
59
63
  - example/edgecase.tsv
60
64
  - example/edgecase.yml
@@ -69,8 +73,8 @@ files:
69
73
  - src/main/java/org/embulk/filter/column/ColumnFilterPlugin.java
70
74
  - src/main/java/org/embulk/filter/column/ColumnVisitorImpl.java
71
75
  - src/main/java/org/embulk/filter/column/JsonColumn.java
76
+ - src/main/java/org/embulk/filter/column/JsonPathUtil.java
72
77
  - src/main/java/org/embulk/filter/column/JsonVisitor.java
73
- - src/main/java/org/embulk/filter/column/PathTokenUtil.java
74
78
  - src/test/java/org/embulk/filter/column/TestColumnFilterPlugin.java
75
79
  - src/test/java/org/embulk/filter/column/TestColumnVisitorImpl.java
76
80
  - src/test/java/org/embulk/filter/column/TestJsonColumn.java
@@ -78,9 +82,9 @@ files:
78
82
  - classpath/accessors-smart-1.1.jar
79
83
  - classpath/asm-5.0.3.jar
80
84
  - classpath/commons-lang3-3.4.jar
81
- - classpath/embulk-filter-column-0.6.0.pre2.jar
85
+ - classpath/embulk-filter-column-0.6.0.pre3.jar
82
86
  - classpath/json-smart-2.2.1.jar
83
- - classpath/JsonPathCompiler-0.0.6.jar
87
+ - classpath/JsonPathCompiler-0.0.11.jar
84
88
  - classpath/slf4j-api-1.7.21.jar
85
89
  homepage: https://github.com/sonots/embulk-filter-column
86
90
  licenses:
@@ -1,39 +0,0 @@
1
- package org.embulk.filter.column;
2
-
3
- import com.dena.analytics.jsonpathcompiler.expressions.path.ArrayIndexOperation;
4
- import com.dena.analytics.jsonpathcompiler.expressions.path.ArrayPathToken;
5
- import com.dena.analytics.jsonpathcompiler.expressions.path.FunctionPathToken;
6
- import com.dena.analytics.jsonpathcompiler.expressions.path.PathToken;
7
- import com.dena.analytics.jsonpathcompiler.expressions.path.PredicatePathToken;
8
- import com.dena.analytics.jsonpathcompiler.expressions.path.ScanPathToken;
9
- import org.embulk.config.ConfigException;
10
-
11
- public class PathTokenUtil
12
- {
13
- public static void assertSupportedPathToken(PathToken pathToken, String path)
14
- {
15
- if (pathToken instanceof ArrayPathToken) {
16
- ArrayIndexOperation arrayIndexOperation = ((ArrayPathToken) pathToken).getArrayIndexOperation();
17
- assertSupportedArrayPathToken(arrayIndexOperation, path);
18
- }
19
- else if (pathToken instanceof ScanPathToken) {
20
- throw new ConfigException(String.format("scan path token is not supported \"%s\"", path));
21
- }
22
- else if (pathToken instanceof FunctionPathToken) {
23
- throw new ConfigException(String.format("function path token is not supported \"%s\"", path));
24
- }
25
- else if (pathToken instanceof PredicatePathToken) {
26
- throw new ConfigException(String.format("predicate path token is not supported \"%s\"", path));
27
- }
28
- }
29
-
30
- public static void assertSupportedArrayPathToken(ArrayIndexOperation arrayIndexOperation, String path)
31
- {
32
- if (arrayIndexOperation == null) {
33
- throw new ConfigException(String.format("Array Slice Operation is not supported \"%s\"", path));
34
- }
35
- else if (!arrayIndexOperation.isSingleIndexOperation()) {
36
- throw new ConfigException(String.format("Multi Array Indexes is not supported \"%s\"", path));
37
- }
38
- }
39
- }