embulk-parser-jsonpath 0.1.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +103 -0
- data/build.gradle +3 -3
- data/src/main/java/org/embulk/parser/jsonpath/ColumnVisitorImpl.java +70 -32
- data/src/main/java/org/embulk/parser/jsonpath/JsonpathParserPlugin.java +93 -64
- data/src/test/java/org/embulk/parser/jsonpath/TestJsonpathParserPlugin.java +128 -8
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5a5c5cf3bdba81a3a1bb16dae74d6c4a7af3379c
|
4
|
+
data.tar.gz: e041030833ba05bcc5967e84d4d0e6604b10094c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2a532592bc7672a092c43da076b51a0773284a55db1e1b3a4a547247031f981a3acdde9afa53ed5653f4742b94d7abc9ed76bb93ac4cae1ed58bff6eb3242a54
|
7
|
+
data.tar.gz: 428416e6b448f50458c7e36e0b1de078fb110737ae081b3a92e70751639216efd49f91017743ecfdcb20d23bdfab16d168621daf45981b7c8c70318a5af74604
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,11 @@
|
|
1
1
|
# ChangeLog
|
2
2
|
|
3
|
+
## 0.2.0 (2017-03-13)
|
4
|
+
|
5
|
+
* Support `path` parameter in column config. (@takumakanari)
|
6
|
+
* Allow some strings to be converted to boolean. (@takumakanari)
|
7
|
+
* Support `schema` parameter for compatibility with [embulk-parser-json](https://github.com/takumakanari/embulk-parser-json).
|
8
|
+
|
3
9
|
## 0.1.3 (2017-03-07)
|
4
10
|
|
5
11
|
* Make it possible to skip invalid records/columns. (@takumakanari)
|
data/README.md
CHANGED
@@ -17,8 +17,19 @@ The JSON with [JSONPath](http://goessner.net/articles/JsonPath/) parser plugin f
|
|
17
17
|
* **default_timestamp_format**: Default timestamp format of the timestamp (string, default: `%Y-%m-%d %H:%M:%S.%N %z`)
|
18
18
|
* **default_typecast**: Specify whether to cast values automatically to the specified types or not (boolean, default: true)
|
19
19
|
|
20
|
+
### columns
|
21
|
+
|
22
|
+
* **name**: Name of the column (string, required)
|
23
|
+
* **type**: Type of the column (string, required)
|
24
|
+
* **timezone**: Timezone of the timestamp if type is timestamp (string, default: the value of `default_timezone`)
|
25
|
+
* **format**: Format of the timestamp if type is timestamp (string, default: the value of `default_timestamp_format`)
|
26
|
+
* **typecast**: Whether to cast values to the column type or not (boolean, default: the value of `default_typecast`)
|
27
|
+
* **path**: JSONPath, relative to each record, used to extract the value of this column. (string, default: `null`)
|
28
|
+
|
20
29
|
## Example
|
21
30
|
|
31
|
+
### Basic Usage
|
32
|
+
|
22
33
|
```json
|
23
34
|
{
|
24
35
|
"count": 100,
|
@@ -88,12 +99,104 @@ registered_at (timestamp) : 2014-06-30 19:25:27 UTC
|
|
88
99
|
age ( long) : 73
|
89
100
|
ratio ( double) : 50.608
|
90
101
|
```
|
102
|
+
### Handle more complicated json
|
103
|
+
|
104
|
+
|
105
|
+
If you want to handle more complicated JSON, you can also specify a JSONPath with the **path** option in the columns section, as follows:
|
106
|
+
|
107
|
+
```json
|
108
|
+
{
|
109
|
+
"result" : "success",
|
110
|
+
"students" : [
|
111
|
+
{ "names" : ["John", "Lennon"], "age" : 10 },
|
112
|
+
{ "names" : ["Paul", "Maccartney"], "age" : 10 }
|
113
|
+
]
|
114
|
+
}
|
115
|
+
```
|
116
|
+
|
117
|
+
```yaml
|
118
|
+
root: $.students
|
119
|
+
columns:
|
120
|
+
- {name: firstName, type: string, path: "names[0]"}
|
121
|
+
- {name: lastName, type: string, path: "names[1]"}
|
122
|
+
```
|
123
|
+
|
124
|
+
In this case, names[0] will be firstName of schema and names[1] will be lastName.
|
125
|
+
|
126
|
+
## Guess
|
127
|
+
|
128
|
+
This plugin supports a minimal `guess` feature. You don't have to write the `parser:` section in the configuration file.
|
129
|
+
After writing the `in:` section, you can let embulk guess the `parser:` section using this command:
|
91
130
|
|
92
131
|
```
|
93
132
|
$ embulk gem install embulk-parser-jsonpath
|
94
133
|
$ embulk guess -g jsonpath config.yml -o guessed.yml
|
95
134
|
```
|
96
135
|
|
136
|
+
### Example
|
137
|
+
|
138
|
+
If you want to `guess` the following JSON file,
|
139
|
+
(This JSON data starts with an array)
|
140
|
+
You don't need a `parser` section.
|
141
|
+
|
142
|
+
```json
|
143
|
+
[
|
144
|
+
{
|
145
|
+
"name": "Hugh Rutherford",
|
146
|
+
"city": "Mitchellfurt",
|
147
|
+
"street_name": "Ondricka Island",
|
148
|
+
"zip_code": "75232",
|
149
|
+
"registered_at": "2015-09-09 05:28:45",
|
150
|
+
"vegetarian": true,
|
151
|
+
"age": 44,
|
152
|
+
"ratio": 79.092
|
153
|
+
}
|
154
|
+
]
|
155
|
+
```
|
156
|
+
|
157
|
+
```yaml
|
158
|
+
in:
|
159
|
+
type: file
|
160
|
+
path_prefix: example/hoge
|
161
|
+
out:
|
162
|
+
type: stdout
|
163
|
+
```
|
164
|
+
|
165
|
+
However, if the JSON data doesn't start with an array,
|
166
|
+
You have to specify the `root` parameter explicitly.
|
167
|
+
|
168
|
+
```json
|
169
|
+
{
|
170
|
+
"count": 100,
|
171
|
+
"page": 1,
|
172
|
+
"results": [
|
173
|
+
{
|
174
|
+
"name": "Hugh Rutherford",
|
175
|
+
"city": "Mitchellfurt",
|
176
|
+
"street_name": "Ondricka Island",
|
177
|
+
"zip_code": "75232",
|
178
|
+
"registered_at": "2015-09-09 05:28:45",
|
179
|
+
"vegetarian": true,
|
180
|
+
"age": 44,
|
181
|
+
"ratio": 79.092
|
182
|
+
}
|
183
|
+
]
|
184
|
+
}
|
185
|
+
```
|
186
|
+
|
187
|
+
|
188
|
+
```yaml
|
189
|
+
in:
|
190
|
+
type: file
|
191
|
+
path_prefix: example/input
|
192
|
+
parser:
|
193
|
+
type: jsonpath
|
194
|
+
root: "$.results"
|
195
|
+
out:
|
196
|
+
type: stdout
|
197
|
+
```
|
198
|
+
|
199
|
+
|
97
200
|
## Build
|
98
201
|
|
99
202
|
```
|
data/build.gradle
CHANGED
@@ -14,7 +14,7 @@ configurations {
|
|
14
14
|
provided
|
15
15
|
}
|
16
16
|
|
17
|
-
version = "0.
|
17
|
+
version = "0.2.0"
|
18
18
|
|
19
19
|
sourceCompatibility = 1.7
|
20
20
|
targetCompatibility = 1.7
|
@@ -90,10 +90,10 @@ task gemspec {
|
|
90
90
|
Gem::Specification.new do |spec|
|
91
91
|
spec.name = "${project.name}"
|
92
92
|
spec.version = "${project.version}"
|
93
|
-
spec.authors = ["Hiroyuki Sato"]
|
93
|
+
spec.authors = ["Hiroyuki Sato","Takuma kanari"]
|
94
94
|
spec.summary = %[JSON parser with JSONPath plugin for Embulk]
|
95
95
|
spec.description = %[Parses JSON files with JSONPath read by other file input plugins.]
|
96
|
-
spec.email = ["hiroysato@gmail.com"]
|
96
|
+
spec.email = ["hiroysato@gmail.com","chemtrails.t@gmail.com"]
|
97
97
|
spec.licenses = ["MIT"]
|
98
98
|
spec.homepage = "https://github.com/hiroyuki-sato/embulk-parser-jsonpath"
|
99
99
|
|
@@ -1,29 +1,44 @@
|
|
1
1
|
package org.embulk.parser.jsonpath;
|
2
2
|
|
3
|
+
import com.fasterxml.jackson.databind.JsonNode;
|
3
4
|
import com.google.common.base.Optional;
|
5
|
+
import com.google.common.collect.ImmutableList;
|
4
6
|
import org.embulk.parser.jsonpath.JsonpathParserPlugin.PluginTask;
|
5
7
|
import org.embulk.parser.jsonpath.JsonpathParserPlugin.TypecastColumnOption;
|
6
|
-
|
7
8
|
import org.embulk.spi.Column;
|
8
9
|
import org.embulk.spi.ColumnConfig;
|
9
10
|
import org.embulk.spi.ColumnVisitor;
|
10
11
|
import org.embulk.spi.PageBuilder;
|
11
12
|
import org.embulk.spi.Schema;
|
12
13
|
import org.embulk.spi.SchemaConfig;
|
14
|
+
import org.embulk.spi.json.JsonParseException;
|
15
|
+
import org.embulk.spi.json.JsonParser;
|
13
16
|
import org.embulk.spi.time.Timestamp;
|
14
17
|
import org.embulk.spi.time.TimestampParser;
|
15
18
|
import org.msgpack.core.MessageTypeException;
|
16
|
-
import org.msgpack.value.Value;
|
17
19
|
|
18
|
-
|
20
|
+
import java.util.List;
|
21
|
+
|
22
|
+
import static java.lang.String.format;
|
23
|
+
import static org.msgpack.value.ValueFactory.newBoolean;
|
24
|
+
import static org.msgpack.value.ValueFactory.newFloat;
|
25
|
+
import static org.msgpack.value.ValueFactory.newInteger;
|
26
|
+
import static org.msgpack.value.ValueFactory.newString;
|
27
|
+
|
28
|
+
public class ColumnVisitorImpl
|
29
|
+
implements ColumnVisitor
|
19
30
|
{
|
31
|
+
private static final JsonParser JSON_PARSER = new JsonParser();
|
32
|
+
private static final List<String> BOOL_TRUE_STRINGS = ImmutableList.of("true", "1", "yes", "on", "y", "t");
|
33
|
+
private static final List<String> BOOL_FALSE_STRINGS = ImmutableList.of("false", "0", "no", "off", "n", "f");
|
34
|
+
|
20
35
|
protected final PluginTask task;
|
21
36
|
protected final Schema schema;
|
22
37
|
protected final PageBuilder pageBuilder;
|
23
38
|
protected final TimestampParser[] timestampParsers;
|
24
39
|
protected final Boolean[] autoTypecasts;
|
25
40
|
|
26
|
-
protected
|
41
|
+
protected JsonNode value;
|
27
42
|
|
28
43
|
public ColumnVisitorImpl(PluginTask task, Schema schema, PageBuilder pageBuilder, TimestampParser[] timestampParsers)
|
29
44
|
{
|
@@ -41,20 +56,20 @@ public class ColumnVisitorImpl implements ColumnVisitor
|
|
41
56
|
this.autoTypecasts[column.getIndex()] = task.getDefaultTypecast();
|
42
57
|
}
|
43
58
|
|
44
|
-
//
|
45
|
-
SchemaConfig schemaConfig = task.getSchemaConfig();
|
59
|
+
// typecast option supports `columns` only.
|
60
|
+
Optional<SchemaConfig> schemaConfig = task.getSchemaConfig();
|
46
61
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
62
|
+
if (schemaConfig.isPresent()) {
|
63
|
+
for (ColumnConfig columnConfig : schemaConfig.get().getColumns()) {
|
64
|
+
TypecastColumnOption columnOption = columnConfig.getOption().loadConfig(TypecastColumnOption.class);
|
65
|
+
Boolean autoTypecast = columnOption.getTypecast().or(task.getDefaultTypecast());
|
66
|
+
Column column = schema.lookupColumn(columnConfig.getName());
|
67
|
+
this.autoTypecasts[column.getIndex()] = autoTypecast;
|
68
|
+
}
|
53
69
|
}
|
54
|
-
// }
|
55
70
|
}
|
56
71
|
|
57
|
-
public void setValue(
|
72
|
+
public void setValue(JsonNode value)
|
58
73
|
{
|
59
74
|
this.value = value;
|
60
75
|
}
|
@@ -64,15 +79,32 @@ public class ColumnVisitorImpl implements ColumnVisitor
|
|
64
79
|
{
|
65
80
|
if (isNil(value)) {
|
66
81
|
pageBuilder.setNull(column);
|
82
|
+
return;
|
83
|
+
}
|
84
|
+
|
85
|
+
final boolean val;
|
86
|
+
if (value.isBoolean()) {
|
87
|
+
val = value.asBoolean();
|
67
88
|
}
|
68
89
|
else {
|
69
|
-
|
70
|
-
|
71
|
-
|
90
|
+
String stringValue = valueAsString().toLowerCase();
|
91
|
+
if (BOOL_TRUE_STRINGS.contains(stringValue)) {
|
92
|
+
val = true;
|
72
93
|
}
|
73
|
-
|
74
|
-
|
94
|
+
else if (BOOL_FALSE_STRINGS.contains(stringValue)) {
|
95
|
+
val = false;
|
75
96
|
}
|
97
|
+
else {
|
98
|
+
throw new JsonRecordValidateException(format("can not convert '%s' to Boolean", value));
|
99
|
+
}
|
100
|
+
}
|
101
|
+
|
102
|
+
try {
|
103
|
+
boolean booleanValue = autoTypecasts[column.getIndex()] ? ColumnCaster.asBoolean(newBoolean(val)) : val;
|
104
|
+
pageBuilder.setBoolean(column, booleanValue);
|
105
|
+
}
|
106
|
+
catch (MessageTypeException e) {
|
107
|
+
throw new JsonRecordValidateException(format("failed to get \"%s\" as Boolean", value), e);
|
76
108
|
}
|
77
109
|
}
|
78
110
|
|
@@ -84,11 +116,11 @@ public class ColumnVisitorImpl implements ColumnVisitor
|
|
84
116
|
}
|
85
117
|
else {
|
86
118
|
try {
|
87
|
-
long longValue = autoTypecasts[column.getIndex()] ? ColumnCaster.asLong(value) : value.
|
119
|
+
long longValue = autoTypecasts[column.getIndex()] ? ColumnCaster.asLong(newInteger(value.asLong())) : value.asLong();
|
88
120
|
pageBuilder.setLong(column, longValue);
|
89
121
|
}
|
90
122
|
catch (MessageTypeException e) {
|
91
|
-
throw new JsonRecordValidateException(
|
123
|
+
throw new JsonRecordValidateException(format("failed to get \"%s\" as Long", value), e);
|
92
124
|
}
|
93
125
|
}
|
94
126
|
}
|
@@ -101,11 +133,11 @@ public class ColumnVisitorImpl implements ColumnVisitor
|
|
101
133
|
}
|
102
134
|
else {
|
103
135
|
try {
|
104
|
-
double doubleValue = autoTypecasts[column.getIndex()] ? ColumnCaster.asDouble(value) : value.
|
136
|
+
double doubleValue = autoTypecasts[column.getIndex()] ? ColumnCaster.asDouble(newFloat(value.asDouble())) : value.asDouble();
|
105
137
|
pageBuilder.setDouble(column, doubleValue);
|
106
138
|
}
|
107
139
|
catch (MessageTypeException e) {
|
108
|
-
throw new JsonRecordValidateException(
|
140
|
+
throw new JsonRecordValidateException(format("failed get \"%s\" as Double", value), e);
|
109
141
|
}
|
110
142
|
}
|
111
143
|
}
|
@@ -117,12 +149,13 @@ public class ColumnVisitorImpl implements ColumnVisitor
|
|
117
149
|
pageBuilder.setNull(column);
|
118
150
|
}
|
119
151
|
else {
|
152
|
+
final String stringValue = valueAsString();
|
120
153
|
try {
|
121
|
-
String string = autoTypecasts[column.getIndex()] ? ColumnCaster.asString(
|
154
|
+
String string = autoTypecasts[column.getIndex()] ? ColumnCaster.asString(newString(stringValue)) : stringValue;
|
122
155
|
pageBuilder.setString(column, string);
|
123
156
|
}
|
124
157
|
catch (MessageTypeException e) {
|
125
|
-
throw new JsonRecordValidateException(
|
158
|
+
throw new JsonRecordValidateException(format("failed to get \"%s\" as String", value), e);
|
126
159
|
}
|
127
160
|
}
|
128
161
|
}
|
@@ -135,11 +168,11 @@ public class ColumnVisitorImpl implements ColumnVisitor
|
|
135
168
|
}
|
136
169
|
else {
|
137
170
|
try {
|
138
|
-
Timestamp timestamp = ColumnCaster.asTimestamp(value, timestampParsers[column.getIndex()]);
|
171
|
+
Timestamp timestamp = ColumnCaster.asTimestamp(newString(value.asText()), timestampParsers[column.getIndex()]);
|
139
172
|
pageBuilder.setTimestamp(column, timestamp);
|
140
173
|
}
|
141
174
|
catch (MessageTypeException e) {
|
142
|
-
throw new JsonRecordValidateException(
|
175
|
+
throw new JsonRecordValidateException(format("failed to get \"%s\" as Timestamp", value), e);
|
143
176
|
}
|
144
177
|
}
|
145
178
|
}
|
@@ -152,16 +185,21 @@ public class ColumnVisitorImpl implements ColumnVisitor
|
|
152
185
|
}
|
153
186
|
else {
|
154
187
|
try {
|
155
|
-
pageBuilder.setJson(column,
|
188
|
+
pageBuilder.setJson(column, JSON_PARSER.parse(valueAsString()));
|
156
189
|
}
|
157
|
-
catch (MessageTypeException e) {
|
158
|
-
throw new JsonRecordValidateException(
|
190
|
+
catch (MessageTypeException | JsonParseException e) {
|
191
|
+
throw new JsonRecordValidateException(format("failed to get \"%s\" as Json", value), e);
|
159
192
|
}
|
160
193
|
}
|
161
194
|
}
|
162
195
|
|
163
|
-
protected boolean isNil(
|
196
|
+
protected boolean isNil(JsonNode v)
|
197
|
+
{
|
198
|
+
return v == null || v.isNull();
|
199
|
+
}
|
200
|
+
|
201
|
+
private String valueAsString()
|
164
202
|
{
|
165
|
-
return
|
203
|
+
return value.isTextual() ? value.asText() : value.toString();
|
166
204
|
}
|
167
205
|
}
|
@@ -1,16 +1,23 @@
|
|
1
1
|
package org.embulk.parser.jsonpath;
|
2
2
|
|
3
|
+
import com.fasterxml.jackson.databind.JsonNode;
|
4
|
+
import com.fasterxml.jackson.databind.node.JsonNodeType;
|
3
5
|
import com.google.common.base.Optional;
|
4
|
-
import com.google.common.base.Throwables;
|
5
6
|
import com.google.common.collect.ImmutableMap;
|
7
|
+
import com.jayway.jsonpath.Configuration;
|
8
|
+
import com.jayway.jsonpath.InvalidJsonException;
|
6
9
|
import com.jayway.jsonpath.JsonPath;
|
7
10
|
import com.jayway.jsonpath.PathNotFoundException;
|
11
|
+
import com.jayway.jsonpath.spi.json.JacksonJsonNodeJsonProvider;
|
12
|
+
import com.jayway.jsonpath.spi.mapper.JacksonMappingProvider;
|
8
13
|
import org.embulk.config.Config;
|
9
14
|
import org.embulk.config.ConfigDefault;
|
15
|
+
import org.embulk.config.ConfigException;
|
10
16
|
import org.embulk.config.ConfigSource;
|
11
17
|
import org.embulk.config.Task;
|
12
18
|
import org.embulk.config.TaskSource;
|
13
19
|
import org.embulk.spi.Column;
|
20
|
+
import org.embulk.spi.ColumnConfig;
|
14
21
|
import org.embulk.spi.DataException;
|
15
22
|
import org.embulk.spi.Exec;
|
16
23
|
import org.embulk.spi.FileInput;
|
@@ -19,20 +26,16 @@ import org.embulk.spi.PageOutput;
|
|
19
26
|
import org.embulk.spi.ParserPlugin;
|
20
27
|
import org.embulk.spi.Schema;
|
21
28
|
import org.embulk.spi.SchemaConfig;
|
22
|
-
import org.embulk.spi.json.JsonParseException;
|
23
|
-
import org.embulk.spi.json.JsonParser;
|
24
29
|
import org.embulk.spi.time.TimestampParser;
|
25
30
|
import org.embulk.spi.util.FileInputInputStream;
|
26
31
|
import org.embulk.spi.util.Timestamps;
|
27
|
-
import org.msgpack.value.Value;
|
28
32
|
import org.slf4j.Logger;
|
29
33
|
|
30
|
-
import java.io.IOException;
|
31
34
|
import java.util.Locale;
|
32
35
|
import java.util.Map;
|
33
36
|
|
37
|
+
import static java.lang.String.format;
|
34
38
|
import static java.util.Locale.ENGLISH;
|
35
|
-
import static org.msgpack.value.ValueFactory.newString;
|
36
39
|
|
37
40
|
public class JsonpathParserPlugin
|
38
41
|
implements ParserPlugin
|
@@ -40,14 +43,18 @@ public class JsonpathParserPlugin
|
|
40
43
|
|
41
44
|
private static final Logger logger = Exec.getLogger(JsonpathParserPlugin.class);
|
42
45
|
|
43
|
-
private
|
46
|
+
private static final Configuration JSON_PATH_CONFIG = Configuration
|
47
|
+
.builder()
|
48
|
+
.mappingProvider(new JacksonMappingProvider())
|
49
|
+
.jsonProvider(new JacksonJsonNodeJsonProvider())
|
50
|
+
.build();
|
44
51
|
|
45
52
|
public interface TypecastColumnOption
|
46
53
|
extends Task
|
47
54
|
{
|
48
55
|
@Config("typecast")
|
49
56
|
@ConfigDefault("null")
|
50
|
-
|
57
|
+
Optional<Boolean> getTypecast();
|
51
58
|
}
|
52
59
|
|
53
60
|
public interface PluginTask
|
@@ -55,10 +62,16 @@ public class JsonpathParserPlugin
|
|
55
62
|
{
|
56
63
|
@Config("root")
|
57
64
|
@ConfigDefault("\"$\"")
|
58
|
-
|
65
|
+
String getRoot();
|
59
66
|
|
60
67
|
@Config("columns")
|
61
|
-
|
68
|
+
@ConfigDefault("null")
|
69
|
+
Optional<SchemaConfig> getSchemaConfig();
|
70
|
+
|
71
|
+
@Config("schema")
|
72
|
+
@ConfigDefault("null")
|
73
|
+
@Deprecated
|
74
|
+
Optional<SchemaConfig> getOldSchemaConfig();
|
62
75
|
|
63
76
|
@Config("default_typecast")
|
64
77
|
@ConfigDefault("true")
|
@@ -69,12 +82,20 @@ public class JsonpathParserPlugin
|
|
69
82
|
boolean getStopOnInvalidRecord();
|
70
83
|
}
|
71
84
|
|
85
|
+
public interface JsonpathColumnOption
|
86
|
+
extends Task
|
87
|
+
{
|
88
|
+
@Config("path")
|
89
|
+
@ConfigDefault("null")
|
90
|
+
Optional<String> getPath();
|
91
|
+
}
|
92
|
+
|
72
93
|
@Override
|
73
94
|
public void transaction(ConfigSource config, ParserPlugin.Control control)
|
74
95
|
{
|
75
96
|
PluginTask task = config.loadConfig(PluginTask.class);
|
76
97
|
|
77
|
-
Schema schema =
|
98
|
+
Schema schema = getSchemaConfig(task).toSchema();
|
78
99
|
|
79
100
|
control.run(task.dump(), schema);
|
80
101
|
}
|
@@ -86,11 +107,9 @@ public class JsonpathParserPlugin
|
|
86
107
|
PluginTask task = taskSource.loadTask(PluginTask.class);
|
87
108
|
String jsonRoot = task.getRoot();
|
88
109
|
|
89
|
-
setColumnNameValues(schema);
|
90
|
-
|
91
110
|
logger.info("JSONPath = " + jsonRoot);
|
92
|
-
final TimestampParser[] timestampParsers = Timestamps.newTimestampColumnParsers(task,
|
93
|
-
final
|
111
|
+
final TimestampParser[] timestampParsers = Timestamps.newTimestampColumnParsers(task, getSchemaConfig(task));
|
112
|
+
final Map<Column, String> jsonPathMap = createJsonPathMap(task, schema);
|
94
113
|
final boolean stopOnInvalidRecord = task.getStopOnInvalidRecord();
|
95
114
|
|
96
115
|
try (final PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), schema, output)) {
|
@@ -98,59 +117,56 @@ public class JsonpathParserPlugin
|
|
98
117
|
|
99
118
|
FileInputInputStream is = new FileInputInputStream(input);
|
100
119
|
while (is.nextFile()) {
|
101
|
-
|
120
|
+
final JsonNode json;
|
102
121
|
try {
|
103
|
-
|
104
|
-
try {
|
105
|
-
json = JsonPath.read(is, jsonRoot).toString();
|
106
|
-
}
|
107
|
-
catch (IOException e) {
|
108
|
-
throw Throwables.propagate(e);
|
109
|
-
}
|
110
|
-
catch (PathNotFoundException e) {
|
111
|
-
throw new DataException(String.format(Locale.ENGLISH, "Failed to get json root reason = %s",
|
112
|
-
e.getMessage()));
|
113
|
-
}
|
114
|
-
|
115
|
-
try {
|
116
|
-
value = jsonParser.parse(json);
|
117
|
-
}
|
118
|
-
catch (JsonParseException e) {
|
119
|
-
throw new DataException(String.format(Locale.ENGLISH, "Parse failed reason = %s, input data = '%s'",
|
120
|
-
e.getMessage(), json));
|
121
|
-
}
|
122
|
-
|
123
|
-
if (!value.isArrayValue()) {
|
124
|
-
throw new JsonRecordValidateException("Json string is not representing array value.");
|
125
|
-
}
|
122
|
+
json = JsonPath.using(JSON_PATH_CONFIG).parse(is).read(jsonRoot, JsonNode.class);
|
126
123
|
}
|
127
|
-
catch (
|
128
|
-
skipOrThrow(
|
124
|
+
catch (PathNotFoundException e) {
|
125
|
+
skipOrThrow(new DataException(format(Locale.ENGLISH,
|
126
|
+
"Failed to get root json path='%s'", jsonRoot)), stopOnInvalidRecord);
|
127
|
+
continue;
|
128
|
+
}
|
129
|
+
catch (InvalidJsonException e) {
|
130
|
+
skipOrThrow(new DataException(e), stopOnInvalidRecord);
|
129
131
|
continue;
|
130
132
|
}
|
131
133
|
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
}
|
134
|
+
if (!json.isArray()) {
|
135
|
+
skipOrThrow(new JsonRecordValidateException(format(Locale.ENGLISH,
|
136
|
+
"Json string is not representing array value json='%s'", json)), stopOnInvalidRecord);
|
137
|
+
continue;
|
138
|
+
}
|
138
139
|
|
139
|
-
|
140
|
-
final Map<Value, Value> record = recordValue.asMapValue().map();
|
140
|
+
for (JsonNode recordValue : json) {
|
141
141
|
try {
|
142
|
+
if (recordValue.getNodeType() != JsonNodeType.OBJECT) {
|
143
|
+
throw new JsonRecordValidateException(format(Locale.ENGLISH,
|
144
|
+
"Json string is not representing map value json='%s'", recordValue));
|
145
|
+
}
|
146
|
+
|
142
147
|
for (Column column : schema.getColumns()) {
|
143
|
-
|
144
|
-
|
148
|
+
JsonNode value = null;
|
149
|
+
if (jsonPathMap.containsKey(column)) {
|
150
|
+
try {
|
151
|
+
value = JsonPath.using(JSON_PATH_CONFIG).parse(recordValue).read(jsonPathMap.get(column));
|
152
|
+
}
|
153
|
+
catch (PathNotFoundException e) {
|
154
|
+
// pass (value is nullable)
|
155
|
+
}
|
156
|
+
}
|
157
|
+
else {
|
158
|
+
value = recordValue.get(column.getName());
|
159
|
+
}
|
160
|
+
visitor.setValue(value);
|
145
161
|
column.visit(visitor);
|
146
162
|
}
|
163
|
+
|
164
|
+
pageBuilder.addRecord();
|
147
165
|
}
|
148
166
|
catch (DataException e) {
|
149
167
|
skipOrThrow(e, stopOnInvalidRecord);
|
150
168
|
continue;
|
151
169
|
}
|
152
|
-
|
153
|
-
pageBuilder.addRecord();
|
154
170
|
}
|
155
171
|
}
|
156
172
|
|
@@ -158,19 +174,17 @@ public class JsonpathParserPlugin
|
|
158
174
|
}
|
159
175
|
}
|
160
176
|
|
161
|
-
private
|
177
|
+
private Map<Column, String> createJsonPathMap(PluginTask task, Schema schema)
|
162
178
|
{
|
163
|
-
ImmutableMap.Builder<
|
164
|
-
for (
|
165
|
-
|
166
|
-
|
179
|
+
ImmutableMap.Builder<Column, String> builder = ImmutableMap.builder();
|
180
|
+
for (int i = 0; i < schema.size(); i++) {
|
181
|
+
ColumnConfig config = getSchemaConfig(task).getColumn(i);
|
182
|
+
JsonpathColumnOption option = config.getOption().loadConfig(JsonpathColumnOption.class);
|
183
|
+
if (option.getPath().isPresent()) {
|
184
|
+
builder.put(schema.getColumn(i), option.getPath().get());
|
185
|
+
}
|
167
186
|
}
|
168
|
-
|
169
|
-
}
|
170
|
-
|
171
|
-
private Value getColumnNameValue(Column column)
|
172
|
-
{
|
173
|
-
return columnNameValues.get(column.getName());
|
187
|
+
return builder.build();
|
174
188
|
}
|
175
189
|
|
176
190
|
private void skipOrThrow(DataException cause, boolean stopOnInvalidRecord)
|
@@ -180,4 +194,19 @@ public class JsonpathParserPlugin
|
|
180
194
|
}
|
181
195
|
logger.warn(String.format(ENGLISH, "Skipped invalid record (%s)", cause));
|
182
196
|
}
|
197
|
+
|
198
|
+
// this method is to keep the backward compatibility of 'schema' option.
|
199
|
+
private SchemaConfig getSchemaConfig(PluginTask task)
|
200
|
+
{
|
201
|
+
if (task.getSchemaConfig().isPresent()) {
|
202
|
+
return task.getSchemaConfig().get();
|
203
|
+
}
|
204
|
+
else if (task.getOldSchemaConfig().isPresent()) {
|
205
|
+
logger.warn("Please use 'columns' option instead of 'schema' because the 'schema' option is deprecated. The next version will stop 'schema' option support.");
|
206
|
+
return task.getOldSchemaConfig().get();
|
207
|
+
}
|
208
|
+
else {
|
209
|
+
throw new ConfigException("Attribute 'columns' is required but not set");
|
210
|
+
}
|
211
|
+
}
|
183
212
|
}
|
@@ -26,7 +26,6 @@ import java.io.ByteArrayInputStream;
|
|
26
26
|
import java.io.File;
|
27
27
|
import java.io.IOException;
|
28
28
|
import java.io.InputStream;
|
29
|
-
import java.nio.charset.Charset;
|
30
29
|
import java.nio.charset.StandardCharsets;
|
31
30
|
import java.util.List;
|
32
31
|
|
@@ -37,6 +36,7 @@ import static org.embulk.spi.type.Types.LONG;
|
|
37
36
|
import static org.embulk.spi.type.Types.STRING;
|
38
37
|
import static org.embulk.spi.type.Types.TIMESTAMP;
|
39
38
|
import static org.junit.Assert.assertEquals;
|
39
|
+
import static org.junit.Assert.assertFalse;
|
40
40
|
import static org.junit.Assert.assertNull;
|
41
41
|
import static org.junit.Assert.assertTrue;
|
42
42
|
import static org.junit.Assert.fail;
|
@@ -167,6 +167,51 @@ public class TestJsonpathParserPlugin
|
|
167
167
|
}
|
168
168
|
}
|
169
169
|
|
170
|
+
@Test
|
171
|
+
public void booleanStrings()
|
172
|
+
throws Exception
|
173
|
+
{
|
174
|
+
SchemaConfig schema = schema(column("_c1", BOOLEAN), column("_c2", BOOLEAN),
|
175
|
+
column("_c3", BOOLEAN), column("_c4", BOOLEAN), column("_c5", BOOLEAN),
|
176
|
+
column("_c6", BOOLEAN), column("_c7", BOOLEAN), column("_c8", BOOLEAN),
|
177
|
+
column("_c9", BOOLEAN), column("_c10", BOOLEAN), column("_c11", BOOLEAN),
|
178
|
+
column("_c12", BOOLEAN));
|
179
|
+
ConfigSource config = this.config.deepCopy().set("columns", schema);
|
180
|
+
|
181
|
+
transaction(config, fileInput("[{\"_c1\" : \"yes\", \"_c2\" : \"true\", \"_c3\" : \"1\",",
|
182
|
+
"\"_c4\" : \"on\", \"_c5\" : \"y\", \"_c6\" : \"t\",",
|
183
|
+
"\"_c7\" : \"no\", \"_c8\" : \"false\", \"_c9\" : \"0\"," ,
|
184
|
+
"\"_c10\" : \"off\", \"_c11\" : \"n\", \"_c12\" : \"f\"}]"));
|
185
|
+
List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
|
186
|
+
assertEquals(1, records.size());
|
187
|
+
|
188
|
+
Object[] record = records.get(0);
|
189
|
+
for (int i = 0; i < 5; i++) {
|
190
|
+
assertTrue((boolean) record[i]);
|
191
|
+
}
|
192
|
+
for (int i = 6; i < 11; i++) {
|
193
|
+
assertFalse((boolean) record[i]);
|
194
|
+
}
|
195
|
+
}
|
196
|
+
|
197
|
+
@Test
|
198
|
+
public void invalidBooleanString()
|
199
|
+
throws Exception
|
200
|
+
{
|
201
|
+
SchemaConfig schema = schema(column("_c1", BOOLEAN));
|
202
|
+
ConfigSource config = this.config.deepCopy().set("columns", schema).
|
203
|
+
set("stop_on_invalid_record", true);
|
204
|
+
|
205
|
+
try {
|
206
|
+
transaction(config,
|
207
|
+
fileInput("[{\"_c1\" : \"INVALID\"}]"));
|
208
|
+
fail();
|
209
|
+
}
|
210
|
+
catch (Throwable t) {
|
211
|
+
assertTrue(t instanceof DataException);
|
212
|
+
}
|
213
|
+
}
|
214
|
+
|
170
215
|
@Test
|
171
216
|
public void stopOnBrokenColumn()
|
172
217
|
throws Exception
|
@@ -216,9 +261,9 @@ public class TestJsonpathParserPlugin
|
|
216
261
|
|
217
262
|
transaction(config, fileInput(
|
218
263
|
"[",
|
219
|
-
"{}",
|
220
|
-
"{\"_c0\":null,\"_c1\":null,\"_c2\":null}",
|
221
|
-
"{\"_c3\":null,\"_c4\":null,\"_c5\":null}",
|
264
|
+
"{},",
|
265
|
+
"{\"_c0\":null,\"_c1\":null,\"_c2\":null},",
|
266
|
+
"{\"_c3\":null,\"_c4\":null,\"_c5\":null},",
|
222
267
|
"{}",
|
223
268
|
"]"
|
224
269
|
));
|
@@ -244,8 +289,8 @@ public class TestJsonpathParserPlugin
|
|
244
289
|
|
245
290
|
transaction(config, fileInput(
|
246
291
|
"[",
|
247
|
-
"{\"_c0\":true,\"_c1\":10,\"_c2\":0.1,\"_c3\":\"embulk\",\"_c4\":\"2016-01-01 00:00:00 UTC\",\"_c5\":{\"k\":\"v\"}}",
|
248
|
-
"[1, 2, 3]",
|
292
|
+
"{\"_c0\":true,\"_c1\":10,\"_c2\":0.1,\"_c3\":\"embulk\",\"_c4\":\"2016-01-01 00:00:00 UTC\",\"_c5\":{\"k\":\"v\"}},",
|
293
|
+
"[1, 2, 3],",
|
249
294
|
"{\"_c0\":false,\"_c1\":-10,\"_c2\":1.0,\"_c3\":\"エンバルク\",\"_c4\":\"2016-01-01 00:00:00 +0000\",\"_c5\":[\"e0\",\"e1\"]}",
|
250
295
|
"]"
|
251
296
|
));
|
@@ -287,8 +332,8 @@ public class TestJsonpathParserPlugin
|
|
287
332
|
|
288
333
|
transaction(config, fileInput(
|
289
334
|
"{\"records\":[",
|
290
|
-
"{\"_c0\":true,\"_c1\":10,\"_c2\":0.1,\"_c3\":\"embulk\",\"_c4\":\"2016-01-01 00:00:00 UTC\",\"_c5\":{\"k\":\"v\"}}",
|
291
|
-
"[1, 2, 3]",
|
335
|
+
"{\"_c0\":true,\"_c1\":10,\"_c2\":0.1,\"_c3\":\"embulk\",\"_c4\":\"2016-01-01 00:00:00 UTC\",\"_c5\":{\"k\":\"v\"}},",
|
336
|
+
"[1, 2, 3],",
|
292
337
|
"{\"_c0\":false,\"_c1\":-10,\"_c2\":1.0,\"_c3\":\"エンバルク\",\"_c4\":\"2016-01-01 00:00:00 +0000\",\"_c5\":[\"e0\",\"e1\"]}",
|
293
338
|
"]}"
|
294
339
|
));
|
@@ -319,6 +364,81 @@ public class TestJsonpathParserPlugin
|
|
319
364
|
recreatePageOutput();
|
320
365
|
}
|
321
366
|
|
367
|
+
@Test
|
368
|
+
public void useJsonPath()
|
369
|
+
throws Exception
|
370
|
+
{
|
371
|
+
SchemaConfig schema = schema(
|
372
|
+
column("__c0", BOOLEAN, config().set("path", "$._c0")), column("__c1", LONG, config().set("path", "$._c1")),
|
373
|
+
column("__c2", DOUBLE, config().set("path", "$._c2")), column("__c3", STRING, config().set("path", "$._c3")),
|
374
|
+
column("__c4", TIMESTAMP, config().set("format", "%Y-%m-%d %H:%M:%S %Z").set("path", "$._c4")),
|
375
|
+
column("__c5", JSON, config().set("path", "$._c5")));
|
376
|
+
ConfigSource config = this.config.deepCopy().set("columns", schema);
|
377
|
+
|
378
|
+
transaction(config, fileInput(
|
379
|
+
"[",
|
380
|
+
"{\"_c0\":true,\"_c1\":10,\"_c2\":0.1,\"_c3\":\"embulk\",\"_c4\":\"2016-01-01 00:00:00 UTC\",\"_c5\":{\"k\":\"v\"}},",
|
381
|
+
"[1, 2, 3],",
|
382
|
+
"{\"_c0\":false,\"_c1\":-10,\"_c2\":1.0,\"_c3\":\"エンバルク\",\"_c4\":\"2016-01-01 00:00:00 +0000\",\"_c5\":[\"e0\",\"e1\"]}",
|
383
|
+
"]"
|
384
|
+
));
|
385
|
+
|
386
|
+
List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
|
387
|
+
assertEquals(2, records.size());
|
388
|
+
|
389
|
+
Object[] record;
|
390
|
+
{
|
391
|
+
record = records.get(0);
|
392
|
+
assertEquals(true, record[0]);
|
393
|
+
assertEquals(10L, record[1]);
|
394
|
+
assertEquals(0.1, (Double) record[2], 0.0001);
|
395
|
+
assertEquals("embulk", record[3]);
|
396
|
+
assertEquals(Timestamp.ofEpochSecond(1451606400L), record[4]);
|
397
|
+
assertEquals(newMap(newString("k"), newString("v")), record[5]);
|
398
|
+
}
|
399
|
+
{
|
400
|
+
record = records.get(1);
|
401
|
+
assertEquals(false, record[0]);
|
402
|
+
assertEquals(-10L, record[1]);
|
403
|
+
assertEquals(1.0, (Double) record[2], 0.0001);
|
404
|
+
assertEquals("エンバルク", record[3]);
|
405
|
+
assertEquals(Timestamp.ofEpochSecond(1451606400L), record[4]);
|
406
|
+
assertEquals(newArray(newString("e0"), newString("e1")), record[5]);
|
407
|
+
}
|
408
|
+
|
409
|
+
recreatePageOutput();
|
410
|
+
}
|
411
|
+
|
412
|
+
@Test
|
413
|
+
public void writeNilsWithJsonPath()
|
414
|
+
throws Exception
|
415
|
+
{
|
416
|
+
SchemaConfig schema = schema(
|
417
|
+
column("__c0", BOOLEAN, config().set("path", "$._c0")), column("__c1", LONG, config().set("path", "$._c1")),
|
418
|
+
column("__c2", DOUBLE, config().set("path", "$._c2")), column("__c3", STRING, config().set("path", "$._c3")),
|
419
|
+
column("__c4", TIMESTAMP, config().set("format", "%Y-%m-%d %H:%M:%S %Z").set("path", "$._c4")),
|
420
|
+
column("__c5", JSON, config().set("path", "$._c5")));
|
421
|
+
ConfigSource config = this.config.deepCopy().set("columns", schema);
|
422
|
+
|
423
|
+
transaction(config, fileInput(
|
424
|
+
"[",
|
425
|
+
"{},",
|
426
|
+
"{\"_c0\":null,\"_c1\":null,\"_c2\":null},",
|
427
|
+
"{\"_c3\":null,\"_c4\":null,\"_c5\":null},",
|
428
|
+
"{}",
|
429
|
+
"]"
|
430
|
+
));
|
431
|
+
|
432
|
+
List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
|
433
|
+
assertEquals(4, records.size());
|
434
|
+
|
435
|
+
for (Object[] record : records) {
|
436
|
+
for (int i = 0; i < 6; i++) {
|
437
|
+
assertNull(record[i]);
|
438
|
+
}
|
439
|
+
}
|
440
|
+
}
|
441
|
+
|
322
442
|
private FileInput fileInput(String... lines)
|
323
443
|
throws Exception
|
324
444
|
{
|
metadata
CHANGED
@@ -1,14 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-parser-jsonpath
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Hiroyuki Sato
|
8
|
+
- Takuma kanari
|
8
9
|
autorequire:
|
9
10
|
bindir: bin
|
10
11
|
cert_chain: []
|
11
|
-
date: 2017-03-
|
12
|
+
date: 2017-03-13 00:00:00.000000000 Z
|
12
13
|
dependencies:
|
13
14
|
- !ruby/object:Gem::Dependency
|
14
15
|
requirement: !ruby/object:Gem::Requirement
|
@@ -69,6 +70,7 @@ dependencies:
|
|
69
70
|
description: Parses JSON files with JSONPath read by other file input plugins.
|
70
71
|
email:
|
71
72
|
- hiroysato@gmail.com
|
73
|
+
- chemtrails.t@gmail.com
|
72
74
|
executables: []
|
73
75
|
extensions: []
|
74
76
|
extra_rdoc_files: []
|
@@ -110,7 +112,7 @@ files:
|
|
110
112
|
- src/test/java/org/embulk/parser/jsonpath/cast/TestStringCast.java
|
111
113
|
- classpath/accessors-smart-1.1.jar
|
112
114
|
- classpath/asm-5.0.3.jar
|
113
|
-
- classpath/embulk-parser-jsonpath-0.
|
115
|
+
- classpath/embulk-parser-jsonpath-0.2.0.jar
|
114
116
|
- classpath/json-path-2.2.0.jar
|
115
117
|
- classpath/json-smart-2.2.1.jar
|
116
118
|
- classpath/slf4j-api-1.7.16.jar
|