embulk-parser-jsonpath 0.1.3 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 183e0da4c58eb9f093c8f2f650b3b405e43cf753
4
- data.tar.gz: 5bb2a2cfa9045013efc7b1b4dd2b48f03dc6956d
3
+ metadata.gz: 5a5c5cf3bdba81a3a1bb16dae74d6c4a7af3379c
4
+ data.tar.gz: e041030833ba05bcc5967e84d4d0e6604b10094c
5
5
  SHA512:
6
- metadata.gz: 858fd75dadb25d362e479669df01c68b2b9b521788824da5c48d3f6c684376a8969176f1fcfb8c8c8efcccb3ad38602b79c14ff503ca2b1c87344ef1298573a1
7
- data.tar.gz: 17e5e84ccb3e7441a096fc35d451e0c544364021ae048038f8f7f267fff5da4d8ddc5c9bae0a5e3aa5f476438a371b6d9dff60642cdf8b8d76ac09b1a749c00f
6
+ metadata.gz: 2a532592bc7672a092c43da076b51a0773284a55db1e1b3a4a547247031f981a3acdde9afa53ed5653f4742b94d7abc9ed76bb93ac4cae1ed58bff6eb3242a54
7
+ data.tar.gz: 428416e6b448f50458c7e36e0b1de078fb110737ae081b3a92e70751639216efd49f91017743ecfdcb20d23bdfab16d168621daf45981b7c8c70318a5af74604
@@ -1,5 +1,11 @@
1
1
  # ChangeLog
2
2
 
3
+ ## 0.2.0 (2017-03-13)
4
+
5
+ * Support `path` parameter in column config. (@takumakanari)
6
+ * Allow some strings to be converted to boolean. (@takumakanari)
7
+ * Support `schema` parameter for compatibility with [embulk-parser-json](https://github.com/takumakanari/embulk-parser-json).
8
+
3
9
  ## 0.1.3 (2017-03-07)
4
10
 
5
11
  * Make enable to skip invalid records/columns(@takumakanari)
data/README.md CHANGED
@@ -17,8 +17,19 @@ The JSON with [JSONPath](http://goessner.net/articles/JsonPath/) parser plugin f
17
17
  * **default_timestamp_format**: Default timestamp format of the timestamp (string, default: `%Y-%m-%d %H:%M:%S.%N %z`)
18
18
  * **default_typecast**: Specify whether to cast values automatically to the specified types or not (boolean, default: true)
19
19
 
20
+ ### columns
21
+
22
+ * **name**: Name of the column (string, required)
23
+ * **type**: Type of the column (string, required)
24
+ * **timezone**: Timezone of the timestamp if type is timestamp (string, default: default_timezone)
25
+ * **format**: Format of the timestamp if type is timestamp (string, default: default_timestamp_format)
26
+ * **typecast**: Whether to cast values or not (boolean, default: default_typecast)
27
+ * **path**: JSONPath for the specific column. (string, default: `null`)
28
+
20
29
  ## Example
21
30
 
31
+ ### Basic Usage
32
+
22
33
  ```json
23
34
  {
24
35
  "count": 100,
@@ -88,12 +99,104 @@ registered_at (timestamp) : 2014-06-30 19:25:27 UTC
88
99
  age ( long) : 73
89
100
  ratio ( double) : 50.608
90
101
  ```
102
+ ### Handle more complicated json
103
+
104
+
105
+ If you want to handle more complicated JSON, you can also specify a JSONPath in the **path** option of the columns section, as follows:
106
+
107
+ ```json
108
+ {
109
+ "result" : "success",
110
+ "students" : [
111
+ { "names" : ["John", "Lennon"], "age" : 10 },
112
+ { "names" : ["Paul", "Maccartney"], "age" : 10 }
113
+ ]
114
+ }
115
+ ```
116
+
117
+ ```yaml
118
+ root: $.students
119
+ columns:
120
+ - {name: firstName, type: string, path: "names[0]"}
121
+ - {name: lastName, type: string, path: "names[1]"}
122
+ ```
123
+
124
+ In this case, `names[0]` is mapped to the `firstName` column of the schema and `names[1]` to the `lastName` column.
125
+
126
+ ## Guess
127
+
128
+ This plugin supports a minimal `guess` feature. You don't have to write the `parser:` section in the configuration file.
129
+ After writing the `in:` section, you can let embulk guess the `parser:` section using this command:
91
130
 
92
131
  ```
93
132
  $ embulk gem install embulk-parser-jsonpath
94
133
  $ embulk guess -g jsonpath config.yml -o guessed.yml
95
134
  ```
96
135
 
136
+ ### Example
137
+
138
+ If you want to `guess` the following JSON file,
139
+ (This JSON data starts with an array)
140
+ You don't need a `parser` section.
141
+
142
+ ```json
143
+ [
144
+ {
145
+ "name": "Hugh Rutherford",
146
+ "city": "Mitchellfurt",
147
+ "street_name": "Ondricka Island",
148
+ "zip_code": "75232",
149
+ "registered_at": "2015-09-09 05:28:45",
150
+ "vegetarian": true,
151
+ "age": 44,
152
+ "ratio": 79.092
153
+ }
154
+ ]
155
+ ```
156
+
157
+ ```yaml
158
+ in:
159
+ type: file
160
+ path_prefix: example/hoge
161
+ out:
162
+ type: stdout
163
+ ```
164
+
165
+ However, if the JSON data doesn't start with an array,
166
+ you have to specify the `root` parameter explicitly.
167
+
168
+ ```json
169
+ {
170
+ "count": 100,
171
+ "page": 1,
172
+ "results": [
173
+ {
174
+ "name": "Hugh Rutherford",
175
+ "city": "Mitchellfurt",
176
+ "street_name": "Ondricka Island",
177
+ "zip_code": "75232",
178
+ "registered_at": "2015-09-09 05:28:45",
179
+ "vegetarian": true,
180
+ "age": 44,
181
+ "ratio": 79.092
182
+ }
183
+ ]
184
+ }
185
+ ```
186
+
187
+
188
+ ```yaml
189
+ in:
190
+ type: file
191
+ path_prefix: example/input
192
+ parser:
193
+ type: jsonpath
194
+ root: "$.results"
195
+ out:
196
+ type: stdout
197
+ ```
198
+
199
+
97
200
  ## Build
98
201
 
99
202
  ```
@@ -14,7 +14,7 @@ configurations {
14
14
  provided
15
15
  }
16
16
 
17
- version = "0.1.3"
17
+ version = "0.2.0"
18
18
 
19
19
  sourceCompatibility = 1.7
20
20
  targetCompatibility = 1.7
@@ -90,10 +90,10 @@ task gemspec {
90
90
  Gem::Specification.new do |spec|
91
91
  spec.name = "${project.name}"
92
92
  spec.version = "${project.version}"
93
- spec.authors = ["Hiroyuki Sato"]
93
+ spec.authors = ["Hiroyuki Sato","Takuma kanari"]
94
94
  spec.summary = %[JSON parser with JSONPath plugin for Embulk]
95
95
  spec.description = %[Parses JSON files with JSONPath read by other file input plugins.]
96
- spec.email = ["hiroysato@gmail.com"]
96
+ spec.email = ["hiroysato@gmail.com","chemtrails.t@gmail.com"]
97
97
  spec.licenses = ["MIT"]
98
98
  spec.homepage = "https://github.com/hiroyuki-sato/embulk-parser-jsonpath"
99
99
 
@@ -1,29 +1,44 @@
1
1
  package org.embulk.parser.jsonpath;
2
2
 
3
+ import com.fasterxml.jackson.databind.JsonNode;
3
4
  import com.google.common.base.Optional;
5
+ import com.google.common.collect.ImmutableList;
4
6
  import org.embulk.parser.jsonpath.JsonpathParserPlugin.PluginTask;
5
7
  import org.embulk.parser.jsonpath.JsonpathParserPlugin.TypecastColumnOption;
6
-
7
8
  import org.embulk.spi.Column;
8
9
  import org.embulk.spi.ColumnConfig;
9
10
  import org.embulk.spi.ColumnVisitor;
10
11
  import org.embulk.spi.PageBuilder;
11
12
  import org.embulk.spi.Schema;
12
13
  import org.embulk.spi.SchemaConfig;
14
+ import org.embulk.spi.json.JsonParseException;
15
+ import org.embulk.spi.json.JsonParser;
13
16
  import org.embulk.spi.time.Timestamp;
14
17
  import org.embulk.spi.time.TimestampParser;
15
18
  import org.msgpack.core.MessageTypeException;
16
- import org.msgpack.value.Value;
17
19
 
18
- public class ColumnVisitorImpl implements ColumnVisitor
20
+ import java.util.List;
21
+
22
+ import static java.lang.String.format;
23
+ import static org.msgpack.value.ValueFactory.newBoolean;
24
+ import static org.msgpack.value.ValueFactory.newFloat;
25
+ import static org.msgpack.value.ValueFactory.newInteger;
26
+ import static org.msgpack.value.ValueFactory.newString;
27
+
28
+ public class ColumnVisitorImpl
29
+ implements ColumnVisitor
19
30
  {
31
+ private static final JsonParser JSON_PARSER = new JsonParser();
32
+ private static final List<String> BOOL_TRUE_STRINGS = ImmutableList.of("true", "1", "yes", "on", "y", "t");
33
+ private static final List<String> BOOL_FALSE_STRINGS = ImmutableList.of("false", "0", "no", "off", "n", "f");
34
+
20
35
  protected final PluginTask task;
21
36
  protected final Schema schema;
22
37
  protected final PageBuilder pageBuilder;
23
38
  protected final TimestampParser[] timestampParsers;
24
39
  protected final Boolean[] autoTypecasts;
25
40
 
26
- protected Value value;
41
+ protected JsonNode value;
27
42
 
28
43
  public ColumnVisitorImpl(PluginTask task, Schema schema, PageBuilder pageBuilder, TimestampParser[] timestampParsers)
29
44
  {
@@ -41,20 +56,20 @@ public class ColumnVisitorImpl implements ColumnVisitor
41
56
  this.autoTypecasts[column.getIndex()] = task.getDefaultTypecast();
42
57
  }
43
58
 
44
- // Optional<SchemaConfig> schemaConfig = task.getColumns();
45
- SchemaConfig schemaConfig = task.getSchemaConfig();
59
+ // typecast option supports `columns` only.
60
+ Optional<SchemaConfig> schemaConfig = task.getSchemaConfig();
46
61
 
47
- // if (schemaConfig.isPresent()) {
48
- for (ColumnConfig columnConfig : schemaConfig.getColumns()) {
49
- TypecastColumnOption columnOption = columnConfig.getOption().loadConfig(TypecastColumnOption.class);
50
- Boolean autoTypecast = columnOption.getTypecast().or(task.getDefaultTypecast());
51
- Column column = schema.lookupColumn(columnConfig.getName());
52
- this.autoTypecasts[column.getIndex()] = autoTypecast;
62
+ if (schemaConfig.isPresent()) {
63
+ for (ColumnConfig columnConfig : schemaConfig.get().getColumns()) {
64
+ TypecastColumnOption columnOption = columnConfig.getOption().loadConfig(TypecastColumnOption.class);
65
+ Boolean autoTypecast = columnOption.getTypecast().or(task.getDefaultTypecast());
66
+ Column column = schema.lookupColumn(columnConfig.getName());
67
+ this.autoTypecasts[column.getIndex()] = autoTypecast;
68
+ }
53
69
  }
54
- // }
55
70
  }
56
71
 
57
- public void setValue(Value value)
72
+ public void setValue(JsonNode value)
58
73
  {
59
74
  this.value = value;
60
75
  }
@@ -64,15 +79,32 @@ public class ColumnVisitorImpl implements ColumnVisitor
64
79
  {
65
80
  if (isNil(value)) {
66
81
  pageBuilder.setNull(column);
82
+ return;
83
+ }
84
+
85
+ final boolean val;
86
+ if (value.isBoolean()) {
87
+ val = value.asBoolean();
67
88
  }
68
89
  else {
69
- try {
70
- boolean booleanValue = autoTypecasts[column.getIndex()] ? ColumnCaster.asBoolean(value) : value.asBooleanValue().getBoolean();
71
- pageBuilder.setBoolean(column, booleanValue);
90
+ String stringValue = valueAsString().toLowerCase();
91
+ if (BOOL_TRUE_STRINGS.contains(stringValue)) {
92
+ val = true;
72
93
  }
73
- catch (MessageTypeException e) {
74
- throw new JsonRecordValidateException(String.format("failed to get \"%s\" as Boolean", value), e);
94
+ else if (BOOL_FALSE_STRINGS.contains(stringValue)) {
95
+ val = false;
75
96
  }
97
+ else {
98
+ throw new JsonRecordValidateException(format("can not convert '%s' to Boolean", value));
99
+ }
100
+ }
101
+
102
+ try {
103
+ boolean booleanValue = autoTypecasts[column.getIndex()] ? ColumnCaster.asBoolean(newBoolean(val)) : val;
104
+ pageBuilder.setBoolean(column, booleanValue);
105
+ }
106
+ catch (MessageTypeException e) {
107
+ throw new JsonRecordValidateException(format("failed to get \"%s\" as Boolean", value), e);
76
108
  }
77
109
  }
78
110
 
@@ -84,11 +116,11 @@ public class ColumnVisitorImpl implements ColumnVisitor
84
116
  }
85
117
  else {
86
118
  try {
87
- long longValue = autoTypecasts[column.getIndex()] ? ColumnCaster.asLong(value) : value.asIntegerValue().toLong();
119
+ long longValue = autoTypecasts[column.getIndex()] ? ColumnCaster.asLong(newInteger(value.asLong())) : value.asLong();
88
120
  pageBuilder.setLong(column, longValue);
89
121
  }
90
122
  catch (MessageTypeException e) {
91
- throw new JsonRecordValidateException(String.format("failed to get \"%s\" as Long", value), e);
123
+ throw new JsonRecordValidateException(format("failed to get \"%s\" as Long", value), e);
92
124
  }
93
125
  }
94
126
  }
@@ -101,11 +133,11 @@ public class ColumnVisitorImpl implements ColumnVisitor
101
133
  }
102
134
  else {
103
135
  try {
104
- double doubleValue = autoTypecasts[column.getIndex()] ? ColumnCaster.asDouble(value) : value.asFloatValue().toDouble();
136
+ double doubleValue = autoTypecasts[column.getIndex()] ? ColumnCaster.asDouble(newFloat(value.asDouble())) : value.asDouble();
105
137
  pageBuilder.setDouble(column, doubleValue);
106
138
  }
107
139
  catch (MessageTypeException e) {
108
- throw new JsonRecordValidateException(String.format("failed get \"%s\" as Double", value), e);
140
+ throw new JsonRecordValidateException(format("failed get \"%s\" as Double", value), e);
109
141
  }
110
142
  }
111
143
  }
@@ -117,12 +149,13 @@ public class ColumnVisitorImpl implements ColumnVisitor
117
149
  pageBuilder.setNull(column);
118
150
  }
119
151
  else {
152
+ final String stringValue = valueAsString();
120
153
  try {
121
- String string = autoTypecasts[column.getIndex()] ? ColumnCaster.asString(value) : value.asStringValue().toString();
154
+ String string = autoTypecasts[column.getIndex()] ? ColumnCaster.asString(newString(stringValue)) : stringValue;
122
155
  pageBuilder.setString(column, string);
123
156
  }
124
157
  catch (MessageTypeException e) {
125
- throw new JsonRecordValidateException(String.format("failed to get \"%s\" as String", value), e);
158
+ throw new JsonRecordValidateException(format("failed to get \"%s\" as String", value), e);
126
159
  }
127
160
  }
128
161
  }
@@ -135,11 +168,11 @@ public class ColumnVisitorImpl implements ColumnVisitor
135
168
  }
136
169
  else {
137
170
  try {
138
- Timestamp timestamp = ColumnCaster.asTimestamp(value, timestampParsers[column.getIndex()]);
171
+ Timestamp timestamp = ColumnCaster.asTimestamp(newString(value.asText()), timestampParsers[column.getIndex()]);
139
172
  pageBuilder.setTimestamp(column, timestamp);
140
173
  }
141
174
  catch (MessageTypeException e) {
142
- throw new JsonRecordValidateException(String.format("failed to get \"%s\" as Timestamp", value), e);
175
+ throw new JsonRecordValidateException(format("failed to get \"%s\" as Timestamp", value), e);
143
176
  }
144
177
  }
145
178
  }
@@ -152,16 +185,21 @@ public class ColumnVisitorImpl implements ColumnVisitor
152
185
  }
153
186
  else {
154
187
  try {
155
- pageBuilder.setJson(column, value);
188
+ pageBuilder.setJson(column, JSON_PARSER.parse(valueAsString()));
156
189
  }
157
- catch (MessageTypeException e) {
158
- throw new JsonRecordValidateException(String.format("failed to get \"%s\" as Json", value), e);
190
+ catch (MessageTypeException | JsonParseException e) {
191
+ throw new JsonRecordValidateException(format("failed to get \"%s\" as Json", value), e);
159
192
  }
160
193
  }
161
194
  }
162
195
 
163
- protected boolean isNil(Value v)
196
+ protected boolean isNil(JsonNode v)
197
+ {
198
+ return v == null || v.isNull();
199
+ }
200
+
201
+ private String valueAsString()
164
202
  {
165
- return v == null || v.isNilValue();
203
+ return value.isTextual() ? value.asText() : value.toString();
166
204
  }
167
205
  }
@@ -1,16 +1,23 @@
1
1
  package org.embulk.parser.jsonpath;
2
2
 
3
+ import com.fasterxml.jackson.databind.JsonNode;
4
+ import com.fasterxml.jackson.databind.node.JsonNodeType;
3
5
  import com.google.common.base.Optional;
4
- import com.google.common.base.Throwables;
5
6
  import com.google.common.collect.ImmutableMap;
7
+ import com.jayway.jsonpath.Configuration;
8
+ import com.jayway.jsonpath.InvalidJsonException;
6
9
  import com.jayway.jsonpath.JsonPath;
7
10
  import com.jayway.jsonpath.PathNotFoundException;
11
+ import com.jayway.jsonpath.spi.json.JacksonJsonNodeJsonProvider;
12
+ import com.jayway.jsonpath.spi.mapper.JacksonMappingProvider;
8
13
  import org.embulk.config.Config;
9
14
  import org.embulk.config.ConfigDefault;
15
+ import org.embulk.config.ConfigException;
10
16
  import org.embulk.config.ConfigSource;
11
17
  import org.embulk.config.Task;
12
18
  import org.embulk.config.TaskSource;
13
19
  import org.embulk.spi.Column;
20
+ import org.embulk.spi.ColumnConfig;
14
21
  import org.embulk.spi.DataException;
15
22
  import org.embulk.spi.Exec;
16
23
  import org.embulk.spi.FileInput;
@@ -19,20 +26,16 @@ import org.embulk.spi.PageOutput;
19
26
  import org.embulk.spi.ParserPlugin;
20
27
  import org.embulk.spi.Schema;
21
28
  import org.embulk.spi.SchemaConfig;
22
- import org.embulk.spi.json.JsonParseException;
23
- import org.embulk.spi.json.JsonParser;
24
29
  import org.embulk.spi.time.TimestampParser;
25
30
  import org.embulk.spi.util.FileInputInputStream;
26
31
  import org.embulk.spi.util.Timestamps;
27
- import org.msgpack.value.Value;
28
32
  import org.slf4j.Logger;
29
33
 
30
- import java.io.IOException;
31
34
  import java.util.Locale;
32
35
  import java.util.Map;
33
36
 
37
+ import static java.lang.String.format;
34
38
  import static java.util.Locale.ENGLISH;
35
- import static org.msgpack.value.ValueFactory.newString;
36
39
 
37
40
  public class JsonpathParserPlugin
38
41
  implements ParserPlugin
@@ -40,14 +43,18 @@ public class JsonpathParserPlugin
40
43
 
41
44
  private static final Logger logger = Exec.getLogger(JsonpathParserPlugin.class);
42
45
 
43
- private Map<String, Value> columnNameValues;
46
+ private static final Configuration JSON_PATH_CONFIG = Configuration
47
+ .builder()
48
+ .mappingProvider(new JacksonMappingProvider())
49
+ .jsonProvider(new JacksonJsonNodeJsonProvider())
50
+ .build();
44
51
 
45
52
  public interface TypecastColumnOption
46
53
  extends Task
47
54
  {
48
55
  @Config("typecast")
49
56
  @ConfigDefault("null")
50
- public Optional<Boolean> getTypecast();
57
+ Optional<Boolean> getTypecast();
51
58
  }
52
59
 
53
60
  public interface PluginTask
@@ -55,10 +62,16 @@ public class JsonpathParserPlugin
55
62
  {
56
63
  @Config("root")
57
64
  @ConfigDefault("\"$\"")
58
- public String getRoot();
65
+ String getRoot();
59
66
 
60
67
  @Config("columns")
61
- SchemaConfig getSchemaConfig();
68
+ @ConfigDefault("null")
69
+ Optional<SchemaConfig> getSchemaConfig();
70
+
71
+ @Config("schema")
72
+ @ConfigDefault("null")
73
+ @Deprecated
74
+ Optional<SchemaConfig> getOldSchemaConfig();
62
75
 
63
76
  @Config("default_typecast")
64
77
  @ConfigDefault("true")
@@ -69,12 +82,20 @@ public class JsonpathParserPlugin
69
82
  boolean getStopOnInvalidRecord();
70
83
  }
71
84
 
85
+ public interface JsonpathColumnOption
86
+ extends Task
87
+ {
88
+ @Config("path")
89
+ @ConfigDefault("null")
90
+ Optional<String> getPath();
91
+ }
92
+
72
93
  @Override
73
94
  public void transaction(ConfigSource config, ParserPlugin.Control control)
74
95
  {
75
96
  PluginTask task = config.loadConfig(PluginTask.class);
76
97
 
77
- Schema schema = task.getSchemaConfig().toSchema();
98
+ Schema schema = getSchemaConfig(task).toSchema();
78
99
 
79
100
  control.run(task.dump(), schema);
80
101
  }
@@ -86,11 +107,9 @@ public class JsonpathParserPlugin
86
107
  PluginTask task = taskSource.loadTask(PluginTask.class);
87
108
  String jsonRoot = task.getRoot();
88
109
 
89
- setColumnNameValues(schema);
90
-
91
110
  logger.info("JSONPath = " + jsonRoot);
92
- final TimestampParser[] timestampParsers = Timestamps.newTimestampColumnParsers(task, task.getSchemaConfig());
93
- final JsonParser jsonParser = new JsonParser();
111
+ final TimestampParser[] timestampParsers = Timestamps.newTimestampColumnParsers(task, getSchemaConfig(task));
112
+ final Map<Column, String> jsonPathMap = createJsonPathMap(task, schema);
94
113
  final boolean stopOnInvalidRecord = task.getStopOnInvalidRecord();
95
114
 
96
115
  try (final PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), schema, output)) {
@@ -98,59 +117,56 @@ public class JsonpathParserPlugin
98
117
 
99
118
  FileInputInputStream is = new FileInputInputStream(input);
100
119
  while (is.nextFile()) {
101
- Value value;
120
+ final JsonNode json;
102
121
  try {
103
- String json;
104
- try {
105
- json = JsonPath.read(is, jsonRoot).toString();
106
- }
107
- catch (IOException e) {
108
- throw Throwables.propagate(e);
109
- }
110
- catch (PathNotFoundException e) {
111
- throw new DataException(String.format(Locale.ENGLISH, "Failed to get json root reason = %s",
112
- e.getMessage()));
113
- }
114
-
115
- try {
116
- value = jsonParser.parse(json);
117
- }
118
- catch (JsonParseException e) {
119
- throw new DataException(String.format(Locale.ENGLISH, "Parse failed reason = %s, input data = '%s'",
120
- e.getMessage(), json));
121
- }
122
-
123
- if (!value.isArrayValue()) {
124
- throw new JsonRecordValidateException("Json string is not representing array value.");
125
- }
122
+ json = JsonPath.using(JSON_PATH_CONFIG).parse(is).read(jsonRoot, JsonNode.class);
126
123
  }
127
- catch (DataException e) {
128
- skipOrThrow(e, stopOnInvalidRecord);
124
+ catch (PathNotFoundException e) {
125
+ skipOrThrow(new DataException(format(Locale.ENGLISH,
126
+ "Failed to get root json path='%s'", jsonRoot)), stopOnInvalidRecord);
127
+ continue;
128
+ }
129
+ catch (InvalidJsonException e) {
130
+ skipOrThrow(new DataException(e), stopOnInvalidRecord);
129
131
  continue;
130
132
  }
131
133
 
132
- for (Value recordValue : value.asArrayValue()) {
133
- if (!recordValue.isMapValue()) {
134
- skipOrThrow(new JsonRecordValidateException("Json string is not representing map value."),
135
- stopOnInvalidRecord);
136
- continue;
137
- }
134
+ if (!json.isArray()) {
135
+ skipOrThrow(new JsonRecordValidateException(format(Locale.ENGLISH,
136
+ "Json string is not representing array value json='%s'", json)), stopOnInvalidRecord);
137
+ continue;
138
+ }
138
139
 
139
- logger.debug("recordValue = " + recordValue.toString());
140
- final Map<Value, Value> record = recordValue.asMapValue().map();
140
+ for (JsonNode recordValue : json) {
141
141
  try {
142
+ if (recordValue.getNodeType() != JsonNodeType.OBJECT) {
143
+ throw new JsonRecordValidateException(format(Locale.ENGLISH,
144
+ "Json string is not representing map value json='%s'", recordValue));
145
+ }
146
+
142
147
  for (Column column : schema.getColumns()) {
143
- Value v = record.get(getColumnNameValue(column));
144
- visitor.setValue(v);
148
+ JsonNode value = null;
149
+ if (jsonPathMap.containsKey(column)) {
150
+ try {
151
+ value = JsonPath.using(JSON_PATH_CONFIG).parse(recordValue).read(jsonPathMap.get(column));
152
+ }
153
+ catch (PathNotFoundException e) {
154
+ // pass (value is nullable)
155
+ }
156
+ }
157
+ else {
158
+ value = recordValue.get(column.getName());
159
+ }
160
+ visitor.setValue(value);
145
161
  column.visit(visitor);
146
162
  }
163
+
164
+ pageBuilder.addRecord();
147
165
  }
148
166
  catch (DataException e) {
149
167
  skipOrThrow(e, stopOnInvalidRecord);
150
168
  continue;
151
169
  }
152
-
153
- pageBuilder.addRecord();
154
170
  }
155
171
  }
156
172
 
@@ -158,19 +174,17 @@ public class JsonpathParserPlugin
158
174
  }
159
175
  }
160
176
 
161
- private void setColumnNameValues(Schema schema)
177
+ private Map<Column, String> createJsonPathMap(PluginTask task, Schema schema)
162
178
  {
163
- ImmutableMap.Builder<String, Value> builder = ImmutableMap.builder();
164
- for (Column column : schema.getColumns()) {
165
- String name = column.getName();
166
- builder.put(name, newString(name));
179
+ ImmutableMap.Builder<Column, String> builder = ImmutableMap.builder();
180
+ for (int i = 0; i < schema.size(); i++) {
181
+ ColumnConfig config = getSchemaConfig(task).getColumn(i);
182
+ JsonpathColumnOption option = config.getOption().loadConfig(JsonpathColumnOption.class);
183
+ if (option.getPath().isPresent()) {
184
+ builder.put(schema.getColumn(i), option.getPath().get());
185
+ }
167
186
  }
168
- columnNameValues = builder.build();
169
- }
170
-
171
- private Value getColumnNameValue(Column column)
172
- {
173
- return columnNameValues.get(column.getName());
187
+ return builder.build();
174
188
  }
175
189
 
176
190
  private void skipOrThrow(DataException cause, boolean stopOnInvalidRecord)
@@ -180,4 +194,19 @@ public class JsonpathParserPlugin
180
194
  }
181
195
  logger.warn(String.format(ENGLISH, "Skipped invalid record (%s)", cause));
182
196
  }
197
+
198
+ // this method is to keep the backward compatibility of 'schema' option.
199
+ private SchemaConfig getSchemaConfig(PluginTask task)
200
+ {
201
+ if (task.getSchemaConfig().isPresent()) {
202
+ return task.getSchemaConfig().get();
203
+ }
204
+ else if (task.getOldSchemaConfig().isPresent()) {
205
+ logger.warn("Please use 'columns' option instead of 'schema' because the 'schema' option is deprecated. The next version will stop 'schema' option support.");
206
+ return task.getOldSchemaConfig().get();
207
+ }
208
+ else {
209
+ throw new ConfigException("Attribute 'columns' is required but not set");
210
+ }
211
+ }
183
212
  }
@@ -26,7 +26,6 @@ import java.io.ByteArrayInputStream;
26
26
  import java.io.File;
27
27
  import java.io.IOException;
28
28
  import java.io.InputStream;
29
- import java.nio.charset.Charset;
30
29
  import java.nio.charset.StandardCharsets;
31
30
  import java.util.List;
32
31
 
@@ -37,6 +36,7 @@ import static org.embulk.spi.type.Types.LONG;
37
36
  import static org.embulk.spi.type.Types.STRING;
38
37
  import static org.embulk.spi.type.Types.TIMESTAMP;
39
38
  import static org.junit.Assert.assertEquals;
39
+ import static org.junit.Assert.assertFalse;
40
40
  import static org.junit.Assert.assertNull;
41
41
  import static org.junit.Assert.assertTrue;
42
42
  import static org.junit.Assert.fail;
@@ -167,6 +167,51 @@ public class TestJsonpathParserPlugin
167
167
  }
168
168
  }
169
169
 
170
+ @Test
171
+ public void booleanStrings()
172
+ throws Exception
173
+ {
174
+ SchemaConfig schema = schema(column("_c1", BOOLEAN), column("_c2", BOOLEAN),
175
+ column("_c3", BOOLEAN), column("_c4", BOOLEAN), column("_c5", BOOLEAN),
176
+ column("_c6", BOOLEAN), column("_c7", BOOLEAN), column("_c8", BOOLEAN),
177
+ column("_c9", BOOLEAN), column("_c10", BOOLEAN), column("_c11", BOOLEAN),
178
+ column("_c12", BOOLEAN));
179
+ ConfigSource config = this.config.deepCopy().set("columns", schema);
180
+
181
+ transaction(config, fileInput("[{\"_c1\" : \"yes\", \"_c2\" : \"true\", \"_c3\" : \"1\",",
182
+ "\"_c4\" : \"on\", \"_c5\" : \"y\", \"_c6\" : \"t\",",
183
+ "\"_c7\" : \"no\", \"_c8\" : \"false\", \"_c9\" : \"0\"," ,
184
+ "\"_c10\" : \"off\", \"_c11\" : \"n\", \"_c12\" : \"f\"}]"));
185
+ List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
186
+ assertEquals(1, records.size());
187
+
188
+ Object[] record = records.get(0);
189
+ for (int i = 0; i < 5; i++) {
190
+ assertTrue((boolean) record[i]);
191
+ }
192
+ for (int i = 6; i < 11; i++) {
193
+ assertFalse((boolean) record[i]);
194
+ }
195
+ }
196
+
197
+ @Test
198
+ public void invalidBooleanString()
199
+ throws Exception
200
+ {
201
+ SchemaConfig schema = schema(column("_c1", BOOLEAN));
202
+ ConfigSource config = this.config.deepCopy().set("columns", schema).
203
+ set("stop_on_invalid_record", true);
204
+
205
+ try {
206
+ transaction(config,
207
+ fileInput("[{\"_c1\" : \"INVALID\"}]"));
208
+ fail();
209
+ }
210
+ catch (Throwable t) {
211
+ assertTrue(t instanceof DataException);
212
+ }
213
+ }
214
+
170
215
  @Test
171
216
  public void stopOnBrokenColumn()
172
217
  throws Exception
@@ -216,9 +261,9 @@ public class TestJsonpathParserPlugin
216
261
 
217
262
  transaction(config, fileInput(
218
263
  "[",
219
- "{}",
220
- "{\"_c0\":null,\"_c1\":null,\"_c2\":null}",
221
- "{\"_c3\":null,\"_c4\":null,\"_c5\":null}",
264
+ "{},",
265
+ "{\"_c0\":null,\"_c1\":null,\"_c2\":null},",
266
+ "{\"_c3\":null,\"_c4\":null,\"_c5\":null},",
222
267
  "{}",
223
268
  "]"
224
269
  ));
@@ -244,8 +289,8 @@ public class TestJsonpathParserPlugin
244
289
 
245
290
  transaction(config, fileInput(
246
291
  "[",
247
- "{\"_c0\":true,\"_c1\":10,\"_c2\":0.1,\"_c3\":\"embulk\",\"_c4\":\"2016-01-01 00:00:00 UTC\",\"_c5\":{\"k\":\"v\"}}",
248
- "[1, 2, 3]",
292
+ "{\"_c0\":true,\"_c1\":10,\"_c2\":0.1,\"_c3\":\"embulk\",\"_c4\":\"2016-01-01 00:00:00 UTC\",\"_c5\":{\"k\":\"v\"}},",
293
+ "[1, 2, 3],",
249
294
  "{\"_c0\":false,\"_c1\":-10,\"_c2\":1.0,\"_c3\":\"エンバルク\",\"_c4\":\"2016-01-01 00:00:00 +0000\",\"_c5\":[\"e0\",\"e1\"]}",
250
295
  "]"
251
296
  ));
@@ -287,8 +332,8 @@ public class TestJsonpathParserPlugin
287
332
 
288
333
  transaction(config, fileInput(
289
334
  "{\"records\":[",
290
- "{\"_c0\":true,\"_c1\":10,\"_c2\":0.1,\"_c3\":\"embulk\",\"_c4\":\"2016-01-01 00:00:00 UTC\",\"_c5\":{\"k\":\"v\"}}",
291
- "[1, 2, 3]",
335
+ "{\"_c0\":true,\"_c1\":10,\"_c2\":0.1,\"_c3\":\"embulk\",\"_c4\":\"2016-01-01 00:00:00 UTC\",\"_c5\":{\"k\":\"v\"}},",
336
+ "[1, 2, 3],",
292
337
  "{\"_c0\":false,\"_c1\":-10,\"_c2\":1.0,\"_c3\":\"エンバルク\",\"_c4\":\"2016-01-01 00:00:00 +0000\",\"_c5\":[\"e0\",\"e1\"]}",
293
338
  "]}"
294
339
  ));
@@ -319,6 +364,81 @@ public class TestJsonpathParserPlugin
319
364
  recreatePageOutput();
320
365
  }
321
366
 
367
+ @Test
368
+ public void useJsonPath()
369
+ throws Exception
370
+ {
371
+ SchemaConfig schema = schema(
372
+ column("__c0", BOOLEAN, config().set("path", "$._c0")), column("__c1", LONG, config().set("path", "$._c1")),
373
+ column("__c2", DOUBLE, config().set("path", "$._c2")), column("__c3", STRING, config().set("path", "$._c3")),
374
+ column("__c4", TIMESTAMP, config().set("format", "%Y-%m-%d %H:%M:%S %Z").set("path", "$._c4")),
375
+ column("__c5", JSON, config().set("path", "$._c5")));
376
+ ConfigSource config = this.config.deepCopy().set("columns", schema);
377
+
378
+ transaction(config, fileInput(
379
+ "[",
380
+ "{\"_c0\":true,\"_c1\":10,\"_c2\":0.1,\"_c3\":\"embulk\",\"_c4\":\"2016-01-01 00:00:00 UTC\",\"_c5\":{\"k\":\"v\"}},",
381
+ "[1, 2, 3],",
382
+ "{\"_c0\":false,\"_c1\":-10,\"_c2\":1.0,\"_c3\":\"エンバルク\",\"_c4\":\"2016-01-01 00:00:00 +0000\",\"_c5\":[\"e0\",\"e1\"]}",
383
+ "]"
384
+ ));
385
+
386
+ List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
387
+ assertEquals(2, records.size());
388
+
389
+ Object[] record;
390
+ {
391
+ record = records.get(0);
392
+ assertEquals(true, record[0]);
393
+ assertEquals(10L, record[1]);
394
+ assertEquals(0.1, (Double) record[2], 0.0001);
395
+ assertEquals("embulk", record[3]);
396
+ assertEquals(Timestamp.ofEpochSecond(1451606400L), record[4]);
397
+ assertEquals(newMap(newString("k"), newString("v")), record[5]);
398
+ }
399
+ {
400
+ record = records.get(1);
401
+ assertEquals(false, record[0]);
402
+ assertEquals(-10L, record[1]);
403
+ assertEquals(1.0, (Double) record[2], 0.0001);
404
+ assertEquals("エンバルク", record[3]);
405
+ assertEquals(Timestamp.ofEpochSecond(1451606400L), record[4]);
406
+ assertEquals(newArray(newString("e0"), newString("e1")), record[5]);
407
+ }
408
+
409
+ recreatePageOutput();
410
+ }
411
+
412
+ @Test
413
+ public void writeNilsWithJsonPath()
414
+ throws Exception
415
+ {
416
+ SchemaConfig schema = schema(
417
+ column("__c0", BOOLEAN, config().set("path", "$._c0")), column("__c1", LONG, config().set("path", "$._c1")),
418
+ column("__c2", DOUBLE, config().set("path", "$._c2")), column("__c3", STRING, config().set("path", "$._c3")),
419
+ column("__c4", TIMESTAMP, config().set("format", "%Y-%m-%d %H:%M:%S %Z").set("path", "$._c4")),
420
+ column("__c5", JSON, config().set("path", "$._c5")));
421
+ ConfigSource config = this.config.deepCopy().set("columns", schema);
422
+
423
+ transaction(config, fileInput(
424
+ "[",
425
+ "{},",
426
+ "{\"_c0\":null,\"_c1\":null,\"_c2\":null},",
427
+ "{\"_c3\":null,\"_c4\":null,\"_c5\":null},",
428
+ "{}",
429
+ "]"
430
+ ));
431
+
432
+ List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
433
+ assertEquals(4, records.size());
434
+
435
+ for (Object[] record : records) {
436
+ for (int i = 0; i < 6; i++) {
437
+ assertNull(record[i]);
438
+ }
439
+ }
440
+ }
441
+
322
442
  private FileInput fileInput(String... lines)
323
443
  throws Exception
324
444
  {
metadata CHANGED
@@ -1,14 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-parser-jsonpath
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Hiroyuki Sato
8
+ - Takuma kanari
8
9
  autorequire:
9
10
  bindir: bin
10
11
  cert_chain: []
11
- date: 2017-03-07 00:00:00.000000000 Z
12
+ date: 2017-03-13 00:00:00.000000000 Z
12
13
  dependencies:
13
14
  - !ruby/object:Gem::Dependency
14
15
  requirement: !ruby/object:Gem::Requirement
@@ -69,6 +70,7 @@ dependencies:
69
70
  description: Parses JSON files with JSONPath read by other file input plugins.
70
71
  email:
71
72
  - hiroysato@gmail.com
73
+ - chemtrails.t@gmail.com
72
74
  executables: []
73
75
  extensions: []
74
76
  extra_rdoc_files: []
@@ -110,7 +112,7 @@ files:
110
112
  - src/test/java/org/embulk/parser/jsonpath/cast/TestStringCast.java
111
113
  - classpath/accessors-smart-1.1.jar
112
114
  - classpath/asm-5.0.3.jar
113
- - classpath/embulk-parser-jsonpath-0.1.3.jar
115
+ - classpath/embulk-parser-jsonpath-0.2.0.jar
114
116
  - classpath/json-path-2.2.0.jar
115
117
  - classpath/json-smart-2.2.1.jar
116
118
  - classpath/slf4j-api-1.7.16.jar