embulk-parser-jsonpath 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8fa00c28b1cddf31875627709899e97cd22e68c2
4
- data.tar.gz: 9dce9b4ee36202ef63065b676221622f420091f5
3
+ metadata.gz: 183e0da4c58eb9f093c8f2f650b3b405e43cf753
4
+ data.tar.gz: 5bb2a2cfa9045013efc7b1b4dd2b48f03dc6956d
5
5
  SHA512:
6
- metadata.gz: ce5b684201f61e8afc284c64b32e9470f468c93ea68b739815507b40c4ad69f703c766bb0eb4a834cabc019826bb519d0bb69f9ff9f0c97aa1afdc5ebd7eb2bc
7
- data.tar.gz: 20732822287850a87b98828efd4e272f179237dcd0416960a0b066b20578675c36530869fc7d7aa9bc7ed269a7746c6da908d0337c78034aedbe4f7cfb2b86b9
6
+ metadata.gz: 858fd75dadb25d362e479669df01c68b2b9b521788824da5c48d3f6c684376a8969176f1fcfb8c8c8efcccb3ad38602b79c14ff503ca2b1c87344ef1298573a1
7
+ data.tar.gz: 17e5e84ccb3e7441a096fc35d451e0c544364021ae048038f8f7f267fff5da4d8ddc5c9bae0a5e3aa5f476438a371b6d9dff60642cdf8b8d76ac09b1a749c00f
@@ -1,5 +1,9 @@
1
1
  # ChangeLog
2
2
 
3
+ ## 0.1.3 (2017-03-07)
4
+
5
+ * Allow skipping invalid records/columns (@takumakanari)
6
+
3
7
  ## 0.1.2 (2017-02-28)
4
8
 
5
9
  * Output error detail when json parse failed.
@@ -14,7 +14,7 @@ configurations {
14
14
  provided
15
15
  }
16
16
 
17
- version = "0.1.2"
17
+ version = "0.1.3"
18
18
 
19
19
  sourceCompatibility = 1.7
20
20
  targetCompatibility = 1.7
@@ -1,8 +1,10 @@
1
1
  package org.embulk.parser.jsonpath;
2
2
 
3
3
  import com.google.common.base.Optional;
4
+ import com.google.common.base.Throwables;
4
5
  import com.google.common.collect.ImmutableMap;
5
6
  import com.jayway.jsonpath.JsonPath;
7
+ import com.jayway.jsonpath.PathNotFoundException;
6
8
  import org.embulk.config.Config;
7
9
  import org.embulk.config.ConfigDefault;
8
10
  import org.embulk.config.ConfigSource;
@@ -17,6 +19,7 @@ import org.embulk.spi.PageOutput;
17
19
  import org.embulk.spi.ParserPlugin;
18
20
  import org.embulk.spi.Schema;
19
21
  import org.embulk.spi.SchemaConfig;
22
+ import org.embulk.spi.json.JsonParseException;
20
23
  import org.embulk.spi.json.JsonParser;
21
24
  import org.embulk.spi.time.TimestampParser;
22
25
  import org.embulk.spi.util.FileInputInputStream;
@@ -93,48 +96,63 @@ public class JsonpathParserPlugin
93
96
  try (final PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), schema, output)) {
94
97
  ColumnVisitorImpl visitor = new ColumnVisitorImpl(task, schema, pageBuilder, timestampParsers);
95
98
 
96
- try (FileInputInputStream is = new FileInputInputStream(input)) {
97
- while (is.nextFile()) {
98
- // TODO more efficient handling.
99
- Value value;
100
- String json = JsonPath.read(is, jsonRoot).toString();
99
+ FileInputInputStream is = new FileInputInputStream(input);
100
+ while (is.nextFile()) {
101
+ Value value;
102
+ try {
103
+ String json;
104
+ try {
105
+ json = JsonPath.read(is, jsonRoot).toString();
106
+ }
107
+ catch (IOException e) {
108
+ throw Throwables.propagate(e);
109
+ }
110
+ catch (PathNotFoundException e) {
111
+ throw new DataException(String.format(Locale.ENGLISH, "Failed to get json root reason = %s",
112
+ e.getMessage()));
113
+ }
114
+
101
115
  try {
102
116
  value = jsonParser.parse(json);
103
117
  }
104
- catch (Exception ex) {
105
- logger.error(String.format(Locale.ENGLISH, "Parse failed input data = '%s'", json));
106
- throw new DataException(String.format(Locale.ENGLISH, "Parse failed reason = %s, input data = '%s'", ex.getMessage(), json));
118
+ catch (JsonParseException e) {
119
+ throw new DataException(String.format(Locale.ENGLISH, "Parse failed reason = %s, input data = '%s'",
120
+ e.getMessage(), json));
107
121
  }
108
122
 
109
123
  if (!value.isArrayValue()) {
110
124
  throw new JsonRecordValidateException("Json string is not representing array value.");
111
125
  }
126
+ }
127
+ catch (DataException e) {
128
+ skipOrThrow(e, stopOnInvalidRecord);
129
+ continue;
130
+ }
112
131
 
113
- for (Value recordValue : value.asArrayValue()) {
114
- if (!recordValue.isMapValue()) {
115
- if (stopOnInvalidRecord) {
116
- throw new JsonRecordValidateException("Json string is not representing map value.");
117
- }
118
- logger.warn(String.format(ENGLISH, "Skipped invalid record %s", recordValue));
119
- continue;
120
- }
132
+ for (Value recordValue : value.asArrayValue()) {
133
+ if (!recordValue.isMapValue()) {
134
+ skipOrThrow(new JsonRecordValidateException("Json string is not representing map value."),
135
+ stopOnInvalidRecord);
136
+ continue;
137
+ }
121
138
 
122
- logger.debug("recordValue = " + recordValue.toString());
123
- final Map<Value, Value> record = recordValue.asMapValue().map();
139
+ logger.debug("recordValue = " + recordValue.toString());
140
+ final Map<Value, Value> record = recordValue.asMapValue().map();
141
+ try {
124
142
  for (Column column : schema.getColumns()) {
125
143
  Value v = record.get(getColumnNameValue(column));
126
144
  visitor.setValue(v);
127
145
  column.visit(visitor);
128
146
  }
129
-
130
- pageBuilder.addRecord();
131
147
  }
148
+ catch (DataException e) {
149
+ skipOrThrow(e, stopOnInvalidRecord);
150
+ continue;
151
+ }
152
+
153
+ pageBuilder.addRecord();
132
154
  }
133
155
  }
134
- catch (IOException e) {
135
- // TODO more efficient exception handling.
136
- throw new DataException("catch IOException " + e);
137
- }
138
156
 
139
157
  pageBuilder.finish();
140
158
  }
@@ -154,4 +172,12 @@ public class JsonpathParserPlugin
154
172
  {
155
173
  return columnNameValues.get(column.getName());
156
174
  }
175
+
176
+ private void skipOrThrow(DataException cause, boolean stopOnInvalidRecord)
177
+ {
178
+ if (stopOnInvalidRecord) {
179
+ throw cause;
180
+ }
181
+ logger.warn(String.format(ENGLISH, "Skipped invalid record (%s)", cause));
182
+ }
157
183
  }
@@ -122,7 +122,7 @@ public class TestJsonpathParserPlugin
122
122
  }
123
123
 
124
124
  @Test
125
- public void throwDataException()
125
+ public void skipBrokenJson()
126
126
  throws Exception
127
127
  {
128
128
  SchemaConfig schema = schema(
@@ -130,6 +130,70 @@ public class TestJsonpathParserPlugin
130
130
  column("_c3", STRING), column("_c4", TIMESTAMP), column("_c5", JSON));
131
131
  ConfigSource config = this.config.deepCopy().set("columns", schema);
132
132
 
133
+ transaction(config, fileInput("BROKEN"));
134
+ List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
135
+ assertEquals(0, records.size());
136
+ }
137
+
138
+ @Test
139
+ public void skipBrokenColumn()
140
+ throws Exception
141
+ {
142
+ SchemaConfig schema = schema(column("_c1", TIMESTAMP));
143
+ ConfigSource config = this.config.deepCopy().set("columns", schema).
144
+ set("stop_on_invalid_record", false);
145
+
146
+ transaction(config, fileInput("{\"_c1\" : \"INVALID\"}"));
147
+ List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
148
+ assertEquals(0, records.size());
149
+ }
150
+
151
+ @Test
152
+ public void stopOnBrokenJson()
153
+ throws Exception
154
+ {
155
+ SchemaConfig schema = schema(
156
+ column("_c0", BOOLEAN), column("_c1", LONG), column("_c2", DOUBLE),
157
+ column("_c3", STRING), column("_c4", TIMESTAMP), column("_c5", JSON));
158
+ ConfigSource config = this.config.deepCopy().set("columns", schema).
159
+ set("stop_on_invalid_record", true);
160
+
161
+ try {
162
+ transaction(config, fileInput("BROKEN"));
163
+ fail();
164
+ }
165
+ catch (Throwable t) {
166
+ assertTrue(t instanceof DataException);
167
+ }
168
+ }
169
+
170
+ @Test
171
+ public void stopOnBrokenColumn()
172
+ throws Exception
173
+ {
174
+ SchemaConfig schema = schema(column("_c1", TIMESTAMP));
175
+ ConfigSource config = this.config.deepCopy().set("columns", schema).
176
+ set("stop_on_invalid_record", true);
177
+
178
+ try {
179
+ transaction(config, fileInput("{\"_c1\" : \"INVALID\"}"));
180
+ fail();
181
+ }
182
+ catch (Throwable t) {
183
+ assertTrue(t instanceof DataException);
184
+ }
185
+ }
186
+
187
+ @Test
188
+ public void throwDataException()
189
+ throws Exception
190
+ {
191
+ SchemaConfig schema = schema(
192
+ column("_c0", BOOLEAN), column("_c1", LONG), column("_c2", DOUBLE),
193
+ column("_c3", STRING), column("_c4", TIMESTAMP), column("_c5", JSON));
194
+ ConfigSource config = this.config.deepCopy().set("columns", schema).
195
+ set("stop_on_invalid_record", true);
196
+
133
197
  try {
134
198
  transaction(config, fileInput(
135
199
  "\"not_map_value\""
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-parser-jsonpath
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Hiroyuki Sato
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-02-28 00:00:00.000000000 Z
11
+ date: 2017-03-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -110,7 +110,7 @@ files:
110
110
  - src/test/java/org/embulk/parser/jsonpath/cast/TestStringCast.java
111
111
  - classpath/accessors-smart-1.1.jar
112
112
  - classpath/asm-5.0.3.jar
113
- - classpath/embulk-parser-jsonpath-0.1.2.jar
113
+ - classpath/embulk-parser-jsonpath-0.1.3.jar
114
114
  - classpath/json-path-2.2.0.jar
115
115
  - classpath/json-smart-2.2.1.jar
116
116
  - classpath/slf4j-api-1.7.16.jar