embulk-parser-jsonpath 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8fa00c28b1cddf31875627709899e97cd22e68c2
4
- data.tar.gz: 9dce9b4ee36202ef63065b676221622f420091f5
3
+ metadata.gz: 183e0da4c58eb9f093c8f2f650b3b405e43cf753
4
+ data.tar.gz: 5bb2a2cfa9045013efc7b1b4dd2b48f03dc6956d
5
5
  SHA512:
6
- metadata.gz: ce5b684201f61e8afc284c64b32e9470f468c93ea68b739815507b40c4ad69f703c766bb0eb4a834cabc019826bb519d0bb69f9ff9f0c97aa1afdc5ebd7eb2bc
7
- data.tar.gz: 20732822287850a87b98828efd4e272f179237dcd0416960a0b066b20578675c36530869fc7d7aa9bc7ed269a7746c6da908d0337c78034aedbe4f7cfb2b86b9
6
+ metadata.gz: 858fd75dadb25d362e479669df01c68b2b9b521788824da5c48d3f6c684376a8969176f1fcfb8c8c8efcccb3ad38602b79c14ff503ca2b1c87344ef1298573a1
7
+ data.tar.gz: 17e5e84ccb3e7441a096fc35d451e0c544364021ae048038f8f7f267fff5da4d8ddc5c9bae0a5e3aa5f476438a371b6d9dff60642cdf8b8d76ac09b1a749c00f
@@ -1,5 +1,9 @@
1
1
  # ChangeLog
2
2
 
3
+ ## 0.1.3 (2017-03-07)
4
+
5
+ * Enable skipping of invalid records/columns (@takumakanari)
6
+
3
7
  ## 0.1.2 (2017-02-28)
4
8
 
5
9
  * Output error detail when json parse failed.
@@ -14,7 +14,7 @@ configurations {
14
14
  provided
15
15
  }
16
16
 
17
- version = "0.1.2"
17
+ version = "0.1.3"
18
18
 
19
19
  sourceCompatibility = 1.7
20
20
  targetCompatibility = 1.7
@@ -1,8 +1,10 @@
1
1
  package org.embulk.parser.jsonpath;
2
2
 
3
3
  import com.google.common.base.Optional;
4
+ import com.google.common.base.Throwables;
4
5
  import com.google.common.collect.ImmutableMap;
5
6
  import com.jayway.jsonpath.JsonPath;
7
+ import com.jayway.jsonpath.PathNotFoundException;
6
8
  import org.embulk.config.Config;
7
9
  import org.embulk.config.ConfigDefault;
8
10
  import org.embulk.config.ConfigSource;
@@ -17,6 +19,7 @@ import org.embulk.spi.PageOutput;
17
19
  import org.embulk.spi.ParserPlugin;
18
20
  import org.embulk.spi.Schema;
19
21
  import org.embulk.spi.SchemaConfig;
22
+ import org.embulk.spi.json.JsonParseException;
20
23
  import org.embulk.spi.json.JsonParser;
21
24
  import org.embulk.spi.time.TimestampParser;
22
25
  import org.embulk.spi.util.FileInputInputStream;
@@ -93,48 +96,63 @@ public class JsonpathParserPlugin
93
96
  try (final PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), schema, output)) {
94
97
  ColumnVisitorImpl visitor = new ColumnVisitorImpl(task, schema, pageBuilder, timestampParsers);
95
98
 
96
- try (FileInputInputStream is = new FileInputInputStream(input)) {
97
- while (is.nextFile()) {
98
- // TODO more efficient handling.
99
- Value value;
100
- String json = JsonPath.read(is, jsonRoot).toString();
99
+ FileInputInputStream is = new FileInputInputStream(input);
100
+ while (is.nextFile()) {
101
+ Value value;
102
+ try {
103
+ String json;
104
+ try {
105
+ json = JsonPath.read(is, jsonRoot).toString();
106
+ }
107
+ catch (IOException e) {
108
+ throw Throwables.propagate(e);
109
+ }
110
+ catch (PathNotFoundException e) {
111
+ throw new DataException(String.format(Locale.ENGLISH, "Failed to get json root reason = %s",
112
+ e.getMessage()));
113
+ }
114
+
101
115
  try {
102
116
  value = jsonParser.parse(json);
103
117
  }
104
- catch (Exception ex) {
105
- logger.error(String.format(Locale.ENGLISH, "Parse failed input data = '%s'", json));
106
- throw new DataException(String.format(Locale.ENGLISH, "Parse failed reason = %s, input data = '%s'", ex.getMessage(), json));
118
+ catch (JsonParseException e) {
119
+ throw new DataException(String.format(Locale.ENGLISH, "Parse failed reason = %s, input data = '%s'",
120
+ e.getMessage(), json));
107
121
  }
108
122
 
109
123
  if (!value.isArrayValue()) {
110
124
  throw new JsonRecordValidateException("Json string is not representing array value.");
111
125
  }
126
+ }
127
+ catch (DataException e) {
128
+ skipOrThrow(e, stopOnInvalidRecord);
129
+ continue;
130
+ }
112
131
 
113
- for (Value recordValue : value.asArrayValue()) {
114
- if (!recordValue.isMapValue()) {
115
- if (stopOnInvalidRecord) {
116
- throw new JsonRecordValidateException("Json string is not representing map value.");
117
- }
118
- logger.warn(String.format(ENGLISH, "Skipped invalid record %s", recordValue));
119
- continue;
120
- }
132
+ for (Value recordValue : value.asArrayValue()) {
133
+ if (!recordValue.isMapValue()) {
134
+ skipOrThrow(new JsonRecordValidateException("Json string is not representing map value."),
135
+ stopOnInvalidRecord);
136
+ continue;
137
+ }
121
138
 
122
- logger.debug("recordValue = " + recordValue.toString());
123
- final Map<Value, Value> record = recordValue.asMapValue().map();
139
+ logger.debug("recordValue = " + recordValue.toString());
140
+ final Map<Value, Value> record = recordValue.asMapValue().map();
141
+ try {
124
142
  for (Column column : schema.getColumns()) {
125
143
  Value v = record.get(getColumnNameValue(column));
126
144
  visitor.setValue(v);
127
145
  column.visit(visitor);
128
146
  }
129
-
130
- pageBuilder.addRecord();
131
147
  }
148
+ catch (DataException e) {
149
+ skipOrThrow(e, stopOnInvalidRecord);
150
+ continue;
151
+ }
152
+
153
+ pageBuilder.addRecord();
132
154
  }
133
155
  }
134
- catch (IOException e) {
135
- // TODO more efficient exception handling.
136
- throw new DataException("catch IOException " + e);
137
- }
138
156
 
139
157
  pageBuilder.finish();
140
158
  }
@@ -154,4 +172,12 @@ public class JsonpathParserPlugin
154
172
  {
155
173
  return columnNameValues.get(column.getName());
156
174
  }
175
+
176
+ private void skipOrThrow(DataException cause, boolean stopOnInvalidRecord)
177
+ {
178
+ if (stopOnInvalidRecord) {
179
+ throw cause;
180
+ }
181
+ logger.warn(String.format(ENGLISH, "Skipped invalid record (%s)", cause));
182
+ }
157
183
  }
@@ -122,7 +122,7 @@ public class TestJsonpathParserPlugin
122
122
  }
123
123
 
124
124
  @Test
125
- public void throwDataException()
125
+ public void skipBrokenJson()
126
126
  throws Exception
127
127
  {
128
128
  SchemaConfig schema = schema(
@@ -130,6 +130,70 @@ public class TestJsonpathParserPlugin
130
130
  column("_c3", STRING), column("_c4", TIMESTAMP), column("_c5", JSON));
131
131
  ConfigSource config = this.config.deepCopy().set("columns", schema);
132
132
 
133
+ transaction(config, fileInput("BROKEN"));
134
+ List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
135
+ assertEquals(0, records.size());
136
+ }
137
+
138
+ @Test
139
+ public void skipBrokenColumn()
140
+ throws Exception
141
+ {
142
+ SchemaConfig schema = schema(column("_c1", TIMESTAMP));
143
+ ConfigSource config = this.config.deepCopy().set("columns", schema).
144
+ set("stop_on_invalid_record", false);
145
+
146
+ transaction(config, fileInput("{\"_c1\" : \"INVALID\"}"));
147
+ List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
148
+ assertEquals(0, records.size());
149
+ }
150
+
151
+ @Test
152
+ public void stopOnBrokenJson()
153
+ throws Exception
154
+ {
155
+ SchemaConfig schema = schema(
156
+ column("_c0", BOOLEAN), column("_c1", LONG), column("_c2", DOUBLE),
157
+ column("_c3", STRING), column("_c4", TIMESTAMP), column("_c5", JSON));
158
+ ConfigSource config = this.config.deepCopy().set("columns", schema).
159
+ set("stop_on_invalid_record", true);
160
+
161
+ try {
162
+ transaction(config, fileInput("BROKEN"));
163
+ fail();
164
+ }
165
+ catch (Throwable t) {
166
+ assertTrue(t instanceof DataException);
167
+ }
168
+ }
169
+
170
+ @Test
171
+ public void stopOnBrokenColumn()
172
+ throws Exception
173
+ {
174
+ SchemaConfig schema = schema(column("_c1", TIMESTAMP));
175
+ ConfigSource config = this.config.deepCopy().set("columns", schema).
176
+ set("stop_on_invalid_record", true);
177
+
178
+ try {
179
+ transaction(config, fileInput("{\"_c1\" : \"INVALID\"}"));
180
+ fail();
181
+ }
182
+ catch (Throwable t) {
183
+ assertTrue(t instanceof DataException);
184
+ }
185
+ }
186
+
187
+ @Test
188
+ public void throwDataException()
189
+ throws Exception
190
+ {
191
+ SchemaConfig schema = schema(
192
+ column("_c0", BOOLEAN), column("_c1", LONG), column("_c2", DOUBLE),
193
+ column("_c3", STRING), column("_c4", TIMESTAMP), column("_c5", JSON));
194
+ ConfigSource config = this.config.deepCopy().set("columns", schema).
195
+ set("stop_on_invalid_record", true);
196
+
133
197
  try {
134
198
  transaction(config, fileInput(
135
199
  "\"not_map_value\""
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-parser-jsonpath
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Hiroyuki Sato
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-02-28 00:00:00.000000000 Z
11
+ date: 2017-03-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -110,7 +110,7 @@ files:
110
110
  - src/test/java/org/embulk/parser/jsonpath/cast/TestStringCast.java
111
111
  - classpath/accessors-smart-1.1.jar
112
112
  - classpath/asm-5.0.3.jar
113
- - classpath/embulk-parser-jsonpath-0.1.2.jar
113
+ - classpath/embulk-parser-jsonpath-0.1.3.jar
114
114
  - classpath/json-path-2.2.0.jar
115
115
  - classpath/json-smart-2.2.1.jar
116
116
  - classpath/slf4j-api-1.7.16.jar