embulk-parser-jsonpath 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 183e0da4c58eb9f093c8f2f650b3b405e43cf753
|
4
|
+
data.tar.gz: 5bb2a2cfa9045013efc7b1b4dd2b48f03dc6956d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 858fd75dadb25d362e479669df01c68b2b9b521788824da5c48d3f6c684376a8969176f1fcfb8c8c8efcccb3ad38602b79c14ff503ca2b1c87344ef1298573a1
|
7
|
+
data.tar.gz: 17e5e84ccb3e7441a096fc35d451e0c544364021ae048038f8f7f267fff5da4d8ddc5c9bae0a5e3aa5f476438a371b6d9dff60642cdf8b8d76ac09b1a749c00f
|
data/CHANGELOG.md
CHANGED
data/build.gradle
CHANGED
@@ -1,8 +1,10 @@
|
|
1
1
|
package org.embulk.parser.jsonpath;
|
2
2
|
|
3
3
|
import com.google.common.base.Optional;
|
4
|
+
import com.google.common.base.Throwables;
|
4
5
|
import com.google.common.collect.ImmutableMap;
|
5
6
|
import com.jayway.jsonpath.JsonPath;
|
7
|
+
import com.jayway.jsonpath.PathNotFoundException;
|
6
8
|
import org.embulk.config.Config;
|
7
9
|
import org.embulk.config.ConfigDefault;
|
8
10
|
import org.embulk.config.ConfigSource;
|
@@ -17,6 +19,7 @@ import org.embulk.spi.PageOutput;
|
|
17
19
|
import org.embulk.spi.ParserPlugin;
|
18
20
|
import org.embulk.spi.Schema;
|
19
21
|
import org.embulk.spi.SchemaConfig;
|
22
|
+
import org.embulk.spi.json.JsonParseException;
|
20
23
|
import org.embulk.spi.json.JsonParser;
|
21
24
|
import org.embulk.spi.time.TimestampParser;
|
22
25
|
import org.embulk.spi.util.FileInputInputStream;
|
@@ -93,48 +96,63 @@ public class JsonpathParserPlugin
|
|
93
96
|
try (final PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), schema, output)) {
|
94
97
|
ColumnVisitorImpl visitor = new ColumnVisitorImpl(task, schema, pageBuilder, timestampParsers);
|
95
98
|
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
String json
|
99
|
+
FileInputInputStream is = new FileInputInputStream(input);
|
100
|
+
while (is.nextFile()) {
|
101
|
+
Value value;
|
102
|
+
try {
|
103
|
+
String json;
|
104
|
+
try {
|
105
|
+
json = JsonPath.read(is, jsonRoot).toString();
|
106
|
+
}
|
107
|
+
catch (IOException e) {
|
108
|
+
throw Throwables.propagate(e);
|
109
|
+
}
|
110
|
+
catch (PathNotFoundException e) {
|
111
|
+
throw new DataException(String.format(Locale.ENGLISH, "Failed to get json root reason = %s",
|
112
|
+
e.getMessage()));
|
113
|
+
}
|
114
|
+
|
101
115
|
try {
|
102
116
|
value = jsonParser.parse(json);
|
103
117
|
}
|
104
|
-
catch (
|
105
|
-
|
106
|
-
|
118
|
+
catch (JsonParseException e) {
|
119
|
+
throw new DataException(String.format(Locale.ENGLISH, "Parse failed reason = %s, input data = '%s'",
|
120
|
+
e.getMessage(), json));
|
107
121
|
}
|
108
122
|
|
109
123
|
if (!value.isArrayValue()) {
|
110
124
|
throw new JsonRecordValidateException("Json string is not representing array value.");
|
111
125
|
}
|
126
|
+
}
|
127
|
+
catch (DataException e) {
|
128
|
+
skipOrThrow(e, stopOnInvalidRecord);
|
129
|
+
continue;
|
130
|
+
}
|
112
131
|
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
continue;
|
120
|
-
}
|
132
|
+
for (Value recordValue : value.asArrayValue()) {
|
133
|
+
if (!recordValue.isMapValue()) {
|
134
|
+
skipOrThrow(new JsonRecordValidateException("Json string is not representing map value."),
|
135
|
+
stopOnInvalidRecord);
|
136
|
+
continue;
|
137
|
+
}
|
121
138
|
|
122
|
-
|
123
|
-
|
139
|
+
logger.debug("recordValue = " + recordValue.toString());
|
140
|
+
final Map<Value, Value> record = recordValue.asMapValue().map();
|
141
|
+
try {
|
124
142
|
for (Column column : schema.getColumns()) {
|
125
143
|
Value v = record.get(getColumnNameValue(column));
|
126
144
|
visitor.setValue(v);
|
127
145
|
column.visit(visitor);
|
128
146
|
}
|
129
|
-
|
130
|
-
pageBuilder.addRecord();
|
131
147
|
}
|
148
|
+
catch (DataException e) {
|
149
|
+
skipOrThrow(e, stopOnInvalidRecord);
|
150
|
+
continue;
|
151
|
+
}
|
152
|
+
|
153
|
+
pageBuilder.addRecord();
|
132
154
|
}
|
133
155
|
}
|
134
|
-
catch (IOException e) {
|
135
|
-
// TODO more efficient exception handling.
|
136
|
-
throw new DataException("catch IOException " + e);
|
137
|
-
}
|
138
156
|
|
139
157
|
pageBuilder.finish();
|
140
158
|
}
|
@@ -154,4 +172,12 @@ public class JsonpathParserPlugin
|
|
154
172
|
{
|
155
173
|
return columnNameValues.get(column.getName());
|
156
174
|
}
|
175
|
+
|
176
|
+
private void skipOrThrow(DataException cause, boolean stopOnInvalidRecord)
|
177
|
+
{
|
178
|
+
if (stopOnInvalidRecord) {
|
179
|
+
throw cause;
|
180
|
+
}
|
181
|
+
logger.warn(String.format(ENGLISH, "Skipped invalid record (%s)", cause));
|
182
|
+
}
|
157
183
|
}
|
@@ -122,7 +122,7 @@ public class TestJsonpathParserPlugin
|
|
122
122
|
}
|
123
123
|
|
124
124
|
@Test
|
125
|
-
public void
|
125
|
+
public void skipBrokenJson()
|
126
126
|
throws Exception
|
127
127
|
{
|
128
128
|
SchemaConfig schema = schema(
|
@@ -130,6 +130,70 @@ public class TestJsonpathParserPlugin
|
|
130
130
|
column("_c3", STRING), column("_c4", TIMESTAMP), column("_c5", JSON));
|
131
131
|
ConfigSource config = this.config.deepCopy().set("columns", schema);
|
132
132
|
|
133
|
+
transaction(config, fileInput("BROKEN"));
|
134
|
+
List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
|
135
|
+
assertEquals(0, records.size());
|
136
|
+
}
|
137
|
+
|
138
|
+
@Test
|
139
|
+
public void skipBrokenColumn()
|
140
|
+
throws Exception
|
141
|
+
{
|
142
|
+
SchemaConfig schema = schema(column("_c1", TIMESTAMP));
|
143
|
+
ConfigSource config = this.config.deepCopy().set("columns", schema).
|
144
|
+
set("stop_on_invalid_record", false);
|
145
|
+
|
146
|
+
transaction(config, fileInput("{\"_c1\" : \"INVALID\"}"));
|
147
|
+
List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
|
148
|
+
assertEquals(0, records.size());
|
149
|
+
}
|
150
|
+
|
151
|
+
@Test
|
152
|
+
public void stopOnBrokenJson()
|
153
|
+
throws Exception
|
154
|
+
{
|
155
|
+
SchemaConfig schema = schema(
|
156
|
+
column("_c0", BOOLEAN), column("_c1", LONG), column("_c2", DOUBLE),
|
157
|
+
column("_c3", STRING), column("_c4", TIMESTAMP), column("_c5", JSON));
|
158
|
+
ConfigSource config = this.config.deepCopy().set("columns", schema).
|
159
|
+
set("stop_on_invalid_record", true);
|
160
|
+
|
161
|
+
try {
|
162
|
+
transaction(config, fileInput("BROKEN"));
|
163
|
+
fail();
|
164
|
+
}
|
165
|
+
catch (Throwable t) {
|
166
|
+
assertTrue(t instanceof DataException);
|
167
|
+
}
|
168
|
+
}
|
169
|
+
|
170
|
+
@Test
|
171
|
+
public void stopOnBrokenColumn()
|
172
|
+
throws Exception
|
173
|
+
{
|
174
|
+
SchemaConfig schema = schema(column("_c1", TIMESTAMP));
|
175
|
+
ConfigSource config = this.config.deepCopy().set("columns", schema).
|
176
|
+
set("stop_on_invalid_record", true);
|
177
|
+
|
178
|
+
try {
|
179
|
+
transaction(config, fileInput("{\"_c1\" : \"INVALID\"}"));
|
180
|
+
fail();
|
181
|
+
}
|
182
|
+
catch (Throwable t) {
|
183
|
+
assertTrue(t instanceof DataException);
|
184
|
+
}
|
185
|
+
}
|
186
|
+
|
187
|
+
@Test
|
188
|
+
public void throwDataException()
|
189
|
+
throws Exception
|
190
|
+
{
|
191
|
+
SchemaConfig schema = schema(
|
192
|
+
column("_c0", BOOLEAN), column("_c1", LONG), column("_c2", DOUBLE),
|
193
|
+
column("_c3", STRING), column("_c4", TIMESTAMP), column("_c5", JSON));
|
194
|
+
ConfigSource config = this.config.deepCopy().set("columns", schema).
|
195
|
+
set("stop_on_invalid_record", true);
|
196
|
+
|
133
197
|
try {
|
134
198
|
transaction(config, fileInput(
|
135
199
|
"\"not_map_value\""
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-parser-jsonpath
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Hiroyuki Sato
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-03-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -110,7 +110,7 @@ files:
|
|
110
110
|
- src/test/java/org/embulk/parser/jsonpath/cast/TestStringCast.java
|
111
111
|
- classpath/accessors-smart-1.1.jar
|
112
112
|
- classpath/asm-5.0.3.jar
|
113
|
-
- classpath/embulk-parser-jsonpath-0.1.2.jar
|
113
|
+
- classpath/embulk-parser-jsonpath-0.1.3.jar
|
114
114
|
- classpath/json-path-2.2.0.jar
|
115
115
|
- classpath/json-smart-2.2.1.jar
|
116
116
|
- classpath/slf4j-api-1.7.16.jar
|