embulk-parser-jsonpath 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 183e0da4c58eb9f093c8f2f650b3b405e43cf753
|
4
|
+
data.tar.gz: 5bb2a2cfa9045013efc7b1b4dd2b48f03dc6956d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 858fd75dadb25d362e479669df01c68b2b9b521788824da5c48d3f6c684376a8969176f1fcfb8c8c8efcccb3ad38602b79c14ff503ca2b1c87344ef1298573a1
|
7
|
+
data.tar.gz: 17e5e84ccb3e7441a096fc35d451e0c544364021ae048038f8f7f267fff5da4d8ddc5c9bae0a5e3aa5f476438a371b6d9dff60642cdf8b8d76ac09b1a749c00f
|
data/CHANGELOG.md
CHANGED
data/build.gradle
CHANGED
@@ -1,8 +1,10 @@
|
|
1
1
|
package org.embulk.parser.jsonpath;
|
2
2
|
|
3
3
|
import com.google.common.base.Optional;
|
4
|
+
import com.google.common.base.Throwables;
|
4
5
|
import com.google.common.collect.ImmutableMap;
|
5
6
|
import com.jayway.jsonpath.JsonPath;
|
7
|
+
import com.jayway.jsonpath.PathNotFoundException;
|
6
8
|
import org.embulk.config.Config;
|
7
9
|
import org.embulk.config.ConfigDefault;
|
8
10
|
import org.embulk.config.ConfigSource;
|
@@ -17,6 +19,7 @@ import org.embulk.spi.PageOutput;
|
|
17
19
|
import org.embulk.spi.ParserPlugin;
|
18
20
|
import org.embulk.spi.Schema;
|
19
21
|
import org.embulk.spi.SchemaConfig;
|
22
|
+
import org.embulk.spi.json.JsonParseException;
|
20
23
|
import org.embulk.spi.json.JsonParser;
|
21
24
|
import org.embulk.spi.time.TimestampParser;
|
22
25
|
import org.embulk.spi.util.FileInputInputStream;
|
@@ -93,48 +96,63 @@ public class JsonpathParserPlugin
|
|
93
96
|
try (final PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), schema, output)) {
|
94
97
|
ColumnVisitorImpl visitor = new ColumnVisitorImpl(task, schema, pageBuilder, timestampParsers);
|
95
98
|
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
String json
|
99
|
+
FileInputInputStream is = new FileInputInputStream(input);
|
100
|
+
while (is.nextFile()) {
|
101
|
+
Value value;
|
102
|
+
try {
|
103
|
+
String json;
|
104
|
+
try {
|
105
|
+
json = JsonPath.read(is, jsonRoot).toString();
|
106
|
+
}
|
107
|
+
catch (IOException e) {
|
108
|
+
throw Throwables.propagate(e);
|
109
|
+
}
|
110
|
+
catch (PathNotFoundException e) {
|
111
|
+
throw new DataException(String.format(Locale.ENGLISH, "Failed to get json root reason = %s",
|
112
|
+
e.getMessage()));
|
113
|
+
}
|
114
|
+
|
101
115
|
try {
|
102
116
|
value = jsonParser.parse(json);
|
103
117
|
}
|
104
|
-
catch (
|
105
|
-
|
106
|
-
|
118
|
+
catch (JsonParseException e) {
|
119
|
+
throw new DataException(String.format(Locale.ENGLISH, "Parse failed reason = %s, input data = '%s'",
|
120
|
+
e.getMessage(), json));
|
107
121
|
}
|
108
122
|
|
109
123
|
if (!value.isArrayValue()) {
|
110
124
|
throw new JsonRecordValidateException("Json string is not representing array value.");
|
111
125
|
}
|
126
|
+
}
|
127
|
+
catch (DataException e) {
|
128
|
+
skipOrThrow(e, stopOnInvalidRecord);
|
129
|
+
continue;
|
130
|
+
}
|
112
131
|
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
continue;
|
120
|
-
}
|
132
|
+
for (Value recordValue : value.asArrayValue()) {
|
133
|
+
if (!recordValue.isMapValue()) {
|
134
|
+
skipOrThrow(new JsonRecordValidateException("Json string is not representing map value."),
|
135
|
+
stopOnInvalidRecord);
|
136
|
+
continue;
|
137
|
+
}
|
121
138
|
|
122
|
-
|
123
|
-
|
139
|
+
logger.debug("recordValue = " + recordValue.toString());
|
140
|
+
final Map<Value, Value> record = recordValue.asMapValue().map();
|
141
|
+
try {
|
124
142
|
for (Column column : schema.getColumns()) {
|
125
143
|
Value v = record.get(getColumnNameValue(column));
|
126
144
|
visitor.setValue(v);
|
127
145
|
column.visit(visitor);
|
128
146
|
}
|
129
|
-
|
130
|
-
pageBuilder.addRecord();
|
131
147
|
}
|
148
|
+
catch (DataException e) {
|
149
|
+
skipOrThrow(e, stopOnInvalidRecord);
|
150
|
+
continue;
|
151
|
+
}
|
152
|
+
|
153
|
+
pageBuilder.addRecord();
|
132
154
|
}
|
133
155
|
}
|
134
|
-
catch (IOException e) {
|
135
|
-
// TODO more efficient exception handling.
|
136
|
-
throw new DataException("catch IOException " + e);
|
137
|
-
}
|
138
156
|
|
139
157
|
pageBuilder.finish();
|
140
158
|
}
|
@@ -154,4 +172,12 @@ public class JsonpathParserPlugin
|
|
154
172
|
{
|
155
173
|
return columnNameValues.get(column.getName());
|
156
174
|
}
|
175
|
+
|
176
|
+
private void skipOrThrow(DataException cause, boolean stopOnInvalidRecord)
|
177
|
+
{
|
178
|
+
if (stopOnInvalidRecord) {
|
179
|
+
throw cause;
|
180
|
+
}
|
181
|
+
logger.warn(String.format(ENGLISH, "Skipped invalid record (%s)", cause));
|
182
|
+
}
|
157
183
|
}
|
@@ -122,7 +122,7 @@ public class TestJsonpathParserPlugin
|
|
122
122
|
}
|
123
123
|
|
124
124
|
@Test
|
125
|
-
public void
|
125
|
+
public void skipBrokenJson()
|
126
126
|
throws Exception
|
127
127
|
{
|
128
128
|
SchemaConfig schema = schema(
|
@@ -130,6 +130,70 @@ public class TestJsonpathParserPlugin
|
|
130
130
|
column("_c3", STRING), column("_c4", TIMESTAMP), column("_c5", JSON));
|
131
131
|
ConfigSource config = this.config.deepCopy().set("columns", schema);
|
132
132
|
|
133
|
+
transaction(config, fileInput("BROKEN"));
|
134
|
+
List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
|
135
|
+
assertEquals(0, records.size());
|
136
|
+
}
|
137
|
+
|
138
|
+
@Test
|
139
|
+
public void skipBrokenColumn()
|
140
|
+
throws Exception
|
141
|
+
{
|
142
|
+
SchemaConfig schema = schema(column("_c1", TIMESTAMP));
|
143
|
+
ConfigSource config = this.config.deepCopy().set("columns", schema).
|
144
|
+
set("stop_on_invalid_record", false);
|
145
|
+
|
146
|
+
transaction(config, fileInput("{\"_c1\" : \"INVALID\"}"));
|
147
|
+
List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
|
148
|
+
assertEquals(0, records.size());
|
149
|
+
}
|
150
|
+
|
151
|
+
@Test
|
152
|
+
public void stopOnBrokenJson()
|
153
|
+
throws Exception
|
154
|
+
{
|
155
|
+
SchemaConfig schema = schema(
|
156
|
+
column("_c0", BOOLEAN), column("_c1", LONG), column("_c2", DOUBLE),
|
157
|
+
column("_c3", STRING), column("_c4", TIMESTAMP), column("_c5", JSON));
|
158
|
+
ConfigSource config = this.config.deepCopy().set("columns", schema).
|
159
|
+
set("stop_on_invalid_record", true);
|
160
|
+
|
161
|
+
try {
|
162
|
+
transaction(config, fileInput("BROKEN"));
|
163
|
+
fail();
|
164
|
+
}
|
165
|
+
catch (Throwable t) {
|
166
|
+
assertTrue(t instanceof DataException);
|
167
|
+
}
|
168
|
+
}
|
169
|
+
|
170
|
+
@Test
|
171
|
+
public void stopOnBrokenColumn()
|
172
|
+
throws Exception
|
173
|
+
{
|
174
|
+
SchemaConfig schema = schema(column("_c1", TIMESTAMP));
|
175
|
+
ConfigSource config = this.config.deepCopy().set("columns", schema).
|
176
|
+
set("stop_on_invalid_record", true);
|
177
|
+
|
178
|
+
try {
|
179
|
+
transaction(config, fileInput("{\"_c1\" : \"INVALID\"}"));
|
180
|
+
fail();
|
181
|
+
}
|
182
|
+
catch (Throwable t) {
|
183
|
+
assertTrue(t instanceof DataException);
|
184
|
+
}
|
185
|
+
}
|
186
|
+
|
187
|
+
@Test
|
188
|
+
public void throwDataException()
|
189
|
+
throws Exception
|
190
|
+
{
|
191
|
+
SchemaConfig schema = schema(
|
192
|
+
column("_c0", BOOLEAN), column("_c1", LONG), column("_c2", DOUBLE),
|
193
|
+
column("_c3", STRING), column("_c4", TIMESTAMP), column("_c5", JSON));
|
194
|
+
ConfigSource config = this.config.deepCopy().set("columns", schema).
|
195
|
+
set("stop_on_invalid_record", true);
|
196
|
+
|
133
197
|
try {
|
134
198
|
transaction(config, fileInput(
|
135
199
|
"\"not_map_value\""
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-parser-jsonpath
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Hiroyuki Sato
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-03-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -110,7 +110,7 @@ files:
|
|
110
110
|
- src/test/java/org/embulk/parser/jsonpath/cast/TestStringCast.java
|
111
111
|
- classpath/accessors-smart-1.1.jar
|
112
112
|
- classpath/asm-5.0.3.jar
|
113
|
-
- classpath/embulk-parser-jsonpath-0.1.2.jar
|
113
|
+
- classpath/embulk-parser-jsonpath-0.1.3.jar
|
114
114
|
- classpath/json-path-2.2.0.jar
|
115
115
|
- classpath/json-smart-2.2.1.jar
|
116
116
|
- classpath/slf4j-api-1.7.16.jar
|