embulk-filter-expand_json 0.0.6 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +1 -0
- data/build.gradle +1 -1
- data/src/main/java/org/embulk/filter/expand_json/ExpandJsonFilterPlugin.java +4 -0
- data/src/main/java/org/embulk/filter/expand_json/FilteredPageOutput.java +23 -16
- data/src/test/java/org/embulk/filter/expand_json/TestExpandJsonFilterPlugin.java +114 -0
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 718512990aa52e1733712a39c856c355b57516ed
|
4
|
+
data.tar.gz: 30763a006d651e9a71226d9c09748724e902d067
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 90bc0426d394ac7d5b56ceab6d5e76a790545322836d6efe1db164df994a831e88845df36de62f351acc5d659daa8759d1bb7344a27bfd11c05404f075262b3e
|
7
|
+
data.tar.gz: afca72527e51e2ce6db90d65d959795a2c14955f95f3d64018a2ffa0855d59e93c8e030eaa6999fda6152daf896838efc4d97efdaf36b99ffb09f09ae4e4c235
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,9 @@
|
|
1
|
+
0.1.0 (2016-04-27)
|
2
|
+
==================
|
3
|
+
- [Incompatible Change]: Add stop_on_invalid_record option
|
4
|
+
- https://github.com/civitaspo/embulk-filter-expand_json/pull/15
|
5
|
+
- https://github.com/civitaspo/embulk-filter-expand_json/issues/14
|
6
|
+
|
1
7
|
0.0.6 (2016-03-17)
|
2
8
|
==================
|
3
9
|
- [Add] Support JSON type
|
data/README.md
CHANGED
@@ -17,6 +17,7 @@ expand columns having json into multiple columns
|
|
17
17
|
- **name**: name of the column. you can define [JsonPath](http://goessner.net/articles/JsonPath/) style.
|
18
18
|
- **type**: type of the column (see below)
|
19
19
|
- **format**: format of the timestamp if type is timestamp
|
20
|
+
- **stop_on_invalid_record**: Stop bulk load transaction if an invalid record is included (false by default)
|
20
21
|
|
21
22
|
---
|
22
23
|
**type of the column**
|
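data/build.gradle
CHANGED
(diff content for this file was not captured in this extract)
data/src/main/java/org/embulk/filter/expand_json/ExpandJsonFilterPlugin.java
CHANGED
(diff content for this file was not captured in this extract)
data/src/main/java/org/embulk/filter/expand_json/FilteredPageOutput.java
CHANGED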
@@ -3,6 +3,7 @@ package org.embulk.filter.expand_json;
|
|
3
3
|
import com.fasterxml.jackson.core.JsonProcessingException;
|
4
4
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
5
5
|
import com.google.common.base.Optional;
|
6
|
+
import com.google.common.base.Strings;
|
6
7
|
import com.google.common.base.Throwables;
|
7
8
|
import com.google.common.collect.ImmutableList;
|
8
9
|
import com.jayway.jsonpath.Configuration;
|
@@ -12,6 +13,7 @@ import com.jayway.jsonpath.ParseContext;
|
|
12
13
|
import com.jayway.jsonpath.ReadContext;
|
13
14
|
import org.embulk.spi.Column;
|
14
15
|
import org.embulk.spi.ColumnConfig;
|
16
|
+
import org.embulk.spi.DataException;
|
15
17
|
import org.embulk.spi.Exec;
|
16
18
|
import org.embulk.spi.Page;
|
17
19
|
import org.embulk.spi.PageBuilder;
|
@@ -105,6 +107,7 @@ public class FilteredPageOutput
|
|
105
107
|
|
106
108
|
|
107
109
|
private final Logger logger = Exec.getLogger(FilteredPageOutput.class);
|
110
|
+
private final boolean stopOnInvalidRecord;
|
108
111
|
private final List<UnchangedColumn> unchangedColumns;
|
109
112
|
private final List<ExpandedColumn> expandedColumns;
|
110
113
|
private final Column jsonColumn;
|
@@ -187,6 +190,7 @@ public class FilteredPageOutput
|
|
187
190
|
|
188
191
|
FilteredPageOutput(PluginTask task, Schema inputSchema, Schema outputSchema, PageOutput pageOutput)
|
189
192
|
{
|
193
|
+
this.stopOnInvalidRecord = task.getStopOnInvalidRecord();
|
190
194
|
this.jsonColumn = initializeJsonColumn(task, inputSchema);
|
191
195
|
this.unchangedColumns = initializeUnchangedColumns(inputSchema,
|
192
196
|
outputSchema,
|
@@ -202,18 +206,19 @@ public class FilteredPageOutput
|
|
202
206
|
@Override
|
203
207
|
public void add(Page page)
|
204
208
|
{
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
while (pageReader.nextRecord()) {
|
209
|
+
pageReader.setPage(page);
|
210
|
+
while (pageReader.nextRecord()) {
|
211
|
+
try {
|
209
212
|
setExpandedJsonColumns();
|
210
213
|
setUnchangedColumns();
|
211
214
|
pageBuilder.addRecord();
|
212
215
|
}
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
216
|
+
catch (DataException | JsonProcessingException e) {
|
217
|
+
if (stopOnInvalidRecord) {
|
218
|
+
throw new DataException(String.format("Found an invalid record"), e);
|
219
|
+
}
|
220
|
+
logger.warn(String.format("Skipped an invalid record (%s)", e.getMessage()));
|
221
|
+
}
|
217
222
|
}
|
218
223
|
}
|
219
224
|
|
@@ -266,14 +271,16 @@ public class FilteredPageOutput
|
|
266
271
|
if (pageReader.isNull(jsonColumn)) {
|
267
272
|
json = null;
|
268
273
|
}
|
269
|
-
else
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
274
|
+
else {
|
275
|
+
String jsonObject;
|
276
|
+
if (jsonColumn.getType().equals(Types.JSON)) {
|
277
|
+
jsonObject = pageReader.getJson(jsonColumn).toJson(); // TODO could use Value object directly and optimize this code
|
278
|
+
}
|
279
|
+
else {
|
280
|
+
jsonObject = pageReader.getString(jsonColumn);
|
281
|
+
}
|
282
|
+
|
283
|
+
json = Strings.isNullOrEmpty(jsonObject) ? null : parseContext.parse(jsonObject);
|
277
284
|
}
|
278
285
|
|
279
286
|
for (ExpandedColumn expandedJsonColumn: expandedColumns) {
|
data/src/test/java/org/embulk/filter/expand_json/TestExpandJsonFilterPlugin.java
CHANGED
@@ -12,6 +12,7 @@ import org.embulk.config.ConfigLoader;
|
|
12
12
|
import org.embulk.config.ConfigSource;
|
13
13
|
import org.embulk.config.TaskSource;
|
14
14
|
import org.embulk.spi.Column;
|
15
|
+
import org.embulk.spi.DataException;
|
15
16
|
import org.embulk.spi.Exec;
|
16
17
|
import org.embulk.spi.Page;
|
17
18
|
import org.embulk.spi.PageOutput;
|
@@ -21,16 +22,23 @@ import org.embulk.spi.Schema;
|
|
21
22
|
import org.embulk.spi.SchemaConfigException;
|
22
23
|
import org.embulk.spi.TestPageBuilderReader.MockPageOutput;
|
23
24
|
import org.embulk.spi.type.Type;
|
25
|
+
import org.embulk.spi.util.Pages;
|
24
26
|
import org.junit.Before;
|
25
27
|
import org.junit.Rule;
|
26
28
|
import org.junit.Test;
|
27
29
|
import org.junit.rules.ExpectedException;
|
30
|
+
import org.msgpack.value.MapValue;
|
28
31
|
import org.msgpack.value.Value;
|
29
32
|
|
33
|
+
import java.util.List;
|
34
|
+
|
30
35
|
import static org.embulk.filter.expand_json.ExpandJsonFilterPlugin.Control;
|
31
36
|
import static org.embulk.filter.expand_json.ExpandJsonFilterPlugin.PluginTask;
|
32
37
|
import static org.embulk.spi.type.Types.*;
|
33
38
|
import static org.junit.Assert.assertEquals;
|
39
|
+
import static org.junit.Assert.assertNull;
|
40
|
+
import static org.junit.Assert.assertTrue;
|
41
|
+
import static org.junit.Assert.fail;
|
34
42
|
import static org.msgpack.value.ValueFactory.newArray;
|
35
43
|
import static org.msgpack.value.ValueFactory.newBoolean;
|
36
44
|
import static org.msgpack.value.ValueFactory.newFloat;
|
@@ -181,6 +189,73 @@ public class TestExpandJsonFilterPlugin
|
|
181
189
|
Expand Test
|
182
190
|
*/
|
183
191
|
|
192
|
+
@Test
|
193
|
+
public void testStopOnInvalidRecordOption()
|
194
|
+
{
|
195
|
+
String configYaml = "" +
|
196
|
+
"type: expand_json\n" +
|
197
|
+
"json_column_name: _c0\n" +
|
198
|
+
"root: $.\n" +
|
199
|
+
"expanded_columns:\n" +
|
200
|
+
" - {name: _e0, type: json}\n";
|
201
|
+
final ConfigSource conf = getConfigFromYaml(configYaml);
|
202
|
+
final Schema schema = schema("_c0", STRING);
|
203
|
+
|
204
|
+
{ // stop_on_invalid_record: false
|
205
|
+
ConfigSource config = conf.deepCopy();
|
206
|
+
|
207
|
+
expandJsonFilterPlugin.transaction(config, schema, new Control()
|
208
|
+
{
|
209
|
+
@Override
|
210
|
+
public void run(TaskSource taskSource, Schema outputSchema)
|
211
|
+
{
|
212
|
+
MockPageOutput mockPageOutput = new MockPageOutput();
|
213
|
+
|
214
|
+
try (PageOutput pageOutput = expandJsonFilterPlugin.open(taskSource, schema, outputSchema, mockPageOutput)) {
|
215
|
+
for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema,
|
216
|
+
"{\"_e0\":\"\"}", "{\"_e0\":{}}")) {
|
217
|
+
pageOutput.add(page);
|
218
|
+
}
|
219
|
+
|
220
|
+
pageOutput.finish();
|
221
|
+
}
|
222
|
+
|
223
|
+
List<Object[]> records = Pages.toObjects(outputSchema, mockPageOutput.pages);
|
224
|
+
assertEquals(1, records.size());
|
225
|
+
assertEquals(0, ((MapValue) records.get(0)[0]).size()); // {}
|
226
|
+
}
|
227
|
+
});
|
228
|
+
}
|
229
|
+
|
230
|
+
{ // stop_on_invalid_record: true
|
231
|
+
ConfigSource config = conf.deepCopy().set("stop_on_invalid_record", true);
|
232
|
+
|
233
|
+
try {
|
234
|
+
expandJsonFilterPlugin.transaction(config, schema, new Control()
|
235
|
+
{
|
236
|
+
@Override
|
237
|
+
public void run(TaskSource taskSource, Schema outputSchema)
|
238
|
+
{
|
239
|
+
MockPageOutput mockPageOutput = new MockPageOutput();
|
240
|
+
|
241
|
+
try (PageOutput pageOutput = expandJsonFilterPlugin.open(taskSource, schema, outputSchema, mockPageOutput)) {
|
242
|
+
for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema,
|
243
|
+
"{\"_e0\":\"\"}", "{\"_e0\":{}}")) {
|
244
|
+
pageOutput.add(page);
|
245
|
+
}
|
246
|
+
|
247
|
+
pageOutput.finish();
|
248
|
+
}
|
249
|
+
}
|
250
|
+
});
|
251
|
+
fail();
|
252
|
+
}
|
253
|
+
catch (Throwable t) {
|
254
|
+
assertTrue(t instanceof DataException);
|
255
|
+
}
|
256
|
+
}
|
257
|
+
}
|
258
|
+
|
184
259
|
@Test
|
185
260
|
public void testExpandJsonKeyToSchema()
|
186
261
|
{
|
@@ -328,6 +403,45 @@ public class TestExpandJsonFilterPlugin
|
|
328
403
|
});
|
329
404
|
}
|
330
405
|
|
406
|
+
@Test
|
407
|
+
public void testExpandSpecialJsonValuesFromString()
|
408
|
+
{
|
409
|
+
final String configYaml = "" +
|
410
|
+
"type: expand_json\n" +
|
411
|
+
"json_column_name: _c1\n" +
|
412
|
+
"root: $.\n" +
|
413
|
+
"expanded_columns:\n" +
|
414
|
+
" - {name: _e0, type: string}\n" +
|
415
|
+
" - {name: _e1, type: string}\n"; // the value will be null
|
416
|
+
|
417
|
+
ConfigSource config = getConfigFromYaml(configYaml);
|
418
|
+
final Schema schema = schema("_c0", STRING, "_c1", STRING);
|
419
|
+
|
420
|
+
expandJsonFilterPlugin.transaction(config, schema, new Control()
|
421
|
+
{
|
422
|
+
@Override
|
423
|
+
public void run(TaskSource taskSource, Schema outputSchema)
|
424
|
+
{
|
425
|
+
MockPageOutput mockPageOutput = new MockPageOutput();
|
426
|
+
|
427
|
+
try (PageOutput pageOutput = expandJsonFilterPlugin.open(taskSource, schema, outputSchema, mockPageOutput)) {
|
428
|
+
for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema,
|
429
|
+
"_v0", "")) {
|
430
|
+
pageOutput.add(page);
|
431
|
+
}
|
432
|
+
|
433
|
+
pageOutput.finish();
|
434
|
+
}
|
435
|
+
|
436
|
+
for (Object[] record : Pages.toObjects(outputSchema, mockPageOutput.pages)) {
|
437
|
+
assertEquals("_v0", record[0]);
|
438
|
+
assertNull(record[1]);
|
439
|
+
assertNull(record[2]);
|
440
|
+
}
|
441
|
+
}
|
442
|
+
});
|
443
|
+
}
|
444
|
+
|
331
445
|
private static Value s(String value)
|
332
446
|
{
|
333
447
|
return newString(value);
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-expand_json
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.6
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Civitaspo
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-04-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -65,7 +65,7 @@ files:
|
|
65
65
|
- src/test/java/org/embulk/filter/expand_json/TestExpandJsonFilterPlugin.java
|
66
66
|
- classpath/accessors-smart-1.1.jar
|
67
67
|
- classpath/asm-5.0.3.jar
|
68
|
-
- classpath/embulk-filter-expand_json-0.0.6.jar
|
68
|
+
- classpath/embulk-filter-expand_json-0.1.0.jar
|
69
69
|
- classpath/json-path-2.2.0.jar
|
70
70
|
- classpath/json-smart-2.2.1.jar
|
71
71
|
- classpath/slf4j-api-1.7.16.jar
|