embulk-filter-expand_json 0.0.6 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +1 -0
- data/build.gradle +1 -1
- data/src/main/java/org/embulk/filter/expand_json/ExpandJsonFilterPlugin.java +4 -0
- data/src/main/java/org/embulk/filter/expand_json/FilteredPageOutput.java +23 -16
- data/src/test/java/org/embulk/filter/expand_json/TestExpandJsonFilterPlugin.java +114 -0
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 718512990aa52e1733712a39c856c355b57516ed
|
4
|
+
data.tar.gz: 30763a006d651e9a71226d9c09748724e902d067
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 90bc0426d394ac7d5b56ceab6d5e76a790545322836d6efe1db164df994a831e88845df36de62f351acc5d659daa8759d1bb7344a27bfd11c05404f075262b3e
|
7
|
+
data.tar.gz: afca72527e51e2ce6db90d65d959795a2c14955f95f3d64018a2ffa0855d59e93c8e030eaa6999fda6152daf896838efc4d97efdaf36b99ffb09f09ae4e4c235
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,9 @@
|
|
1
|
+
0.1.0 (2016-04-27)
|
2
|
+
==================
|
3
|
+
- [Incompatible Change]: Add stop_on_invalid_record option
|
4
|
+
- https://github.com/civitaspo/embulk-filter-expand_json/pull/15
|
5
|
+
- https://github.com/civitaspo/embulk-filter-expand_json/issues/14
|
6
|
+
|
1
7
|
0.0.6 (2016-03-17)
|
2
8
|
==================
|
3
9
|
- [Add] Support JSON type
|
data/README.md
CHANGED
@@ -17,6 +17,7 @@ expand columns having json into multiple columns
|
|
17
17
|
- **name**: name of the column. you can define [JsonPath](http://goessner.net/articles/JsonPath/) style.
|
18
18
|
- **type**: type of the column (see below)
|
19
19
|
- **format**: format of the timestamp if type is timestamp
|
20
|
+
- **stop_on_invalid_record**: Stop bulk load transaction if an invalid record is included (false by default)
|
20
21
|
|
21
22
|
---
|
22
23
|
**type of the column**
|
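data/build.gradle
CHANGED
[hunks for this file (+1 -1) were not captured in this extract]
data/src/main/java/org/embulk/filter/expand_json/ExpandJsonFilterPlugin.java
CHANGED
[hunks for this file (+4 -0) were not captured in this extract]
data/src/main/java/org/embulk/filter/expand_json/FilteredPageOutput.java
CHANGED
@@ -3,6 +3,7 @@ package org.embulk.filter.expand_json;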
|
|
3
3
|
import com.fasterxml.jackson.core.JsonProcessingException;
|
4
4
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
5
5
|
import com.google.common.base.Optional;
|
6
|
+
import com.google.common.base.Strings;
|
6
7
|
import com.google.common.base.Throwables;
|
7
8
|
import com.google.common.collect.ImmutableList;
|
8
9
|
import com.jayway.jsonpath.Configuration;
|
@@ -12,6 +13,7 @@ import com.jayway.jsonpath.ParseContext;
|
|
12
13
|
import com.jayway.jsonpath.ReadContext;
|
13
14
|
import org.embulk.spi.Column;
|
14
15
|
import org.embulk.spi.ColumnConfig;
|
16
|
+
import org.embulk.spi.DataException;
|
15
17
|
import org.embulk.spi.Exec;
|
16
18
|
import org.embulk.spi.Page;
|
17
19
|
import org.embulk.spi.PageBuilder;
|
@@ -105,6 +107,7 @@ public class FilteredPageOutput
|
|
105
107
|
|
106
108
|
|
107
109
|
private final Logger logger = Exec.getLogger(FilteredPageOutput.class);
|
110
|
+
private final boolean stopOnInvalidRecord;
|
108
111
|
private final List<UnchangedColumn> unchangedColumns;
|
109
112
|
private final List<ExpandedColumn> expandedColumns;
|
110
113
|
private final Column jsonColumn;
|
@@ -187,6 +190,7 @@ public class FilteredPageOutput
|
|
187
190
|
|
188
191
|
FilteredPageOutput(PluginTask task, Schema inputSchema, Schema outputSchema, PageOutput pageOutput)
|
189
192
|
{
|
193
|
+
this.stopOnInvalidRecord = task.getStopOnInvalidRecord();
|
190
194
|
this.jsonColumn = initializeJsonColumn(task, inputSchema);
|
191
195
|
this.unchangedColumns = initializeUnchangedColumns(inputSchema,
|
192
196
|
outputSchema,
|
@@ -202,18 +206,19 @@ public class FilteredPageOutput
|
|
202
206
|
@Override
|
203
207
|
public void add(Page page)
|
204
208
|
{
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
while (pageReader.nextRecord()) {
|
209
|
+
pageReader.setPage(page);
|
210
|
+
while (pageReader.nextRecord()) {
|
211
|
+
try {
|
209
212
|
setExpandedJsonColumns();
|
210
213
|
setUnchangedColumns();
|
211
214
|
pageBuilder.addRecord();
|
212
215
|
}
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
216
|
+
catch (DataException | JsonProcessingException e) {
|
217
|
+
if (stopOnInvalidRecord) {
|
218
|
+
throw new DataException(String.format("Found an invalid record"), e);
|
219
|
+
}
|
220
|
+
logger.warn(String.format("Skipped an invalid record (%s)", e.getMessage()));
|
221
|
+
}
|
217
222
|
}
|
218
223
|
}
|
219
224
|
|
@@ -266,14 +271,16 @@ public class FilteredPageOutput
|
|
266
271
|
if (pageReader.isNull(jsonColumn)) {
|
267
272
|
json = null;
|
268
273
|
}
|
269
|
-
else
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
274
|
+
else {
|
275
|
+
String jsonObject;
|
276
|
+
if (jsonColumn.getType().equals(Types.JSON)) {
|
277
|
+
jsonObject = pageReader.getJson(jsonColumn).toJson(); // TODO could use Value object directly and optimize this code
|
278
|
+
}
|
279
|
+
else {
|
280
|
+
jsonObject = pageReader.getString(jsonColumn);
|
281
|
+
}
|
282
|
+
|
283
|
+
json = Strings.isNullOrEmpty(jsonObject) ? null : parseContext.parse(jsonObject);
|
277
284
|
}
|
278
285
|
|
279
286
|
for (ExpandedColumn expandedJsonColumn: expandedColumns) {
|
data/src/test/java/org/embulk/filter/expand_json/TestExpandJsonFilterPlugin.java
CHANGED
@@ -12,6 +12,7 @@ import org.embulk.config.ConfigLoader;
|
|
12
12
|
import org.embulk.config.ConfigSource;
|
13
13
|
import org.embulk.config.TaskSource;
|
14
14
|
import org.embulk.spi.Column;
|
15
|
+
import org.embulk.spi.DataException;
|
15
16
|
import org.embulk.spi.Exec;
|
16
17
|
import org.embulk.spi.Page;
|
17
18
|
import org.embulk.spi.PageOutput;
|
@@ -21,16 +22,23 @@ import org.embulk.spi.Schema;
|
|
21
22
|
import org.embulk.spi.SchemaConfigException;
|
22
23
|
import org.embulk.spi.TestPageBuilderReader.MockPageOutput;
|
23
24
|
import org.embulk.spi.type.Type;
|
25
|
+
import org.embulk.spi.util.Pages;
|
24
26
|
import org.junit.Before;
|
25
27
|
import org.junit.Rule;
|
26
28
|
import org.junit.Test;
|
27
29
|
import org.junit.rules.ExpectedException;
|
30
|
+
import org.msgpack.value.MapValue;
|
28
31
|
import org.msgpack.value.Value;
|
29
32
|
|
33
|
+
import java.util.List;
|
34
|
+
|
30
35
|
import static org.embulk.filter.expand_json.ExpandJsonFilterPlugin.Control;
|
31
36
|
import static org.embulk.filter.expand_json.ExpandJsonFilterPlugin.PluginTask;
|
32
37
|
import static org.embulk.spi.type.Types.*;
|
33
38
|
import static org.junit.Assert.assertEquals;
|
39
|
+
import static org.junit.Assert.assertNull;
|
40
|
+
import static org.junit.Assert.assertTrue;
|
41
|
+
import static org.junit.Assert.fail;
|
34
42
|
import static org.msgpack.value.ValueFactory.newArray;
|
35
43
|
import static org.msgpack.value.ValueFactory.newBoolean;
|
36
44
|
import static org.msgpack.value.ValueFactory.newFloat;
|
@@ -181,6 +189,73 @@ public class TestExpandJsonFilterPlugin
|
|
181
189
|
Expand Test
|
182
190
|
*/
|
183
191
|
|
192
|
+
@Test
|
193
|
+
public void testStopOnInvalidRecordOption()
|
194
|
+
{
|
195
|
+
String configYaml = "" +
|
196
|
+
"type: expand_json\n" +
|
197
|
+
"json_column_name: _c0\n" +
|
198
|
+
"root: $.\n" +
|
199
|
+
"expanded_columns:\n" +
|
200
|
+
" - {name: _e0, type: json}\n";
|
201
|
+
final ConfigSource conf = getConfigFromYaml(configYaml);
|
202
|
+
final Schema schema = schema("_c0", STRING);
|
203
|
+
|
204
|
+
{ // stop_on_invalid_record: false
|
205
|
+
ConfigSource config = conf.deepCopy();
|
206
|
+
|
207
|
+
expandJsonFilterPlugin.transaction(config, schema, new Control()
|
208
|
+
{
|
209
|
+
@Override
|
210
|
+
public void run(TaskSource taskSource, Schema outputSchema)
|
211
|
+
{
|
212
|
+
MockPageOutput mockPageOutput = new MockPageOutput();
|
213
|
+
|
214
|
+
try (PageOutput pageOutput = expandJsonFilterPlugin.open(taskSource, schema, outputSchema, mockPageOutput)) {
|
215
|
+
for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema,
|
216
|
+
"{\"_e0\":\"\"}", "{\"_e0\":{}}")) {
|
217
|
+
pageOutput.add(page);
|
218
|
+
}
|
219
|
+
|
220
|
+
pageOutput.finish();
|
221
|
+
}
|
222
|
+
|
223
|
+
List<Object[]> records = Pages.toObjects(outputSchema, mockPageOutput.pages);
|
224
|
+
assertEquals(1, records.size());
|
225
|
+
assertEquals(0, ((MapValue) records.get(0)[0]).size()); // {}
|
226
|
+
}
|
227
|
+
});
|
228
|
+
}
|
229
|
+
|
230
|
+
{ // stop_on_invalid_record: true
|
231
|
+
ConfigSource config = conf.deepCopy().set("stop_on_invalid_record", true);
|
232
|
+
|
233
|
+
try {
|
234
|
+
expandJsonFilterPlugin.transaction(config, schema, new Control()
|
235
|
+
{
|
236
|
+
@Override
|
237
|
+
public void run(TaskSource taskSource, Schema outputSchema)
|
238
|
+
{
|
239
|
+
MockPageOutput mockPageOutput = new MockPageOutput();
|
240
|
+
|
241
|
+
try (PageOutput pageOutput = expandJsonFilterPlugin.open(taskSource, schema, outputSchema, mockPageOutput)) {
|
242
|
+
for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema,
|
243
|
+
"{\"_e0\":\"\"}", "{\"_e0\":{}}")) {
|
244
|
+
pageOutput.add(page);
|
245
|
+
}
|
246
|
+
|
247
|
+
pageOutput.finish();
|
248
|
+
}
|
249
|
+
}
|
250
|
+
});
|
251
|
+
fail();
|
252
|
+
}
|
253
|
+
catch (Throwable t) {
|
254
|
+
assertTrue(t instanceof DataException);
|
255
|
+
}
|
256
|
+
}
|
257
|
+
}
|
258
|
+
|
184
259
|
@Test
|
185
260
|
public void testExpandJsonKeyToSchema()
|
186
261
|
{
|
@@ -328,6 +403,45 @@ public class TestExpandJsonFilterPlugin
|
|
328
403
|
});
|
329
404
|
}
|
330
405
|
|
406
|
+
@Test
|
407
|
+
public void testExpandSpecialJsonValuesFromString()
|
408
|
+
{
|
409
|
+
final String configYaml = "" +
|
410
|
+
"type: expand_json\n" +
|
411
|
+
"json_column_name: _c1\n" +
|
412
|
+
"root: $.\n" +
|
413
|
+
"expanded_columns:\n" +
|
414
|
+
" - {name: _e0, type: string}\n" +
|
415
|
+
" - {name: _e1, type: string}\n"; // the value will be null
|
416
|
+
|
417
|
+
ConfigSource config = getConfigFromYaml(configYaml);
|
418
|
+
final Schema schema = schema("_c0", STRING, "_c1", STRING);
|
419
|
+
|
420
|
+
expandJsonFilterPlugin.transaction(config, schema, new Control()
|
421
|
+
{
|
422
|
+
@Override
|
423
|
+
public void run(TaskSource taskSource, Schema outputSchema)
|
424
|
+
{
|
425
|
+
MockPageOutput mockPageOutput = new MockPageOutput();
|
426
|
+
|
427
|
+
try (PageOutput pageOutput = expandJsonFilterPlugin.open(taskSource, schema, outputSchema, mockPageOutput)) {
|
428
|
+
for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema,
|
429
|
+
"_v0", "")) {
|
430
|
+
pageOutput.add(page);
|
431
|
+
}
|
432
|
+
|
433
|
+
pageOutput.finish();
|
434
|
+
}
|
435
|
+
|
436
|
+
for (Object[] record : Pages.toObjects(outputSchema, mockPageOutput.pages)) {
|
437
|
+
assertEquals("_v0", record[0]);
|
438
|
+
assertNull(record[1]);
|
439
|
+
assertNull(record[2]);
|
440
|
+
}
|
441
|
+
}
|
442
|
+
});
|
443
|
+
}
|
444
|
+
|
331
445
|
private static Value s(String value)
|
332
446
|
{
|
333
447
|
return newString(value);
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-expand_json
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.6
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Civitaspo
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-04-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -65,7 +65,7 @@ files:
|
|
65
65
|
- src/test/java/org/embulk/filter/expand_json/TestExpandJsonFilterPlugin.java
|
66
66
|
- classpath/accessors-smart-1.1.jar
|
67
67
|
- classpath/asm-5.0.3.jar
|
68
|
-
- classpath/embulk-filter-expand_json-0.0.6.jar
|
68
|
+
- classpath/embulk-filter-expand_json-0.1.0.jar
|
69
69
|
- classpath/json-path-2.2.0.jar
|
70
70
|
- classpath/json-smart-2.2.1.jar
|
71
71
|
- classpath/slf4j-api-1.7.16.jar
|