embulk-filter-expand_json 0.0.6 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 97eabab5d6f09e27e48f707d5c0a791ca6e7e75b
4
- data.tar.gz: e69f5076385814eaa587b5ab0c0abdf036e53eaa
3
+ metadata.gz: 718512990aa52e1733712a39c856c355b57516ed
4
+ data.tar.gz: 30763a006d651e9a71226d9c09748724e902d067
5
5
  SHA512:
6
- metadata.gz: 359fe77037b220b4cee280e84c4ecd58374c5f0b22bc6acf868f1969c92168ac6840a2f190ccda81a33ea2afda1f51931ca1a00189a1de9a4b1b9e26c0e9c062
7
- data.tar.gz: 190ff72b88236bb85369dafd69b93b80c9c8a08a843057a11cd085ac02e6c89a5f60dd63503cd8a46f9dedb1dbcd8ab5099fc341f86f12136fff8029506c5671
6
+ metadata.gz: 90bc0426d394ac7d5b56ceab6d5e76a790545322836d6efe1db164df994a831e88845df36de62f351acc5d659daa8759d1bb7344a27bfd11c05404f075262b3e
7
+ data.tar.gz: afca72527e51e2ce6db90d65d959795a2c14955f95f3d64018a2ffa0855d59e93c8e030eaa6999fda6152daf896838efc4d97efdaf36b99ffb09f09ae4e4c235
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ 0.1.0 (2016-04-27)
2
+ ==================
3
+ - [Incompatible Change]: Add stop_on_invalid_record option
4
+ - https://github.com/civitaspo/embulk-filter-expand_json/pull/15
5
+ - https://github.com/civitaspo/embulk-filter-expand_json/issues/14
6
+
1
7
  0.0.6 (2016-03-17)
2
8
  ==================
3
9
  - [Add] Support JSON type
data/README.md CHANGED
@@ -17,6 +17,7 @@ expand columns having json into multiple columns
17
17
  - **name**: name of the column. you can define [JsonPath](http://goessner.net/articles/JsonPath/) style.
18
18
  - **type**: type of the column (see below)
19
19
  - **format**: format of the timestamp if type is timestamp
20
+ - **stop_on_invalid_record**: Stop the bulk load transaction if an invalid record is found (false by default)
20
21
 
21
22
  ---
22
23
  **type of the column**
data/build.gradle CHANGED
@@ -15,7 +15,7 @@ configurations {
15
15
  provided
16
16
  }
17
17
 
18
- version = "0.0.6"
18
+ version = "0.1.0"
19
19
  sourceCompatibility = 1.7
20
20
  targetCompatibility = 1.7
21
21
 
@@ -41,6 +41,10 @@ public class ExpandJsonFilterPlugin
41
41
  @Config("time_zone")
42
42
  @ConfigDefault("\"UTC\"")
43
43
  public String getTimeZone();
44
+
45
+ @Config("stop_on_invalid_record")
46
+ @ConfigDefault("false")
47
+ boolean getStopOnInvalidRecord();
44
48
  }
45
49
 
46
50
  @Override
@@ -3,6 +3,7 @@ package org.embulk.filter.expand_json;
3
3
  import com.fasterxml.jackson.core.JsonProcessingException;
4
4
  import com.fasterxml.jackson.databind.ObjectMapper;
5
5
  import com.google.common.base.Optional;
6
+ import com.google.common.base.Strings;
6
7
  import com.google.common.base.Throwables;
7
8
  import com.google.common.collect.ImmutableList;
8
9
  import com.jayway.jsonpath.Configuration;
@@ -12,6 +13,7 @@ import com.jayway.jsonpath.ParseContext;
12
13
  import com.jayway.jsonpath.ReadContext;
13
14
  import org.embulk.spi.Column;
14
15
  import org.embulk.spi.ColumnConfig;
16
+ import org.embulk.spi.DataException;
15
17
  import org.embulk.spi.Exec;
16
18
  import org.embulk.spi.Page;
17
19
  import org.embulk.spi.PageBuilder;
@@ -105,6 +107,7 @@ public class FilteredPageOutput
105
107
 
106
108
 
107
109
  private final Logger logger = Exec.getLogger(FilteredPageOutput.class);
110
+ private final boolean stopOnInvalidRecord;
108
111
  private final List<UnchangedColumn> unchangedColumns;
109
112
  private final List<ExpandedColumn> expandedColumns;
110
113
  private final Column jsonColumn;
@@ -187,6 +190,7 @@ public class FilteredPageOutput
187
190
 
188
191
  FilteredPageOutput(PluginTask task, Schema inputSchema, Schema outputSchema, PageOutput pageOutput)
189
192
  {
193
+ this.stopOnInvalidRecord = task.getStopOnInvalidRecord();
190
194
  this.jsonColumn = initializeJsonColumn(task, inputSchema);
191
195
  this.unchangedColumns = initializeUnchangedColumns(inputSchema,
192
196
  outputSchema,
@@ -202,18 +206,19 @@ public class FilteredPageOutput
202
206
  @Override
203
207
  public void add(Page page)
204
208
  {
205
- try {
206
- pageReader.setPage(page);
207
-
208
- while (pageReader.nextRecord()) {
209
+ pageReader.setPage(page);
210
+ while (pageReader.nextRecord()) {
211
+ try {
209
212
  setExpandedJsonColumns();
210
213
  setUnchangedColumns();
211
214
  pageBuilder.addRecord();
212
215
  }
213
- }
214
- catch (JsonProcessingException e) {
215
- logger.error(e.getMessage());
216
- throw Throwables.propagate(e);
216
+ catch (DataException | JsonProcessingException e) {
217
+ if (stopOnInvalidRecord) {
218
+ throw new DataException(String.format("Found an invalid record"), e);
219
+ }
220
+ logger.warn(String.format("Skipped an invalid record (%s)", e.getMessage()));
221
+ }
217
222
  }
218
223
  }
219
224
 
@@ -266,14 +271,16 @@ public class FilteredPageOutput
266
271
  if (pageReader.isNull(jsonColumn)) {
267
272
  json = null;
268
273
  }
269
- else if (jsonColumn.getType() == Types.JSON) {
270
- // TODO could use Value object directly and optimize this code
271
- String jsonObject = pageReader.getJson(jsonColumn).toJson();
272
- json = parseContext.parse(jsonObject);
273
- }
274
- else { // Types.STRING
275
- String jsonObject = pageReader.getString(jsonColumn);
276
- json = parseContext.parse(jsonObject);
274
+ else {
275
+ String jsonObject;
276
+ if (jsonColumn.getType().equals(Types.JSON)) {
277
+ jsonObject = pageReader.getJson(jsonColumn).toJson(); // TODO could use Value object directly and optimize this code
278
+ }
279
+ else {
280
+ jsonObject = pageReader.getString(jsonColumn);
281
+ }
282
+
283
+ json = Strings.isNullOrEmpty(jsonObject) ? null : parseContext.parse(jsonObject);
277
284
  }
278
285
 
279
286
  for (ExpandedColumn expandedJsonColumn: expandedColumns) {
@@ -12,6 +12,7 @@ import org.embulk.config.ConfigLoader;
12
12
  import org.embulk.config.ConfigSource;
13
13
  import org.embulk.config.TaskSource;
14
14
  import org.embulk.spi.Column;
15
+ import org.embulk.spi.DataException;
15
16
  import org.embulk.spi.Exec;
16
17
  import org.embulk.spi.Page;
17
18
  import org.embulk.spi.PageOutput;
@@ -21,16 +22,23 @@ import org.embulk.spi.Schema;
21
22
  import org.embulk.spi.SchemaConfigException;
22
23
  import org.embulk.spi.TestPageBuilderReader.MockPageOutput;
23
24
  import org.embulk.spi.type.Type;
25
+ import org.embulk.spi.util.Pages;
24
26
  import org.junit.Before;
25
27
  import org.junit.Rule;
26
28
  import org.junit.Test;
27
29
  import org.junit.rules.ExpectedException;
30
+ import org.msgpack.value.MapValue;
28
31
  import org.msgpack.value.Value;
29
32
 
33
+ import java.util.List;
34
+
30
35
  import static org.embulk.filter.expand_json.ExpandJsonFilterPlugin.Control;
31
36
  import static org.embulk.filter.expand_json.ExpandJsonFilterPlugin.PluginTask;
32
37
  import static org.embulk.spi.type.Types.*;
33
38
  import static org.junit.Assert.assertEquals;
39
+ import static org.junit.Assert.assertNull;
40
+ import static org.junit.Assert.assertTrue;
41
+ import static org.junit.Assert.fail;
34
42
  import static org.msgpack.value.ValueFactory.newArray;
35
43
  import static org.msgpack.value.ValueFactory.newBoolean;
36
44
  import static org.msgpack.value.ValueFactory.newFloat;
@@ -181,6 +189,73 @@ public class TestExpandJsonFilterPlugin
181
189
  Expand Test
182
190
  */
183
191
 
192
+ @Test
193
+ public void testStopOnInvalidRecordOption()
194
+ {
195
+ String configYaml = "" +
196
+ "type: expand_json\n" +
197
+ "json_column_name: _c0\n" +
198
+ "root: $.\n" +
199
+ "expanded_columns:\n" +
200
+ " - {name: _e0, type: json}\n";
201
+ final ConfigSource conf = getConfigFromYaml(configYaml);
202
+ final Schema schema = schema("_c0", STRING);
203
+
204
+ { // stop_on_invalid_record: false
205
+ ConfigSource config = conf.deepCopy();
206
+
207
+ expandJsonFilterPlugin.transaction(config, schema, new Control()
208
+ {
209
+ @Override
210
+ public void run(TaskSource taskSource, Schema outputSchema)
211
+ {
212
+ MockPageOutput mockPageOutput = new MockPageOutput();
213
+
214
+ try (PageOutput pageOutput = expandJsonFilterPlugin.open(taskSource, schema, outputSchema, mockPageOutput)) {
215
+ for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema,
216
+ "{\"_e0\":\"\"}", "{\"_e0\":{}}")) {
217
+ pageOutput.add(page);
218
+ }
219
+
220
+ pageOutput.finish();
221
+ }
222
+
223
+ List<Object[]> records = Pages.toObjects(outputSchema, mockPageOutput.pages);
224
+ assertEquals(1, records.size());
225
+ assertEquals(0, ((MapValue) records.get(0)[0]).size()); // {}
226
+ }
227
+ });
228
+ }
229
+
230
+ { // stop_on_invalid_record: true
231
+ ConfigSource config = conf.deepCopy().set("stop_on_invalid_record", true);
232
+
233
+ try {
234
+ expandJsonFilterPlugin.transaction(config, schema, new Control()
235
+ {
236
+ @Override
237
+ public void run(TaskSource taskSource, Schema outputSchema)
238
+ {
239
+ MockPageOutput mockPageOutput = new MockPageOutput();
240
+
241
+ try (PageOutput pageOutput = expandJsonFilterPlugin.open(taskSource, schema, outputSchema, mockPageOutput)) {
242
+ for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema,
243
+ "{\"_e0\":\"\"}", "{\"_e0\":{}}")) {
244
+ pageOutput.add(page);
245
+ }
246
+
247
+ pageOutput.finish();
248
+ }
249
+ }
250
+ });
251
+ fail();
252
+ }
253
+ catch (Throwable t) {
254
+ assertTrue(t instanceof DataException);
255
+ }
256
+ }
257
+ }
258
+
184
259
  @Test
185
260
  public void testExpandJsonKeyToSchema()
186
261
  {
@@ -328,6 +403,45 @@ public class TestExpandJsonFilterPlugin
328
403
  });
329
404
  }
330
405
 
406
+ @Test
407
+ public void testExpandSpecialJsonValuesFromString()
408
+ {
409
+ final String configYaml = "" +
410
+ "type: expand_json\n" +
411
+ "json_column_name: _c1\n" +
412
+ "root: $.\n" +
413
+ "expanded_columns:\n" +
414
+ " - {name: _e0, type: string}\n" +
415
+ " - {name: _e1, type: string}\n"; // the value will be null
416
+
417
+ ConfigSource config = getConfigFromYaml(configYaml);
418
+ final Schema schema = schema("_c0", STRING, "_c1", STRING);
419
+
420
+ expandJsonFilterPlugin.transaction(config, schema, new Control()
421
+ {
422
+ @Override
423
+ public void run(TaskSource taskSource, Schema outputSchema)
424
+ {
425
+ MockPageOutput mockPageOutput = new MockPageOutput();
426
+
427
+ try (PageOutput pageOutput = expandJsonFilterPlugin.open(taskSource, schema, outputSchema, mockPageOutput)) {
428
+ for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema,
429
+ "_v0", "")) {
430
+ pageOutput.add(page);
431
+ }
432
+
433
+ pageOutput.finish();
434
+ }
435
+
436
+ for (Object[] record : Pages.toObjects(outputSchema, mockPageOutput.pages)) {
437
+ assertEquals("_v0", record[0]);
438
+ assertNull(record[1]);
439
+ assertNull(record[2]);
440
+ }
441
+ }
442
+ });
443
+ }
444
+
331
445
  private static Value s(String value)
332
446
  {
333
447
  return newString(value);
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-expand_json
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Civitaspo
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-03-16 00:00:00.000000000 Z
11
+ date: 2016-04-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -65,7 +65,7 @@ files:
65
65
  - src/test/java/org/embulk/filter/expand_json/TestExpandJsonFilterPlugin.java
66
66
  - classpath/accessors-smart-1.1.jar
67
67
  - classpath/asm-5.0.3.jar
68
- - classpath/embulk-filter-expand_json-0.0.6.jar
68
+ - classpath/embulk-filter-expand_json-0.1.0.jar
69
69
  - classpath/json-path-2.2.0.jar
70
70
  - classpath/json-smart-2.2.1.jar
71
71
  - classpath/slf4j-api-1.7.16.jar