embulk-filter-expand_json 0.0.6 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 97eabab5d6f09e27e48f707d5c0a791ca6e7e75b
4
- data.tar.gz: e69f5076385814eaa587b5ab0c0abdf036e53eaa
3
+ metadata.gz: 718512990aa52e1733712a39c856c355b57516ed
4
+ data.tar.gz: 30763a006d651e9a71226d9c09748724e902d067
5
5
  SHA512:
6
- metadata.gz: 359fe77037b220b4cee280e84c4ecd58374c5f0b22bc6acf868f1969c92168ac6840a2f190ccda81a33ea2afda1f51931ca1a00189a1de9a4b1b9e26c0e9c062
7
- data.tar.gz: 190ff72b88236bb85369dafd69b93b80c9c8a08a843057a11cd085ac02e6c89a5f60dd63503cd8a46f9dedb1dbcd8ab5099fc341f86f12136fff8029506c5671
6
+ metadata.gz: 90bc0426d394ac7d5b56ceab6d5e76a790545322836d6efe1db164df994a831e88845df36de62f351acc5d659daa8759d1bb7344a27bfd11c05404f075262b3e
7
+ data.tar.gz: afca72527e51e2ce6db90d65d959795a2c14955f95f3d64018a2ffa0855d59e93c8e030eaa6999fda6152daf896838efc4d97efdaf36b99ffb09f09ae4e4c235
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ 0.1.0 (2016-04-27)
2
+ ==================
3
+ - [Incompatible Change] Add `stop_on_invalid_record` option (invalid records are now skipped with a warning by default instead of stopping the transaction)
4
+ - https://github.com/civitaspo/embulk-filter-expand_json/pull/15
5
+ - https://github.com/civitaspo/embulk-filter-expand_json/issues/14
6
+
1
7
  0.0.6 (2016-03-17)
2
8
  ==================
3
9
  - [Add] Support JSON type
data/README.md CHANGED
@@ -17,6 +17,7 @@ expand columns having json into multiple columns
17
17
  - **name**: name of the column. you can define [JsonPath](http://goessner.net/articles/JsonPath/) style.
18
18
  - **type**: type of the column (see below)
19
19
  - **format**: format of the timestamp if type is timestamp
20
+ - **stop_on_invalid_record**: Stop the bulk load transaction if an invalid record is found (boolean, default: false)
20
21
 
21
22
  ---
22
23
  **type of the column**
data/build.gradle CHANGED
@@ -15,7 +15,7 @@ configurations {
15
15
  provided
16
16
  }
17
17
 
18
- version = "0.0.6"
18
+ version = "0.1.0"
19
19
  sourceCompatibility = 1.7
20
20
  targetCompatibility = 1.7
21
21
 
@@ -41,6 +41,10 @@ public class ExpandJsonFilterPlugin
41
41
  @Config("time_zone")
42
42
  @ConfigDefault("\"UTC\"")
43
43
  public String getTimeZone();
44
+
45
+ @Config("stop_on_invalid_record")
46
+ @ConfigDefault("false")
47
+ boolean getStopOnInvalidRecord();
44
48
  }
45
49
 
46
50
  @Override
@@ -3,6 +3,7 @@ package org.embulk.filter.expand_json;
3
3
  import com.fasterxml.jackson.core.JsonProcessingException;
4
4
  import com.fasterxml.jackson.databind.ObjectMapper;
5
5
  import com.google.common.base.Optional;
6
+ import com.google.common.base.Strings;
6
7
  import com.google.common.base.Throwables;
7
8
  import com.google.common.collect.ImmutableList;
8
9
  import com.jayway.jsonpath.Configuration;
@@ -12,6 +13,7 @@ import com.jayway.jsonpath.ParseContext;
12
13
  import com.jayway.jsonpath.ReadContext;
13
14
  import org.embulk.spi.Column;
14
15
  import org.embulk.spi.ColumnConfig;
16
+ import org.embulk.spi.DataException;
15
17
  import org.embulk.spi.Exec;
16
18
  import org.embulk.spi.Page;
17
19
  import org.embulk.spi.PageBuilder;
@@ -105,6 +107,7 @@ public class FilteredPageOutput
105
107
 
106
108
 
107
109
  private final Logger logger = Exec.getLogger(FilteredPageOutput.class);
110
+ private final boolean stopOnInvalidRecord;
108
111
  private final List<UnchangedColumn> unchangedColumns;
109
112
  private final List<ExpandedColumn> expandedColumns;
110
113
  private final Column jsonColumn;
@@ -187,6 +190,7 @@ public class FilteredPageOutput
187
190
 
188
191
  FilteredPageOutput(PluginTask task, Schema inputSchema, Schema outputSchema, PageOutput pageOutput)
189
192
  {
193
+ this.stopOnInvalidRecord = task.getStopOnInvalidRecord();
190
194
  this.jsonColumn = initializeJsonColumn(task, inputSchema);
191
195
  this.unchangedColumns = initializeUnchangedColumns(inputSchema,
192
196
  outputSchema,
@@ -202,18 +206,19 @@ public class FilteredPageOutput
202
206
  @Override
203
207
  public void add(Page page)
204
208
  {
205
- try {
206
- pageReader.setPage(page);
207
-
208
- while (pageReader.nextRecord()) {
209
+ pageReader.setPage(page);
210
+ while (pageReader.nextRecord()) {
211
+ try {
209
212
  setExpandedJsonColumns();
210
213
  setUnchangedColumns();
211
214
  pageBuilder.addRecord();
212
215
  }
213
- }
214
- catch (JsonProcessingException e) {
215
- logger.error(e.getMessage());
216
- throw Throwables.propagate(e);
216
+ catch (DataException | JsonProcessingException e) {
217
+ if (stopOnInvalidRecord) {
218
+ throw new DataException(String.format("Found an invalid record"), e);
219
+ }
220
+ logger.warn(String.format("Skipped an invalid record (%s)", e.getMessage()));
221
+ }
217
222
  }
218
223
  }
219
224
 
@@ -266,14 +271,16 @@ public class FilteredPageOutput
266
271
  if (pageReader.isNull(jsonColumn)) {
267
272
  json = null;
268
273
  }
269
- else if (jsonColumn.getType() == Types.JSON) {
270
- // TODO could use Value object directly and optimize this code
271
- String jsonObject = pageReader.getJson(jsonColumn).toJson();
272
- json = parseContext.parse(jsonObject);
273
- }
274
- else { // Types.STRING
275
- String jsonObject = pageReader.getString(jsonColumn);
276
- json = parseContext.parse(jsonObject);
274
+ else {
275
+ String jsonObject;
276
+ if (jsonColumn.getType().equals(Types.JSON)) {
277
+ jsonObject = pageReader.getJson(jsonColumn).toJson(); // TODO could use Value object directly and optimize this code
278
+ }
279
+ else {
280
+ jsonObject = pageReader.getString(jsonColumn);
281
+ }
282
+
283
+ json = Strings.isNullOrEmpty(jsonObject) ? null : parseContext.parse(jsonObject);
277
284
  }
278
285
 
279
286
  for (ExpandedColumn expandedJsonColumn: expandedColumns) {
@@ -12,6 +12,7 @@ import org.embulk.config.ConfigLoader;
12
12
  import org.embulk.config.ConfigSource;
13
13
  import org.embulk.config.TaskSource;
14
14
  import org.embulk.spi.Column;
15
+ import org.embulk.spi.DataException;
15
16
  import org.embulk.spi.Exec;
16
17
  import org.embulk.spi.Page;
17
18
  import org.embulk.spi.PageOutput;
@@ -21,16 +22,23 @@ import org.embulk.spi.Schema;
21
22
  import org.embulk.spi.SchemaConfigException;
22
23
  import org.embulk.spi.TestPageBuilderReader.MockPageOutput;
23
24
  import org.embulk.spi.type.Type;
25
+ import org.embulk.spi.util.Pages;
24
26
  import org.junit.Before;
25
27
  import org.junit.Rule;
26
28
  import org.junit.Test;
27
29
  import org.junit.rules.ExpectedException;
30
+ import org.msgpack.value.MapValue;
28
31
  import org.msgpack.value.Value;
29
32
 
33
+ import java.util.List;
34
+
30
35
  import static org.embulk.filter.expand_json.ExpandJsonFilterPlugin.Control;
31
36
  import static org.embulk.filter.expand_json.ExpandJsonFilterPlugin.PluginTask;
32
37
  import static org.embulk.spi.type.Types.*;
33
38
  import static org.junit.Assert.assertEquals;
39
+ import static org.junit.Assert.assertNull;
40
+ import static org.junit.Assert.assertTrue;
41
+ import static org.junit.Assert.fail;
34
42
  import static org.msgpack.value.ValueFactory.newArray;
35
43
  import static org.msgpack.value.ValueFactory.newBoolean;
36
44
  import static org.msgpack.value.ValueFactory.newFloat;
@@ -181,6 +189,73 @@ public class TestExpandJsonFilterPlugin
181
189
  Expand Test
182
190
  */
183
191
 
192
+ @Test
193
+ public void testStopOnInvalidRecordOption()
194
+ {
195
+ String configYaml = "" +
196
+ "type: expand_json\n" +
197
+ "json_column_name: _c0\n" +
198
+ "root: $.\n" +
199
+ "expanded_columns:\n" +
200
+ " - {name: _e0, type: json}\n";
201
+ final ConfigSource conf = getConfigFromYaml(configYaml);
202
+ final Schema schema = schema("_c0", STRING);
203
+
204
+ { // stop_on_invalid_record: false
205
+ ConfigSource config = conf.deepCopy();
206
+
207
+ expandJsonFilterPlugin.transaction(config, schema, new Control()
208
+ {
209
+ @Override
210
+ public void run(TaskSource taskSource, Schema outputSchema)
211
+ {
212
+ MockPageOutput mockPageOutput = new MockPageOutput();
213
+
214
+ try (PageOutput pageOutput = expandJsonFilterPlugin.open(taskSource, schema, outputSchema, mockPageOutput)) {
215
+ for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema,
216
+ "{\"_e0\":\"\"}", "{\"_e0\":{}}")) {
217
+ pageOutput.add(page);
218
+ }
219
+
220
+ pageOutput.finish();
221
+ }
222
+
223
+ List<Object[]> records = Pages.toObjects(outputSchema, mockPageOutput.pages);
224
+ assertEquals(1, records.size());
225
+ assertEquals(0, ((MapValue) records.get(0)[0]).size()); // {}
226
+ }
227
+ });
228
+ }
229
+
230
+ { // stop_on_invalid_record: true
231
+ ConfigSource config = conf.deepCopy().set("stop_on_invalid_record", true);
232
+
233
+ try {
234
+ expandJsonFilterPlugin.transaction(config, schema, new Control()
235
+ {
236
+ @Override
237
+ public void run(TaskSource taskSource, Schema outputSchema)
238
+ {
239
+ MockPageOutput mockPageOutput = new MockPageOutput();
240
+
241
+ try (PageOutput pageOutput = expandJsonFilterPlugin.open(taskSource, schema, outputSchema, mockPageOutput)) {
242
+ for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema,
243
+ "{\"_e0\":\"\"}", "{\"_e0\":{}}")) {
244
+ pageOutput.add(page);
245
+ }
246
+
247
+ pageOutput.finish();
248
+ }
249
+ }
250
+ });
251
+ fail();
252
+ }
253
+ catch (Throwable t) {
254
+ assertTrue(t instanceof DataException);
255
+ }
256
+ }
257
+ }
258
+
184
259
  @Test
185
260
  public void testExpandJsonKeyToSchema()
186
261
  {
@@ -328,6 +403,45 @@ public class TestExpandJsonFilterPlugin
328
403
  });
329
404
  }
330
405
 
406
+ @Test
407
+ public void testExpandSpecialJsonValuesFromString()
408
+ {
409
+ final String configYaml = "" +
410
+ "type: expand_json\n" +
411
+ "json_column_name: _c1\n" +
412
+ "root: $.\n" +
413
+ "expanded_columns:\n" +
414
+ " - {name: _e0, type: string}\n" +
415
+ " - {name: _e1, type: string}\n"; // the value will be null
416
+
417
+ ConfigSource config = getConfigFromYaml(configYaml);
418
+ final Schema schema = schema("_c0", STRING, "_c1", STRING);
419
+
420
+ expandJsonFilterPlugin.transaction(config, schema, new Control()
421
+ {
422
+ @Override
423
+ public void run(TaskSource taskSource, Schema outputSchema)
424
+ {
425
+ MockPageOutput mockPageOutput = new MockPageOutput();
426
+
427
+ try (PageOutput pageOutput = expandJsonFilterPlugin.open(taskSource, schema, outputSchema, mockPageOutput)) {
428
+ for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema,
429
+ "_v0", "")) {
430
+ pageOutput.add(page);
431
+ }
432
+
433
+ pageOutput.finish();
434
+ }
435
+
436
+ for (Object[] record : Pages.toObjects(outputSchema, mockPageOutput.pages)) {
437
+ assertEquals("_v0", record[0]);
438
+ assertNull(record[1]);
439
+ assertNull(record[2]);
440
+ }
441
+ }
442
+ });
443
+ }
444
+
331
445
  private static Value s(String value)
332
446
  {
333
447
  return newString(value);
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-expand_json
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Civitaspo
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-03-16 00:00:00.000000000 Z
11
+ date: 2016-04-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -65,7 +65,7 @@ files:
65
65
  - src/test/java/org/embulk/filter/expand_json/TestExpandJsonFilterPlugin.java
66
66
  - classpath/accessors-smart-1.1.jar
67
67
  - classpath/asm-5.0.3.jar
68
- - classpath/embulk-filter-expand_json-0.0.6.jar
68
+ - classpath/embulk-filter-expand_json-0.1.0.jar
69
69
  - classpath/json-path-2.2.0.jar
70
70
  - classpath/json-smart-2.2.1.jar
71
71
  - classpath/slf4j-api-1.7.16.jar