embulk-filter-expand_json 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 718512990aa52e1733712a39c856c355b57516ed
4
- data.tar.gz: 30763a006d651e9a71226d9c09748724e902d067
3
+ metadata.gz: cf8c5e6cae24c44c66c9db8dcef9e9ec14325e05
4
+ data.tar.gz: 461d263c4d02a2b225ff05199d9a84f7e4ae15ef
5
5
  SHA512:
6
- metadata.gz: 90bc0426d394ac7d5b56ceab6d5e76a790545322836d6efe1db164df994a831e88845df36de62f351acc5d659daa8759d1bb7344a27bfd11c05404f075262b3e
7
- data.tar.gz: afca72527e51e2ce6db90d65d959795a2c14955f95f3d64018a2ffa0855d59e93c8e030eaa6999fda6152daf896838efc4d97efdaf36b99ffb09f09ae4e4c235
6
+ metadata.gz: f033d39efd8b1532bf305bc338a17686c7195c9d9ec1c5e67f476554ba98326e1f90531794d85f589ad01526cb03a7a499664600832e0091a5b659bddb4a4bbd
7
+ data.tar.gz: 500294bd0331ca19a6a3c26bf704bbbf0f1694afe5b6a64ff5c54df877a7236e76a26c55b15a2b8cec9081bdb36bcdf95d9f3e25b831db135647f789f741cba0
data/CHANGELOG.md CHANGED
@@ -1,12 +1,19 @@
1
+ 0.1.1 (2016-05-02)
2
+ ==================
3
+ - [New Feature] Add keep_expanding_json_column option
4
+ - https://github.com/civitaspo/embulk-filter-expand_json/pull/19
5
+ - [Fix] Fix retrieving unchanged json columns
6
+ - https://github.com/civitaspo/embulk-filter-expand_json/pull/17
7
+
1
8
  0.1.0 (2016-04-27)
2
9
  ==================
3
- - [Incompatible Change]: Add stop_on_invalid_record option
10
+ - [Incompatible Change / New Feature]: Add stop_on_invalid_record option
4
11
  - https://github.com/civitaspo/embulk-filter-expand_json/pull/15
5
12
  - https://github.com/civitaspo/embulk-filter-expand_json/issues/14
6
13
 
7
14
  0.0.6 (2016-03-17)
8
15
  ==================
9
- - [Add] Support JSON type
16
+ - [New Feature] Support JSON type
10
17
  - https://github.com/civitaspo/embulk-filter-expand_json/pull/11
11
18
  - [Enhancement] Validate json_column_name
12
19
  - https://github.com/civitaspo/embulk-filter-expand_json/pull/9
data/README.md CHANGED
@@ -17,6 +17,7 @@ expand columns having json into multiple columns
17
17
  - **name**: name of the column. you can define [JsonPath](http://goessner.net/articles/JsonPath/) style.
18
18
  - **type**: type of the column (see below)
19
19
  - **format**: format of the timestamp if type is timestamp
20
+ - **keep_expanding_json_column**: Do not remove the expanding json column from the input schema if it's true (false by default)
20
21
  - **stop_on_invalid_record**: Stop bulk load transaction if an invalid record is included (false by default)
21
22
 
22
23
  ---
data/build.gradle CHANGED
@@ -15,7 +15,7 @@ configurations {
15
15
  provided
16
16
  }
17
17
 
18
- version = "0.1.0"
18
+ version = "0.1.1"
19
19
  sourceCompatibility = 1.7
20
20
  targetCompatibility = 1.7
21
21
 
@@ -45,6 +45,10 @@ public class ExpandJsonFilterPlugin
45
45
  @Config("stop_on_invalid_record")
46
46
  @ConfigDefault("false")
47
47
  boolean getStopOnInvalidRecord();
48
+
49
+ @Config("keep_expanding_json_column")
50
+ @ConfigDefault("false")
51
+ public boolean getKeepExpandingJsonColumn();
48
52
  }
49
53
 
50
54
  @Override
@@ -80,10 +84,19 @@ public class ExpandJsonFilterPlugin
80
84
  int i = 0; // columns index
81
85
  for (Column inputColumn: inputSchema.getColumns()) {
82
86
  if (inputColumn.getName().contentEquals(task.getJsonColumnName())) {
83
- logger.info("removed column: name: {}, type: {}, index: {}",
87
+ if (!task.getKeepExpandingJsonColumn()) {
88
+ logger.info("removed column: name: {}, type: {}, index: {}",
84
89
  inputColumn.getName(),
85
90
  inputColumn.getType(),
86
91
  inputColumn.getIndex());
92
+ }
93
+ else {
94
+ logger.info("unchanged expanding column: name: {}, type: {}, index: {}",
95
+ inputColumn.getName(),
96
+ inputColumn.getType(),
97
+ i);
98
+ builder.add(new Column(i++, inputColumn.getName(), inputColumn.getType()));
99
+ }
87
100
  for (ColumnConfig expandedColumnConfig: task.getExpandedColumns()) {
88
101
  logger.info("added column: name: {}, type: {}, options: {}, index: {}",
89
102
  expandedColumnConfig.getName(),
@@ -108,6 +108,7 @@ public class FilteredPageOutput
108
108
 
109
109
  private final Logger logger = Exec.getLogger(FilteredPageOutput.class);
110
110
  private final boolean stopOnInvalidRecord;
111
+ private final boolean keepExpandingJsonColumn;
111
112
  private final List<UnchangedColumn> unchangedColumns;
112
113
  private final List<ExpandedColumn> expandedColumns;
113
114
  private final Column jsonColumn;
@@ -157,7 +158,7 @@ public class FilteredPageOutput
157
158
  for (Column outputColumn : outputSchema.getColumns()) {
158
159
  for (Column inputColumn : inputSchema.getColumns()) {
159
160
  if (inputColumn.getName().equals(outputColumn.getName()) &&
160
- !excludeColumn.getName().equals(outputColumn.getName())) {
161
+ (!excludeColumn.getName().equals(outputColumn.getName()) || keepExpandingJsonColumn)) {
161
162
 
162
163
  UnchangedColumn unchangedColumn = new UnchangedColumn(outputColumn.getName(),
163
164
  inputColumn,
@@ -191,6 +192,7 @@ public class FilteredPageOutput
191
192
  FilteredPageOutput(PluginTask task, Schema inputSchema, Schema outputSchema, PageOutput pageOutput)
192
193
  {
193
194
  this.stopOnInvalidRecord = task.getStopOnInvalidRecord();
195
+ this.keepExpandingJsonColumn = task.getKeepExpandingJsonColumn();
194
196
  this.jsonColumn = initializeJsonColumn(task, inputSchema);
195
197
  this.unchangedColumns = initializeUnchangedColumns(inputSchema,
196
198
  outputSchema,
@@ -261,6 +263,9 @@ public class FilteredPageOutput
261
263
  else if (Types.TIMESTAMP.equals(outputColumn.getType())) {
262
264
  pageBuilder.setTimestamp(outputColumn, pageReader.getTimestamp(inputColumn));
263
265
  }
266
+ else { // Json type
267
+ pageBuilder.setJson(outputColumn, pageReader.getJson(inputColumn));
268
+ }
264
269
  }
265
270
  }
266
271
 
@@ -21,6 +21,7 @@ import org.embulk.spi.PageTestUtils;
21
21
  import org.embulk.spi.Schema;
22
22
  import org.embulk.spi.SchemaConfigException;
23
23
  import org.embulk.spi.TestPageBuilderReader.MockPageOutput;
24
+ import org.embulk.spi.time.Timestamp;
24
25
  import org.embulk.spi.type.Type;
25
26
  import org.embulk.spi.util.Pages;
26
27
  import org.junit.Before;
@@ -183,12 +184,63 @@ public class TestExpandJsonFilterPlugin
183
184
  assertEquals("$.", task.getRoot());
184
185
  assertEquals("UTC", task.getTimeZone());
185
186
  assertEquals("%Y-%m-%d %H:%M:%S.%N %z", task.getDefaultTimestampFormat());
187
+ assertEquals(false, task.getStopOnInvalidRecord());
188
+ assertEquals(false, task.getKeepExpandingJsonColumn());
186
189
  }
187
190
 
188
191
  /*
189
192
  Expand Test
190
193
  */
191
194
 
195
+ @Test
196
+ public void testUnchangedColumnValues()
197
+ {
198
+ String configYaml = "" +
199
+ "type: expand_json\n" +
200
+ "json_column_name: _c6\n" +
201
+ "root: $.\n" +
202
+ "expanded_columns:\n" +
203
+ " - {name: _e0, type: string}\n";
204
+ final ConfigSource config = getConfigFromYaml(configYaml);
205
+ final Schema schema = schema("_c0", STRING, "_c1", BOOLEAN, "_c2", DOUBLE,
206
+ "_c3", LONG, "_c4", TIMESTAMP, "_c5", JSON, "_c6", STRING);
207
+
208
+ expandJsonFilterPlugin.transaction(config, schema, new Control()
209
+ {
210
+ @Override
211
+ public void run(TaskSource taskSource, Schema outputSchema)
212
+ {
213
+ MockPageOutput mockPageOutput = new MockPageOutput();
214
+
215
+ try (PageOutput pageOutput = expandJsonFilterPlugin.open(taskSource, schema, outputSchema, mockPageOutput)) {
216
+ for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema,
217
+ "_v0", // _c0
218
+ true, // _c1
219
+ 0.2, // _c2
220
+ 3L, // _c3
221
+ Timestamp.ofEpochSecond(4), // _c4
222
+ newMapBuilder().put(s("_e0"), s("_v5")).build(), // _c5
223
+ "{\"_e0\":\"_v6\"}")) {
224
+ pageOutput.add(page);
225
+ }
226
+
227
+ pageOutput.finish();
228
+ }
229
+
230
+ List<Object[]> records = Pages.toObjects(outputSchema, mockPageOutput.pages);
231
+ assertEquals(1, records.size());
232
+
233
+ Object[] record = records.get(0);
234
+ assertEquals("_v0", record[0]);
235
+ assertEquals(true, record[1]);
236
+ assertEquals(0.2, (double) record[2], 0.0001);
237
+ assertEquals(3L, record[3]);
238
+ assertEquals(Timestamp.ofEpochSecond(4), record[4]);
239
+ assertEquals(newMapBuilder().put(s("_e0"), s("_v5")).build(), record[5]);
240
+ }
241
+ });
242
+ }
243
+
192
244
  @Test
193
245
  public void testStopOnInvalidRecordOption()
194
246
  {
@@ -403,6 +455,60 @@ public class TestExpandJsonFilterPlugin
403
455
  });
404
456
  }
405
457
 
458
+ @Test
459
+ public void testExpandedJsonValuesWithKeepJsonColumns()
460
+ {
461
+ final String configYaml = "" +
462
+ "type: expand_json\n" +
463
+ "json_column_name: _c1\n" +
464
+ "root: $.\n" +
465
+ "expanded_columns:\n" +
466
+ " - {name: _e0, type: string}\n" +
467
+ "keep_expanding_json_column: true\n";
468
+
469
+ ConfigSource config = getConfigFromYaml(configYaml);
470
+ final Schema schema = schema("_c0", STRING, "_c1", STRING);
471
+
472
+ expandJsonFilterPlugin.transaction(config, schema, new Control()
473
+ {
474
+ @Override
475
+ public void run(TaskSource taskSource, Schema outputSchema)
476
+ {
477
+ MockPageOutput mockPageOutput = new MockPageOutput();
478
+
479
+ try (PageOutput pageOutput = expandJsonFilterPlugin.open(taskSource, schema, outputSchema, mockPageOutput)) {
480
+ for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema,
481
+ "_v0", "{\"_e0\":\"_ev0\"}")) {
482
+ pageOutput.add(page);
483
+ }
484
+
485
+ pageOutput.finish();
486
+ }
487
+
488
+ assertEquals(3, outputSchema.getColumnCount());
489
+ Column column;
490
+ { // 1st column
491
+ column = outputSchema.getColumn(0);
492
+ assertTrue(column.getName().equals("_c0") && column.getType().equals(STRING));
493
+ }
494
+ { // 2nd column
495
+ column = outputSchema.getColumn(1);
496
+ assertTrue(column.getName().equals("_c1") && column.getType().equals(STRING));
497
+ }
498
+ { // 3rd column
499
+ column = outputSchema.getColumn(2);
500
+ assertTrue(column.getName().equals("_e0") && column.getType().equals(STRING));
501
+ }
502
+
503
+ for (Object[] record : Pages.toObjects(outputSchema, mockPageOutput.pages)) {
504
+ assertEquals("_v0", record[0]);
505
+ assertEquals("{\"_e0\":\"_ev0\"}", record[1]);
506
+ assertEquals("_ev0", record[2]);
507
+ }
508
+ }
509
+ });
510
+ }
511
+
406
512
  @Test
407
513
  public void testExpandSpecialJsonValuesFromString()
408
514
  {
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-expand_json
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Civitaspo
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-04-27 00:00:00.000000000 Z
11
+ date: 2016-05-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -65,7 +65,7 @@ files:
65
65
  - src/test/java/org/embulk/filter/expand_json/TestExpandJsonFilterPlugin.java
66
66
  - classpath/accessors-smart-1.1.jar
67
67
  - classpath/asm-5.0.3.jar
68
- - classpath/embulk-filter-expand_json-0.1.0.jar
68
+ - classpath/embulk-filter-expand_json-0.1.1.jar
69
69
  - classpath/json-path-2.2.0.jar
70
70
  - classpath/json-smart-2.2.1.jar
71
71
  - classpath/slf4j-api-1.7.16.jar