embulk-filter-expand_json 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 718512990aa52e1733712a39c856c355b57516ed
4
- data.tar.gz: 30763a006d651e9a71226d9c09748724e902d067
3
+ metadata.gz: cf8c5e6cae24c44c66c9db8dcef9e9ec14325e05
4
+ data.tar.gz: 461d263c4d02a2b225ff05199d9a84f7e4ae15ef
5
5
  SHA512:
6
- metadata.gz: 90bc0426d394ac7d5b56ceab6d5e76a790545322836d6efe1db164df994a831e88845df36de62f351acc5d659daa8759d1bb7344a27bfd11c05404f075262b3e
7
- data.tar.gz: afca72527e51e2ce6db90d65d959795a2c14955f95f3d64018a2ffa0855d59e93c8e030eaa6999fda6152daf896838efc4d97efdaf36b99ffb09f09ae4e4c235
6
+ metadata.gz: f033d39efd8b1532bf305bc338a17686c7195c9d9ec1c5e67f476554ba98326e1f90531794d85f589ad01526cb03a7a499664600832e0091a5b659bddb4a4bbd
7
+ data.tar.gz: 500294bd0331ca19a6a3c26bf704bbbf0f1694afe5b6a64ff5c54df877a7236e76a26c55b15a2b8cec9081bdb36bcdf95d9f3e25b831db135647f789f741cba0
data/CHANGELOG.md CHANGED
@@ -1,12 +1,19 @@
1
+ 0.1.1 (2016-05-02)
2
+ ==================
3
+ - [New Feature] Add keep_expanding_json_column option
4
+ - https://github.com/civitaspo/embulk-filter-expand_json/pull/19
5
+ - [Fix] Fix retrieving unchanged json columns
6
+ - https://github.com/civitaspo/embulk-filter-expand_json/pull/17
7
+
1
8
  0.1.0 (2016-04-27)
2
9
  ==================
3
- - [Incompatible Change]: Add stop_on_invalid_record option
10
+ - [Incompatible Change / New Feature]: Add stop_on_invalid_record option
4
11
  - https://github.com/civitaspo/embulk-filter-expand_json/pull/15
5
12
  - https://github.com/civitaspo/embulk-filter-expand_json/issues/14
6
13
 
7
14
  0.0.6 (2016-03-17)
8
15
  ==================
9
- - [Add] Support JSON type
16
+ - [New Feature] Support JSON type
10
17
  - https://github.com/civitaspo/embulk-filter-expand_json/pull/11
11
18
  - [Enhancement] Validate json_column_name
12
19
  - https://github.com/civitaspo/embulk-filter-expand_json/pull/9
data/README.md CHANGED
@@ -17,6 +17,7 @@ expand columns having json into multiple columns
17
17
  - **name**: name of the column. you can define [JsonPath](http://goessner.net/articles/JsonPath/) style.
18
18
  - **type**: type of the column (see below)
19
19
  - **format**: format of the timestamp if type is timestamp
20
+ - **keep_expanding_json_column**: Do not remove the expanding json column from the input schema if it's true (false by default)
20
21
  - **stop_on_invalid_record**: Stop bulk load transaction if an invalid record is included (false by default)
21
22
 
22
23
  ---
data/build.gradle CHANGED
@@ -15,7 +15,7 @@ configurations {
15
15
  provided
16
16
  }
17
17
 
18
- version = "0.1.0"
18
+ version = "0.1.1"
19
19
  sourceCompatibility = 1.7
20
20
  targetCompatibility = 1.7
21
21
 
@@ -45,6 +45,10 @@ public class ExpandJsonFilterPlugin
45
45
  @Config("stop_on_invalid_record")
46
46
  @ConfigDefault("false")
47
47
  boolean getStopOnInvalidRecord();
48
+
49
+ @Config("keep_expanding_json_column")
50
+ @ConfigDefault("false")
51
+ public boolean getKeepExpandingJsonColumn();
48
52
  }
49
53
 
50
54
  @Override
@@ -80,10 +84,19 @@ public class ExpandJsonFilterPlugin
80
84
  int i = 0; // columns index
81
85
  for (Column inputColumn: inputSchema.getColumns()) {
82
86
  if (inputColumn.getName().contentEquals(task.getJsonColumnName())) {
83
- logger.info("removed column: name: {}, type: {}, index: {}",
87
+ if (!task.getKeepExpandingJsonColumn()) {
88
+ logger.info("removed column: name: {}, type: {}, index: {}",
84
89
  inputColumn.getName(),
85
90
  inputColumn.getType(),
86
91
  inputColumn.getIndex());
92
+ }
93
+ else {
94
+ logger.info("unchanged expanding column: name: {}, type: {}, index: {}",
95
+ inputColumn.getName(),
96
+ inputColumn.getType(),
97
+ i);
98
+ builder.add(new Column(i++, inputColumn.getName(), inputColumn.getType()));
99
+ }
87
100
  for (ColumnConfig expandedColumnConfig: task.getExpandedColumns()) {
88
101
  logger.info("added column: name: {}, type: {}, options: {}, index: {}",
89
102
  expandedColumnConfig.getName(),
@@ -108,6 +108,7 @@ public class FilteredPageOutput
108
108
 
109
109
  private final Logger logger = Exec.getLogger(FilteredPageOutput.class);
110
110
  private final boolean stopOnInvalidRecord;
111
+ private final boolean keepExpandingJsonColumn;
111
112
  private final List<UnchangedColumn> unchangedColumns;
112
113
  private final List<ExpandedColumn> expandedColumns;
113
114
  private final Column jsonColumn;
@@ -157,7 +158,7 @@ public class FilteredPageOutput
157
158
  for (Column outputColumn : outputSchema.getColumns()) {
158
159
  for (Column inputColumn : inputSchema.getColumns()) {
159
160
  if (inputColumn.getName().equals(outputColumn.getName()) &&
160
- !excludeColumn.getName().equals(outputColumn.getName())) {
161
+ (!excludeColumn.getName().equals(outputColumn.getName()) || keepExpandingJsonColumn)) {
161
162
 
162
163
  UnchangedColumn unchangedColumn = new UnchangedColumn(outputColumn.getName(),
163
164
  inputColumn,
@@ -191,6 +192,7 @@ public class FilteredPageOutput
191
192
  FilteredPageOutput(PluginTask task, Schema inputSchema, Schema outputSchema, PageOutput pageOutput)
192
193
  {
193
194
  this.stopOnInvalidRecord = task.getStopOnInvalidRecord();
195
+ this.keepExpandingJsonColumn = task.getKeepExpandingJsonColumn();
194
196
  this.jsonColumn = initializeJsonColumn(task, inputSchema);
195
197
  this.unchangedColumns = initializeUnchangedColumns(inputSchema,
196
198
  outputSchema,
@@ -261,6 +263,9 @@ public class FilteredPageOutput
261
263
  else if (Types.TIMESTAMP.equals(outputColumn.getType())) {
262
264
  pageBuilder.setTimestamp(outputColumn, pageReader.getTimestamp(inputColumn));
263
265
  }
266
+ else { // Json type
267
+ pageBuilder.setJson(outputColumn, pageReader.getJson(inputColumn));
268
+ }
264
269
  }
265
270
  }
266
271
 
@@ -21,6 +21,7 @@ import org.embulk.spi.PageTestUtils;
21
21
  import org.embulk.spi.Schema;
22
22
  import org.embulk.spi.SchemaConfigException;
23
23
  import org.embulk.spi.TestPageBuilderReader.MockPageOutput;
24
+ import org.embulk.spi.time.Timestamp;
24
25
  import org.embulk.spi.type.Type;
25
26
  import org.embulk.spi.util.Pages;
26
27
  import org.junit.Before;
@@ -183,12 +184,63 @@ public class TestExpandJsonFilterPlugin
183
184
  assertEquals("$.", task.getRoot());
184
185
  assertEquals("UTC", task.getTimeZone());
185
186
  assertEquals("%Y-%m-%d %H:%M:%S.%N %z", task.getDefaultTimestampFormat());
187
+ assertEquals(false, task.getStopOnInvalidRecord());
188
+ assertEquals(false, task.getKeepExpandingJsonColumn());
186
189
  }
187
190
 
188
191
  /*
189
192
  Expand Test
190
193
  */
191
194
 
195
+ @Test
196
+ public void testUnchangedColumnValues()
197
+ {
198
+ String configYaml = "" +
199
+ "type: expand_json\n" +
200
+ "json_column_name: _c6\n" +
201
+ "root: $.\n" +
202
+ "expanded_columns:\n" +
203
+ " - {name: _e0, type: string}\n";
204
+ final ConfigSource config = getConfigFromYaml(configYaml);
205
+ final Schema schema = schema("_c0", STRING, "_c1", BOOLEAN, "_c2", DOUBLE,
206
+ "_c3", LONG, "_c4", TIMESTAMP, "_c5", JSON, "_c6", STRING);
207
+
208
+ expandJsonFilterPlugin.transaction(config, schema, new Control()
209
+ {
210
+ @Override
211
+ public void run(TaskSource taskSource, Schema outputSchema)
212
+ {
213
+ MockPageOutput mockPageOutput = new MockPageOutput();
214
+
215
+ try (PageOutput pageOutput = expandJsonFilterPlugin.open(taskSource, schema, outputSchema, mockPageOutput)) {
216
+ for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema,
217
+ "_v0", // _c0
218
+ true, // _c1
219
+ 0.2, // _c2
220
+ 3L, // _c3
221
+ Timestamp.ofEpochSecond(4), // _c4
222
+ newMapBuilder().put(s("_e0"), s("_v5")).build(), // _c5
223
+ "{\"_e0\":\"_v6\"}")) {
224
+ pageOutput.add(page);
225
+ }
226
+
227
+ pageOutput.finish();
228
+ }
229
+
230
+ List<Object[]> records = Pages.toObjects(outputSchema, mockPageOutput.pages);
231
+ assertEquals(1, records.size());
232
+
233
+ Object[] record = records.get(0);
234
+ assertEquals("_v0", record[0]);
235
+ assertEquals(true, record[1]);
236
+ assertEquals(0.2, (double) record[2], 0.0001);
237
+ assertEquals(3L, record[3]);
238
+ assertEquals(Timestamp.ofEpochSecond(4), record[4]);
239
+ assertEquals(newMapBuilder().put(s("_e0"), s("_v5")).build(), record[5]);
240
+ }
241
+ });
242
+ }
243
+
192
244
  @Test
193
245
  public void testStopOnInvalidRecordOption()
194
246
  {
@@ -403,6 +455,60 @@ public class TestExpandJsonFilterPlugin
403
455
  });
404
456
  }
405
457
 
458
+ @Test
459
+ public void testExpandedJsonValuesWithKeepJsonColumns()
460
+ {
461
+ final String configYaml = "" +
462
+ "type: expand_json\n" +
463
+ "json_column_name: _c1\n" +
464
+ "root: $.\n" +
465
+ "expanded_columns:\n" +
466
+ " - {name: _e0, type: string}\n" +
467
+ "keep_expanding_json_column: true\n";
468
+
469
+ ConfigSource config = getConfigFromYaml(configYaml);
470
+ final Schema schema = schema("_c0", STRING, "_c1", STRING);
471
+
472
+ expandJsonFilterPlugin.transaction(config, schema, new Control()
473
+ {
474
+ @Override
475
+ public void run(TaskSource taskSource, Schema outputSchema)
476
+ {
477
+ MockPageOutput mockPageOutput = new MockPageOutput();
478
+
479
+ try (PageOutput pageOutput = expandJsonFilterPlugin.open(taskSource, schema, outputSchema, mockPageOutput)) {
480
+ for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema,
481
+ "_v0", "{\"_e0\":\"_ev0\"}")) {
482
+ pageOutput.add(page);
483
+ }
484
+
485
+ pageOutput.finish();
486
+ }
487
+
488
+ assertEquals(3, outputSchema.getColumnCount());
489
+ Column column;
490
+ { // 1st column
491
+ column = outputSchema.getColumn(0);
492
+ assertTrue(column.getName().equals("_c0") && column.getType().equals(STRING));
493
+ }
494
+ { // 2nd column
495
+ column = outputSchema.getColumn(1);
496
+ assertTrue(column.getName().equals("_c1") && column.getType().equals(STRING));
497
+ }
498
+ { // 3rd column
499
+ column = outputSchema.getColumn(2);
500
+ assertTrue(column.getName().equals("_e0") && column.getType().equals(STRING));
501
+ }
502
+
503
+ for (Object[] record : Pages.toObjects(outputSchema, mockPageOutput.pages)) {
504
+ assertEquals("_v0", record[0]);
505
+ assertEquals("{\"_e0\":\"_ev0\"}", record[1]);
506
+ assertEquals("_ev0", record[2]);
507
+ }
508
+ }
509
+ });
510
+ }
511
+
406
512
  @Test
407
513
  public void testExpandSpecialJsonValuesFromString()
408
514
  {
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-expand_json
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Civitaspo
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-04-27 00:00:00.000000000 Z
11
+ date: 2016-05-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -65,7 +65,7 @@ files:
65
65
  - src/test/java/org/embulk/filter/expand_json/TestExpandJsonFilterPlugin.java
66
66
  - classpath/accessors-smart-1.1.jar
67
67
  - classpath/asm-5.0.3.jar
68
- - classpath/embulk-filter-expand_json-0.1.0.jar
68
+ - classpath/embulk-filter-expand_json-0.1.1.jar
69
69
  - classpath/json-path-2.2.0.jar
70
70
  - classpath/json-smart-2.2.1.jar
71
71
  - classpath/slf4j-api-1.7.16.jar