embulk-filter-expand_json 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -2
- data/README.md +1 -0
- data/build.gradle +1 -1
- data/classpath/embulk-filter-expand_json-0.1.1.jar +0 -0
- data/src/main/java/org/embulk/filter/expand_json/ExpandJsonFilterPlugin.java +14 -1
- data/src/main/java/org/embulk/filter/expand_json/FilteredPageOutput.java +6 -1
- data/src/test/java/org/embulk/filter/expand_json/TestExpandJsonFilterPlugin.java +106 -0
- metadata +3 -3
- data/classpath/embulk-filter-expand_json-0.1.0.jar +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cf8c5e6cae24c44c66c9db8dcef9e9ec14325e05
|
4
|
+
data.tar.gz: 461d263c4d02a2b225ff05199d9a84f7e4ae15ef
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f033d39efd8b1532bf305bc338a17686c7195c9d9ec1c5e67f476554ba98326e1f90531794d85f589ad01526cb03a7a499664600832e0091a5b659bddb4a4bbd
|
7
|
+
data.tar.gz: 500294bd0331ca19a6a3c26bf704bbbf0f1694afe5b6a64ff5c54df877a7236e76a26c55b15a2b8cec9081bdb36bcdf95d9f3e25b831db135647f789f741cba0
|
data/CHANGELOG.md
CHANGED
@@ -1,12 +1,19 @@
|
|
1
|
+
0.1.1 (2016-05-02)
|
2
|
+
==================
|
3
|
+
- [New Feature] Add keep_expanding_json_column option
|
4
|
+
- https://github.com/civitaspo/embulk-filter-expand_json/pull/19
|
5
|
+
- [Fix] Fix retrieving unchanged json columns
|
6
|
+
- https://github.com/civitaspo/embulk-filter-expand_json/pull/17
|
7
|
+
|
1
8
|
0.1.0 (2016-04-27)
|
2
9
|
==================
|
3
|
-
- [Incompatible Change]: Add stop_on_invalid_record option
|
10
|
+
- [Incompatible Change / New Feature]: Add stop_on_invalid_record option
|
4
11
|
- https://github.com/civitaspo/embulk-filter-expand_json/pull/15
|
5
12
|
- https://github.com/civitaspo/embulk-filter-expand_json/issues/14
|
6
13
|
|
7
14
|
0.0.6 (2016-03-17)
|
8
15
|
==================
|
9
|
-
- [
|
16
|
+
- [New Feature] Support JSON type
|
10
17
|
- https://github.com/civitaspo/embulk-filter-expand_json/pull/11
|
11
18
|
- [Enhancement] Validate json_column_name
|
12
19
|
- https://github.com/civitaspo/embulk-filter-expand_json/pull/9
|
data/README.md
CHANGED
@@ -17,6 +17,7 @@ expand columns having json into multiple columns
|
|
17
17
|
- **name**: name of the column. you can define [JsonPath](http://goessner.net/articles/JsonPath/) style.
|
18
18
|
- **type**: type of the column (see below)
|
19
19
|
- **format**: format of the timestamp if type is timestamp
|
20
|
+
- **keep_expanding_json_column**: Do not remove the expanding json column from the input schema if true (false by default)
|
20
21
|
- **stop_on_invalid_record**: Stop bulk load transaction if an invalid record is included (false by default)
|
21
22
|
|
22
23
|
---
|
data/build.gradle
CHANGED
Binary file
|
@@ -45,6 +45,10 @@ public class ExpandJsonFilterPlugin
|
|
45
45
|
@Config("stop_on_invalid_record")
|
46
46
|
@ConfigDefault("false")
|
47
47
|
boolean getStopOnInvalidRecord();
|
48
|
+
|
49
|
+
@Config("keep_expanding_json_column")
|
50
|
+
@ConfigDefault("false")
|
51
|
+
public boolean getKeepExpandingJsonColumn();
|
48
52
|
}
|
49
53
|
|
50
54
|
@Override
|
@@ -80,10 +84,19 @@ public class ExpandJsonFilterPlugin
|
|
80
84
|
int i = 0; // columns index
|
81
85
|
for (Column inputColumn: inputSchema.getColumns()) {
|
82
86
|
if (inputColumn.getName().contentEquals(task.getJsonColumnName())) {
|
83
|
-
|
87
|
+
if (!task.getKeepExpandingJsonColumn()) {
|
88
|
+
logger.info("removed column: name: {}, type: {}, index: {}",
|
84
89
|
inputColumn.getName(),
|
85
90
|
inputColumn.getType(),
|
86
91
|
inputColumn.getIndex());
|
92
|
+
}
|
93
|
+
else {
|
94
|
+
logger.info("unchanged expanding column: name: {}, type: {}, index: {}",
|
95
|
+
inputColumn.getName(),
|
96
|
+
inputColumn.getType(),
|
97
|
+
i);
|
98
|
+
builder.add(new Column(i++, inputColumn.getName(), inputColumn.getType()));
|
99
|
+
}
|
87
100
|
for (ColumnConfig expandedColumnConfig: task.getExpandedColumns()) {
|
88
101
|
logger.info("added column: name: {}, type: {}, options: {}, index: {}",
|
89
102
|
expandedColumnConfig.getName(),
|
@@ -108,6 +108,7 @@ public class FilteredPageOutput
|
|
108
108
|
|
109
109
|
private final Logger logger = Exec.getLogger(FilteredPageOutput.class);
|
110
110
|
private final boolean stopOnInvalidRecord;
|
111
|
+
private final boolean keepExpandingJsonColumn;
|
111
112
|
private final List<UnchangedColumn> unchangedColumns;
|
112
113
|
private final List<ExpandedColumn> expandedColumns;
|
113
114
|
private final Column jsonColumn;
|
@@ -157,7 +158,7 @@ public class FilteredPageOutput
|
|
157
158
|
for (Column outputColumn : outputSchema.getColumns()) {
|
158
159
|
for (Column inputColumn : inputSchema.getColumns()) {
|
159
160
|
if (inputColumn.getName().equals(outputColumn.getName()) &&
|
160
|
-
!excludeColumn.getName().equals(outputColumn.getName())) {
|
161
|
+
(!excludeColumn.getName().equals(outputColumn.getName()) || keepExpandingJsonColumn)) {
|
161
162
|
|
162
163
|
UnchangedColumn unchangedColumn = new UnchangedColumn(outputColumn.getName(),
|
163
164
|
inputColumn,
|
@@ -191,6 +192,7 @@ public class FilteredPageOutput
|
|
191
192
|
FilteredPageOutput(PluginTask task, Schema inputSchema, Schema outputSchema, PageOutput pageOutput)
|
192
193
|
{
|
193
194
|
this.stopOnInvalidRecord = task.getStopOnInvalidRecord();
|
195
|
+
this.keepExpandingJsonColumn = task.getKeepExpandingJsonColumn();
|
194
196
|
this.jsonColumn = initializeJsonColumn(task, inputSchema);
|
195
197
|
this.unchangedColumns = initializeUnchangedColumns(inputSchema,
|
196
198
|
outputSchema,
|
@@ -261,6 +263,9 @@ public class FilteredPageOutput
|
|
261
263
|
else if (Types.TIMESTAMP.equals(outputColumn.getType())) {
|
262
264
|
pageBuilder.setTimestamp(outputColumn, pageReader.getTimestamp(inputColumn));
|
263
265
|
}
|
266
|
+
else { // Json type
|
267
|
+
pageBuilder.setJson(outputColumn, pageReader.getJson(inputColumn));
|
268
|
+
}
|
264
269
|
}
|
265
270
|
}
|
266
271
|
|
@@ -21,6 +21,7 @@ import org.embulk.spi.PageTestUtils;
|
|
21
21
|
import org.embulk.spi.Schema;
|
22
22
|
import org.embulk.spi.SchemaConfigException;
|
23
23
|
import org.embulk.spi.TestPageBuilderReader.MockPageOutput;
|
24
|
+
import org.embulk.spi.time.Timestamp;
|
24
25
|
import org.embulk.spi.type.Type;
|
25
26
|
import org.embulk.spi.util.Pages;
|
26
27
|
import org.junit.Before;
|
@@ -183,12 +184,63 @@ public class TestExpandJsonFilterPlugin
|
|
183
184
|
assertEquals("$.", task.getRoot());
|
184
185
|
assertEquals("UTC", task.getTimeZone());
|
185
186
|
assertEquals("%Y-%m-%d %H:%M:%S.%N %z", task.getDefaultTimestampFormat());
|
187
|
+
assertEquals(false, task.getStopOnInvalidRecord());
|
188
|
+
assertEquals(false, task.getKeepExpandingJsonColumn());
|
186
189
|
}
|
187
190
|
|
188
191
|
/*
|
189
192
|
Expand Test
|
190
193
|
*/
|
191
194
|
|
195
|
+
@Test
|
196
|
+
public void testUnchangedColumnValues()
|
197
|
+
{
|
198
|
+
String configYaml = "" +
|
199
|
+
"type: expand_json\n" +
|
200
|
+
"json_column_name: _c6\n" +
|
201
|
+
"root: $.\n" +
|
202
|
+
"expanded_columns:\n" +
|
203
|
+
" - {name: _e0, type: string}\n";
|
204
|
+
final ConfigSource config = getConfigFromYaml(configYaml);
|
205
|
+
final Schema schema = schema("_c0", STRING, "_c1", BOOLEAN, "_c2", DOUBLE,
|
206
|
+
"_c3", LONG, "_c4", TIMESTAMP, "_c5", JSON, "_c6", STRING);
|
207
|
+
|
208
|
+
expandJsonFilterPlugin.transaction(config, schema, new Control()
|
209
|
+
{
|
210
|
+
@Override
|
211
|
+
public void run(TaskSource taskSource, Schema outputSchema)
|
212
|
+
{
|
213
|
+
MockPageOutput mockPageOutput = new MockPageOutput();
|
214
|
+
|
215
|
+
try (PageOutput pageOutput = expandJsonFilterPlugin.open(taskSource, schema, outputSchema, mockPageOutput)) {
|
216
|
+
for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema,
|
217
|
+
"_v0", // _c0
|
218
|
+
true, // _c1
|
219
|
+
0.2, // _c2
|
220
|
+
3L, // _c3
|
221
|
+
Timestamp.ofEpochSecond(4), // _c4
|
222
|
+
newMapBuilder().put(s("_e0"), s("_v5")).build(), // _c5
|
223
|
+
"{\"_e0\":\"_v6\"}")) {
|
224
|
+
pageOutput.add(page);
|
225
|
+
}
|
226
|
+
|
227
|
+
pageOutput.finish();
|
228
|
+
}
|
229
|
+
|
230
|
+
List<Object[]> records = Pages.toObjects(outputSchema, mockPageOutput.pages);
|
231
|
+
assertEquals(1, records.size());
|
232
|
+
|
233
|
+
Object[] record = records.get(0);
|
234
|
+
assertEquals("_v0", record[0]);
|
235
|
+
assertEquals(true, record[1]);
|
236
|
+
assertEquals(0.2, (double) record[2], 0.0001);
|
237
|
+
assertEquals(3L, record[3]);
|
238
|
+
assertEquals(Timestamp.ofEpochSecond(4), record[4]);
|
239
|
+
assertEquals(newMapBuilder().put(s("_e0"), s("_v5")).build(), record[5]);
|
240
|
+
}
|
241
|
+
});
|
242
|
+
}
|
243
|
+
|
192
244
|
@Test
|
193
245
|
public void testStopOnInvalidRecordOption()
|
194
246
|
{
|
@@ -403,6 +455,60 @@ public class TestExpandJsonFilterPlugin
|
|
403
455
|
});
|
404
456
|
}
|
405
457
|
|
458
|
+
@Test
|
459
|
+
public void testExpandedJsonValuesWithKeepJsonColumns()
|
460
|
+
{
|
461
|
+
final String configYaml = "" +
|
462
|
+
"type: expand_json\n" +
|
463
|
+
"json_column_name: _c1\n" +
|
464
|
+
"root: $.\n" +
|
465
|
+
"expanded_columns:\n" +
|
466
|
+
" - {name: _e0, type: string}\n" +
|
467
|
+
"keep_expanding_json_column: true\n";
|
468
|
+
|
469
|
+
ConfigSource config = getConfigFromYaml(configYaml);
|
470
|
+
final Schema schema = schema("_c0", STRING, "_c1", STRING);
|
471
|
+
|
472
|
+
expandJsonFilterPlugin.transaction(config, schema, new Control()
|
473
|
+
{
|
474
|
+
@Override
|
475
|
+
public void run(TaskSource taskSource, Schema outputSchema)
|
476
|
+
{
|
477
|
+
MockPageOutput mockPageOutput = new MockPageOutput();
|
478
|
+
|
479
|
+
try (PageOutput pageOutput = expandJsonFilterPlugin.open(taskSource, schema, outputSchema, mockPageOutput)) {
|
480
|
+
for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema,
|
481
|
+
"_v0", "{\"_e0\":\"_ev0\"}")) {
|
482
|
+
pageOutput.add(page);
|
483
|
+
}
|
484
|
+
|
485
|
+
pageOutput.finish();
|
486
|
+
}
|
487
|
+
|
488
|
+
assertEquals(3, outputSchema.getColumnCount());
|
489
|
+
Column column;
|
490
|
+
{ // 1st column
|
491
|
+
column = outputSchema.getColumn(0);
|
492
|
+
assertTrue(column.getName().equals("_c0") && column.getType().equals(STRING));
|
493
|
+
}
|
494
|
+
{ // 2nd column
|
495
|
+
column = outputSchema.getColumn(1);
|
496
|
+
assertTrue(column.getName().equals("_c1") && column.getType().equals(STRING));
|
497
|
+
}
|
498
|
+
{ // 3rd column
|
499
|
+
column = outputSchema.getColumn(2);
|
500
|
+
assertTrue(column.getName().equals("_e0") && column.getType().equals(STRING));
|
501
|
+
}
|
502
|
+
|
503
|
+
for (Object[] record : Pages.toObjects(outputSchema, mockPageOutput.pages)) {
|
504
|
+
assertEquals("_v0", record[0]);
|
505
|
+
assertEquals("{\"_e0\":\"_ev0\"}", record[1]);
|
506
|
+
assertEquals("_ev0", record[2]);
|
507
|
+
}
|
508
|
+
}
|
509
|
+
});
|
510
|
+
}
|
511
|
+
|
406
512
|
@Test
|
407
513
|
public void testExpandSpecialJsonValuesFromString()
|
408
514
|
{
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-expand_json
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Civitaspo
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-05-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -65,7 +65,7 @@ files:
|
|
65
65
|
- src/test/java/org/embulk/filter/expand_json/TestExpandJsonFilterPlugin.java
|
66
66
|
- classpath/accessors-smart-1.1.jar
|
67
67
|
- classpath/asm-5.0.3.jar
|
68
|
-
- classpath/embulk-filter-expand_json-0.1.0.jar
|
68
|
+
- classpath/embulk-filter-expand_json-0.1.1.jar
|
69
69
|
- classpath/json-path-2.2.0.jar
|
70
70
|
- classpath/json-smart-2.2.1.jar
|
71
71
|
- classpath/slf4j-api-1.7.16.jar
|
Binary file
|