embulk-filter-expand_json 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -2
- data/README.md +1 -0
- data/build.gradle +1 -1
- data/classpath/embulk-filter-expand_json-0.1.1.jar +0 -0
- data/src/main/java/org/embulk/filter/expand_json/ExpandJsonFilterPlugin.java +14 -1
- data/src/main/java/org/embulk/filter/expand_json/FilteredPageOutput.java +6 -1
- data/src/test/java/org/embulk/filter/expand_json/TestExpandJsonFilterPlugin.java +106 -0
- metadata +3 -3
- data/classpath/embulk-filter-expand_json-0.1.0.jar +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cf8c5e6cae24c44c66c9db8dcef9e9ec14325e05
|
4
|
+
data.tar.gz: 461d263c4d02a2b225ff05199d9a84f7e4ae15ef
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f033d39efd8b1532bf305bc338a17686c7195c9d9ec1c5e67f476554ba98326e1f90531794d85f589ad01526cb03a7a499664600832e0091a5b659bddb4a4bbd
|
7
|
+
data.tar.gz: 500294bd0331ca19a6a3c26bf704bbbf0f1694afe5b6a64ff5c54df877a7236e76a26c55b15a2b8cec9081bdb36bcdf95d9f3e25b831db135647f789f741cba0
|
data/CHANGELOG.md
CHANGED
@@ -1,12 +1,19 @@
|
|
1
|
+
0.1.1 (2016-05-02)
|
2
|
+
==================
|
3
|
+
- [New Feature] Add keep_expanding_json_column option
|
4
|
+
- https://github.com/civitaspo/embulk-filter-expand_json/pull/19
|
5
|
+
- [Fix] Fix retrieving unchanged json columns
|
6
|
+
- https://github.com/civitaspo/embulk-filter-expand_json/pull/17
|
7
|
+
|
1
8
|
0.1.0 (2016-04-27)
|
2
9
|
==================
|
3
|
-
- [Incompatible Change]: Add stop_on_invalid_record option
|
10
|
+
- [Incompatible Change / New Feature]: Add stop_on_invalid_record option
|
4
11
|
- https://github.com/civitaspo/embulk-filter-expand_json/pull/15
|
5
12
|
- https://github.com/civitaspo/embulk-filter-expand_json/issues/14
|
6
13
|
|
7
14
|
0.0.6 (2016-03-17)
|
8
15
|
==================
|
9
|
-
- [
|
16
|
+
- [New Feature] Support JSON type
|
10
17
|
- https://github.com/civitaspo/embulk-filter-expand_json/pull/11
|
11
18
|
- [Enhancement] Validate json_column_name
|
12
19
|
- https://github.com/civitaspo/embulk-filter-expand_json/pull/9
|
data/README.md
CHANGED
@@ -17,6 +17,7 @@ expand columns having json into multiple columns
|
|
17
17
|
- **name**: name of the column. you can define [JsonPath](http://goessner.net/articles/JsonPath/) style.
|
18
18
|
- **type**: type of the column (see below)
|
19
19
|
- **format**: format of the timestamp if type is timestamp
|
20
|
+
- **keep_expanding_json_column**: Do not remove the expanding json column from the input schema if it's true (false by default)
|
20
21
|
- **stop_on_invalid_record**: Stop bulk load transaction if an invalid record is included (false by default)
|
21
22
|
|
22
23
|
---
|
data/build.gradle
CHANGED
Binary file
|
@@ -45,6 +45,10 @@ public class ExpandJsonFilterPlugin
|
|
45
45
|
@Config("stop_on_invalid_record")
|
46
46
|
@ConfigDefault("false")
|
47
47
|
boolean getStopOnInvalidRecord();
|
48
|
+
|
49
|
+
@Config("keep_expanding_json_column")
|
50
|
+
@ConfigDefault("false")
|
51
|
+
public boolean getKeepExpandingJsonColumn();
|
48
52
|
}
|
49
53
|
|
50
54
|
@Override
|
@@ -80,10 +84,19 @@ public class ExpandJsonFilterPlugin
|
|
80
84
|
int i = 0; // columns index
|
81
85
|
for (Column inputColumn: inputSchema.getColumns()) {
|
82
86
|
if (inputColumn.getName().contentEquals(task.getJsonColumnName())) {
|
83
|
-
|
87
|
+
if (!task.getKeepExpandingJsonColumn()) {
|
88
|
+
logger.info("removed column: name: {}, type: {}, index: {}",
|
84
89
|
inputColumn.getName(),
|
85
90
|
inputColumn.getType(),
|
86
91
|
inputColumn.getIndex());
|
92
|
+
}
|
93
|
+
else {
|
94
|
+
logger.info("unchanged expanding column: name: {}, type: {}, index: {}",
|
95
|
+
inputColumn.getName(),
|
96
|
+
inputColumn.getType(),
|
97
|
+
i);
|
98
|
+
builder.add(new Column(i++, inputColumn.getName(), inputColumn.getType()));
|
99
|
+
}
|
87
100
|
for (ColumnConfig expandedColumnConfig: task.getExpandedColumns()) {
|
88
101
|
logger.info("added column: name: {}, type: {}, options: {}, index: {}",
|
89
102
|
expandedColumnConfig.getName(),
|
@@ -108,6 +108,7 @@ public class FilteredPageOutput
|
|
108
108
|
|
109
109
|
private final Logger logger = Exec.getLogger(FilteredPageOutput.class);
|
110
110
|
private final boolean stopOnInvalidRecord;
|
111
|
+
private final boolean keepExpandingJsonColumn;
|
111
112
|
private final List<UnchangedColumn> unchangedColumns;
|
112
113
|
private final List<ExpandedColumn> expandedColumns;
|
113
114
|
private final Column jsonColumn;
|
@@ -157,7 +158,7 @@ public class FilteredPageOutput
|
|
157
158
|
for (Column outputColumn : outputSchema.getColumns()) {
|
158
159
|
for (Column inputColumn : inputSchema.getColumns()) {
|
159
160
|
if (inputColumn.getName().equals(outputColumn.getName()) &&
|
160
|
-
!excludeColumn.getName().equals(outputColumn.getName())) {
|
161
|
+
(!excludeColumn.getName().equals(outputColumn.getName()) || keepExpandingJsonColumn)) {
|
161
162
|
|
162
163
|
UnchangedColumn unchangedColumn = new UnchangedColumn(outputColumn.getName(),
|
163
164
|
inputColumn,
|
@@ -191,6 +192,7 @@ public class FilteredPageOutput
|
|
191
192
|
FilteredPageOutput(PluginTask task, Schema inputSchema, Schema outputSchema, PageOutput pageOutput)
|
192
193
|
{
|
193
194
|
this.stopOnInvalidRecord = task.getStopOnInvalidRecord();
|
195
|
+
this.keepExpandingJsonColumn = task.getKeepExpandingJsonColumn();
|
194
196
|
this.jsonColumn = initializeJsonColumn(task, inputSchema);
|
195
197
|
this.unchangedColumns = initializeUnchangedColumns(inputSchema,
|
196
198
|
outputSchema,
|
@@ -261,6 +263,9 @@ public class FilteredPageOutput
|
|
261
263
|
else if (Types.TIMESTAMP.equals(outputColumn.getType())) {
|
262
264
|
pageBuilder.setTimestamp(outputColumn, pageReader.getTimestamp(inputColumn));
|
263
265
|
}
|
266
|
+
else { // Json type
|
267
|
+
pageBuilder.setJson(outputColumn, pageReader.getJson(inputColumn));
|
268
|
+
}
|
264
269
|
}
|
265
270
|
}
|
266
271
|
|
@@ -21,6 +21,7 @@ import org.embulk.spi.PageTestUtils;
|
|
21
21
|
import org.embulk.spi.Schema;
|
22
22
|
import org.embulk.spi.SchemaConfigException;
|
23
23
|
import org.embulk.spi.TestPageBuilderReader.MockPageOutput;
|
24
|
+
import org.embulk.spi.time.Timestamp;
|
24
25
|
import org.embulk.spi.type.Type;
|
25
26
|
import org.embulk.spi.util.Pages;
|
26
27
|
import org.junit.Before;
|
@@ -183,12 +184,63 @@ public class TestExpandJsonFilterPlugin
|
|
183
184
|
assertEquals("$.", task.getRoot());
|
184
185
|
assertEquals("UTC", task.getTimeZone());
|
185
186
|
assertEquals("%Y-%m-%d %H:%M:%S.%N %z", task.getDefaultTimestampFormat());
|
187
|
+
assertEquals(false, task.getStopOnInvalidRecord());
|
188
|
+
assertEquals(false, task.getKeepExpandingJsonColumn());
|
186
189
|
}
|
187
190
|
|
188
191
|
/*
|
189
192
|
Expand Test
|
190
193
|
*/
|
191
194
|
|
195
|
+
@Test
|
196
|
+
public void testUnchangedColumnValues()
|
197
|
+
{
|
198
|
+
String configYaml = "" +
|
199
|
+
"type: expand_json\n" +
|
200
|
+
"json_column_name: _c6\n" +
|
201
|
+
"root: $.\n" +
|
202
|
+
"expanded_columns:\n" +
|
203
|
+
" - {name: _e0, type: string}\n";
|
204
|
+
final ConfigSource config = getConfigFromYaml(configYaml);
|
205
|
+
final Schema schema = schema("_c0", STRING, "_c1", BOOLEAN, "_c2", DOUBLE,
|
206
|
+
"_c3", LONG, "_c4", TIMESTAMP, "_c5", JSON, "_c6", STRING);
|
207
|
+
|
208
|
+
expandJsonFilterPlugin.transaction(config, schema, new Control()
|
209
|
+
{
|
210
|
+
@Override
|
211
|
+
public void run(TaskSource taskSource, Schema outputSchema)
|
212
|
+
{
|
213
|
+
MockPageOutput mockPageOutput = new MockPageOutput();
|
214
|
+
|
215
|
+
try (PageOutput pageOutput = expandJsonFilterPlugin.open(taskSource, schema, outputSchema, mockPageOutput)) {
|
216
|
+
for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema,
|
217
|
+
"_v0", // _c0
|
218
|
+
true, // _c1
|
219
|
+
0.2, // _c2
|
220
|
+
3L, // _c3
|
221
|
+
Timestamp.ofEpochSecond(4), // _c4
|
222
|
+
newMapBuilder().put(s("_e0"), s("_v5")).build(), // _c5
|
223
|
+
"{\"_e0\":\"_v6\"}")) {
|
224
|
+
pageOutput.add(page);
|
225
|
+
}
|
226
|
+
|
227
|
+
pageOutput.finish();
|
228
|
+
}
|
229
|
+
|
230
|
+
List<Object[]> records = Pages.toObjects(outputSchema, mockPageOutput.pages);
|
231
|
+
assertEquals(1, records.size());
|
232
|
+
|
233
|
+
Object[] record = records.get(0);
|
234
|
+
assertEquals("_v0", record[0]);
|
235
|
+
assertEquals(true, record[1]);
|
236
|
+
assertEquals(0.2, (double) record[2], 0.0001);
|
237
|
+
assertEquals(3L, record[3]);
|
238
|
+
assertEquals(Timestamp.ofEpochSecond(4), record[4]);
|
239
|
+
assertEquals(newMapBuilder().put(s("_e0"), s("_v5")).build(), record[5]);
|
240
|
+
}
|
241
|
+
});
|
242
|
+
}
|
243
|
+
|
192
244
|
@Test
|
193
245
|
public void testStopOnInvalidRecordOption()
|
194
246
|
{
|
@@ -403,6 +455,60 @@ public class TestExpandJsonFilterPlugin
|
|
403
455
|
});
|
404
456
|
}
|
405
457
|
|
458
|
+
@Test
|
459
|
+
public void testExpandedJsonValuesWithKeepJsonColumns()
|
460
|
+
{
|
461
|
+
final String configYaml = "" +
|
462
|
+
"type: expand_json\n" +
|
463
|
+
"json_column_name: _c1\n" +
|
464
|
+
"root: $.\n" +
|
465
|
+
"expanded_columns:\n" +
|
466
|
+
" - {name: _e0, type: string}\n" +
|
467
|
+
"keep_expanding_json_column: true\n";
|
468
|
+
|
469
|
+
ConfigSource config = getConfigFromYaml(configYaml);
|
470
|
+
final Schema schema = schema("_c0", STRING, "_c1", STRING);
|
471
|
+
|
472
|
+
expandJsonFilterPlugin.transaction(config, schema, new Control()
|
473
|
+
{
|
474
|
+
@Override
|
475
|
+
public void run(TaskSource taskSource, Schema outputSchema)
|
476
|
+
{
|
477
|
+
MockPageOutput mockPageOutput = new MockPageOutput();
|
478
|
+
|
479
|
+
try (PageOutput pageOutput = expandJsonFilterPlugin.open(taskSource, schema, outputSchema, mockPageOutput)) {
|
480
|
+
for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema,
|
481
|
+
"_v0", "{\"_e0\":\"_ev0\"}")) {
|
482
|
+
pageOutput.add(page);
|
483
|
+
}
|
484
|
+
|
485
|
+
pageOutput.finish();
|
486
|
+
}
|
487
|
+
|
488
|
+
assertEquals(3, outputSchema.getColumnCount());
|
489
|
+
Column column;
|
490
|
+
{ // 1st column
|
491
|
+
column = outputSchema.getColumn(0);
|
492
|
+
assertTrue(column.getName().equals("_c0") && column.getType().equals(STRING));
|
493
|
+
}
|
494
|
+
{ // 2nd column
|
495
|
+
column = outputSchema.getColumn(1);
|
496
|
+
assertTrue(column.getName().equals("_c1") && column.getType().equals(STRING));
|
497
|
+
}
|
498
|
+
{ // 3rd column
|
499
|
+
column = outputSchema.getColumn(2);
|
500
|
+
assertTrue(column.getName().equals("_e0") && column.getType().equals(STRING));
|
501
|
+
}
|
502
|
+
|
503
|
+
for (Object[] record : Pages.toObjects(outputSchema, mockPageOutput.pages)) {
|
504
|
+
assertEquals("_v0", record[0]);
|
505
|
+
assertEquals("{\"_e0\":\"_ev0\"}", record[1]);
|
506
|
+
assertEquals("_ev0", record[2]);
|
507
|
+
}
|
508
|
+
}
|
509
|
+
});
|
510
|
+
}
|
511
|
+
|
406
512
|
@Test
|
407
513
|
public void testExpandSpecialJsonValuesFromString()
|
408
514
|
{
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-expand_json
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Civitaspo
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-05-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -65,7 +65,7 @@ files:
|
|
65
65
|
- src/test/java/org/embulk/filter/expand_json/TestExpandJsonFilterPlugin.java
|
66
66
|
- classpath/accessors-smart-1.1.jar
|
67
67
|
- classpath/asm-5.0.3.jar
|
68
|
-
- classpath/embulk-filter-expand_json-0.1.0.jar
|
68
|
+
- classpath/embulk-filter-expand_json-0.1.1.jar
|
69
69
|
- classpath/json-path-2.2.0.jar
|
70
70
|
- classpath/json-smart-2.2.1.jar
|
71
71
|
- classpath/slf4j-api-1.7.16.jar
|
Binary file
|