embulk-filter-expand_json 0.1.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/README.md +2 -1
- data/build.gradle +1 -1
- data/src/main/java/org/embulk/filter/expand_json/ExpandJsonFilterPlugin.java +6 -4
- data/src/main/java/org/embulk/filter/expand_json/FilteredPageOutput.java +16 -9
- data/src/test/java/org/embulk/filter/expand_json/TestExpandJsonFilterPlugin.java +70 -5
- metadata +8 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1eb2c8defd0d9ac7920f13cdde2f95877f7219e2
|
4
|
+
data.tar.gz: 5cd7424ed6d9863c5572a4b07e3fa83097986ed2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 46c382f407a962967f65d08a33758380307f2f2403991ad77dded4a68df3e0e88e15a67e0b71b67e7b5c4d76a5a2dc2aa0833ae76dcf078006106b99b6dc0734
|
7
|
+
data.tar.gz: db059276a090fdbddf43ddec272c7cea372aeed6c60cb0b6f00f48084fdc820c370f585bc4abeeedd14662b1a90f1d7c51209510eaa3d246ea21cd484162108e
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,10 @@
|
|
1
|
+
0.2.0 (2017-07-14)
|
2
|
+
==================
|
3
|
+
- [Incompatible Change]: Remove `time_zone` option, use `default_timezone` instead and column-based timezone.
|
4
|
+
- https://github.com/civitaspo/embulk-filter-expand_json/pull/27
|
5
|
+
- https://github.com/civitaspo/embulk-filter-expand_json/pull/28
|
6
|
+
|
7
|
+
|
1
8
|
0.1.3 (2016-07-27)
|
2
9
|
==================
|
3
10
|
- [Enhancement] Improve Exception handling
|
data/README.md
CHANGED
@@ -18,6 +18,7 @@ expand columns having json into multiple columns
|
|
18
18
|
- **type**: type of the column (see below)
|
19
19
|
- **format**: format of the timestamp if type is timestamp
|
20
20
|
- **keep_expanding_json_column**: Not remove the expanding json column from input schema if it's true (false by default)
|
21
|
+
- **default_timezone**: Time zone of timestamp columns if values don’t include time zone description (`UTC` by default)
|
21
22
|
- **stop_on_invalid_record**: Stop bulk load transaction if an invalid record is included (false by default)
|
22
23
|
|
23
24
|
---
|
@@ -43,7 +44,7 @@ filters:
|
|
43
44
|
- {name: "phone_numbers", type: string}
|
44
45
|
- {name: "app_id", type: long}
|
45
46
|
- {name: "point", type: double}
|
46
|
-
- {name: "created_at", type: timestamp, format: "%Y-%m-%d"}
|
47
|
+
- {name: "created_at", type: timestamp, format: "%Y-%m-%d", timezone: "UTC"}
|
47
48
|
- {name: "profile.anniversary.et", type: string}
|
48
49
|
- {name: "profile.anniversary.voluptatem", type: string}
|
49
50
|
- {name: "profile.like_words[1]", type: string}
|
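data/build.gradle
CHANGED
[hunk not captured in this extract: +1 -1 per the file list above]
data/src/main/java/org/embulk/filter/expand_json/ExpandJsonFilterPlugin.java
CHANGED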
@@ -38,10 +38,7 @@ public class ExpandJsonFilterPlugin
|
|
38
38
|
@Config("expanded_columns")
|
39
39
|
public List<ColumnConfig> getExpandedColumns();
|
40
40
|
|
41
|
-
//
|
42
|
-
@Config("time_zone")
|
43
|
-
@ConfigDefault("\"UTC\"")
|
44
|
-
public String getTimeZone();
|
41
|
+
// default_timezone option from TimestampParser.Task
|
45
42
|
|
46
43
|
@Config("stop_on_invalid_record")
|
47
44
|
@ConfigDefault("false")
|
@@ -56,6 +53,11 @@ public class ExpandJsonFilterPlugin
|
|
56
53
|
public void transaction(ConfigSource config, Schema inputSchema,
|
57
54
|
FilterPlugin.Control control)
|
58
55
|
{
|
56
|
+
// check if deprecated 'time_zone' option is used.
|
57
|
+
if (config.has("time_zone")) {
|
58
|
+
throw new ConfigException("'time_zone' option will be deprecated");
|
59
|
+
}
|
60
|
+
|
59
61
|
PluginTask task = config.loadConfig(PluginTask.class);
|
60
62
|
|
61
63
|
// check if a column specified as json_column_name option exists or not
|
data/src/main/java/org/embulk/filter/expand_json/FilteredPageOutput.java
CHANGED
@@ -11,6 +11,8 @@ import com.jayway.jsonpath.JsonPath;
|
|
11
11
|
import com.jayway.jsonpath.Option;
|
12
12
|
import com.jayway.jsonpath.ParseContext;
|
13
13
|
import com.jayway.jsonpath.ReadContext;
|
14
|
+
import org.embulk.config.ConfigSource;
|
15
|
+
import org.embulk.config.Task;
|
14
16
|
import org.embulk.spi.Column;
|
15
17
|
import org.embulk.spi.ColumnConfig;
|
16
18
|
import org.embulk.spi.DataException;
|
@@ -107,6 +109,19 @@ public class FilteredPageOutput
|
|
107
109
|
}
|
108
110
|
}
|
109
111
|
|
112
|
+
private interface TimestampColumnOption
|
113
|
+
extends Task, TimestampParser.TimestampColumnOption
|
114
|
+
{
|
115
|
+
}
|
116
|
+
|
117
|
+
private static TimestampParser createTimestampParser(final PluginTask task,
|
118
|
+
final ColumnConfig columnConfig)
|
119
|
+
{
|
120
|
+
final TimestampColumnOption columnOption = columnConfig.getOption().loadConfig(TimestampColumnOption.class);
|
121
|
+
final String format = columnOption.getFormat().or(task.getDefaultTimestampFormat());
|
122
|
+
final DateTimeZone dateTimeZone = columnOption.getTimeZone().or(task.getDefaultTimeZone());
|
123
|
+
return new TimestampParser(task.getJRuby(), format, dateTimeZone);
|
124
|
+
}
|
110
125
|
|
111
126
|
private final Logger logger = Exec.getLogger(FilteredPageOutput.class);
|
112
127
|
private final boolean stopOnInvalidRecord;
|
@@ -130,15 +145,7 @@ public class FilteredPageOutput
|
|
130
145
|
|
131
146
|
TimestampParser timestampParser = null;
|
132
147
|
if (Types.TIMESTAMP.equals(expandedColumnConfig.getType())) {
|
133
|
-
|
134
|
-
if (expandedColumnConfig.getOption().has("format")) {
|
135
|
-
format = expandedColumnConfig.getOption().get(String.class, "format");
|
136
|
-
}
|
137
|
-
else {
|
138
|
-
format = task.getDefaultTimestampFormat();
|
139
|
-
}
|
140
|
-
DateTimeZone timezone = DateTimeZone.forID(task.getTimeZone());
|
141
|
-
timestampParser = new TimestampParser(task.getJRuby(), format, timezone);
|
148
|
+
timestampParser = createTimestampParser(task, expandedColumnConfig);
|
142
149
|
}
|
143
150
|
|
144
151
|
ExpandedColumn expandedColumn = new ExpandedColumn(outputColumn.getName(),
|
data/src/test/java/org/embulk/filter/expand_json/TestExpandJsonFilterPlugin.java
CHANGED
@@ -164,6 +164,26 @@ public class TestExpandJsonFilterPlugin
|
|
164
164
|
config.loadConfig(PluginTask.class);
|
165
165
|
}
|
166
166
|
|
167
|
+
@Test
|
168
|
+
public void testThrowConfigExceptionIfTimeZoneIsUsed()
|
169
|
+
{
|
170
|
+
String configYaml = "" +
|
171
|
+
"type: expand_json\n" +
|
172
|
+
"time_zone: Asia/Tokyo\n";
|
173
|
+
ConfigSource config = getConfigFromYaml(configYaml);
|
174
|
+
schema = schema("_c0", STRING, "_c1", STRING);
|
175
|
+
|
176
|
+
exception.expect(ConfigException.class);
|
177
|
+
exception.expectMessage("'time_zone' option will be deprecated");
|
178
|
+
expandJsonFilterPlugin.transaction(config, schema, new Control() {
|
179
|
+
@Override
|
180
|
+
public void run(TaskSource taskSource, Schema schema)
|
181
|
+
{
|
182
|
+
// do nothing
|
183
|
+
}
|
184
|
+
});
|
185
|
+
}
|
186
|
+
|
167
187
|
@Test
|
168
188
|
public void testThrowExceptionDuplicatedExpandedColumns()
|
169
189
|
{
|
@@ -227,7 +247,7 @@ public class TestExpandJsonFilterPlugin
|
|
227
247
|
PluginTask task = config.loadConfig(PluginTask.class);
|
228
248
|
|
229
249
|
assertEquals("$.", task.getRoot());
|
230
|
-
assertEquals("UTC", task.getTimeZone());
|
250
|
+
assertEquals("UTC", task.getDefaultTimeZone().getID());
|
231
251
|
assertEquals("%Y-%m-%d %H:%M:%S.%N %z", task.getDefaultTimestampFormat());
|
232
252
|
assertEquals(false, task.getStopOnInvalidRecord());
|
233
253
|
assertEquals(false, task.getKeepExpandingJsonColumn());
|
@@ -407,6 +427,51 @@ public class TestExpandJsonFilterPlugin
|
|
407
427
|
});
|
408
428
|
}
|
409
429
|
|
430
|
+
@Test
|
431
|
+
public void testColumnBasedTimezone()
|
432
|
+
{
|
433
|
+
String configYaml = "" +
|
434
|
+
"type: expand_json\n" +
|
435
|
+
"json_column_name: _c0\n" +
|
436
|
+
"root: $.\n" +
|
437
|
+
"expanded_columns:\n" +
|
438
|
+
" - {name: _j0, type: timestamp, format: '%Y-%m-%d %H:%M:%S %z'}\n" +
|
439
|
+
" - {name: _j1, type: timestamp, format: '%Y-%m-%d %H:%M:%S', timezone: 'Asia/Tokyo'}\n";
|
440
|
+
|
441
|
+
ConfigSource config = getConfigFromYaml(configYaml);
|
442
|
+
final Schema schema = schema("_c0", JSON, "_c1", STRING);
|
443
|
+
|
444
|
+
expandJsonFilterPlugin.transaction(config, schema, new Control()
|
445
|
+
{
|
446
|
+
@Override
|
447
|
+
public void run(TaskSource taskSource, Schema outputSchema)
|
448
|
+
{
|
449
|
+
MockPageOutput mockPageOutput = new MockPageOutput();
|
450
|
+
Value data = newMapBuilder()
|
451
|
+
.put(s("_j0"), s("2014-10-21 04:44:33 +0000"))
|
452
|
+
.put(s("_j1"), s("2014-10-21 04:44:33"))
|
453
|
+
.build();
|
454
|
+
|
455
|
+
try (PageOutput pageOutput = expandJsonFilterPlugin.open(taskSource, schema, outputSchema, mockPageOutput)) {
|
456
|
+
for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema, data, c1Data)) {
|
457
|
+
pageOutput.add(page);
|
458
|
+
}
|
459
|
+
|
460
|
+
pageOutput.finish();
|
461
|
+
}
|
462
|
+
|
463
|
+
PageReader pageReader = new PageReader(outputSchema);
|
464
|
+
|
465
|
+
for (Page page : mockPageOutput.pages) {
|
466
|
+
pageReader.setPage(page);
|
467
|
+
assertEquals("2014-10-21 04:44:33 UTC", pageReader.getTimestamp(outputSchema.getColumn(0)).toString());
|
468
|
+
assertEquals("2014-10-20 19:44:33 UTC", pageReader.getTimestamp(outputSchema.getColumn(1)).toString());
|
469
|
+
assertEquals(c1Data, pageReader.getString(outputSchema.getColumn(2)));
|
470
|
+
}
|
471
|
+
}
|
472
|
+
});
|
473
|
+
}
|
474
|
+
|
410
475
|
@Test
|
411
476
|
public void testExpandJsonValuesFromJson()
|
412
477
|
{
|
@@ -414,7 +479,7 @@ public class TestExpandJsonFilterPlugin
|
|
414
479
|
"type: expand_json\n" +
|
415
480
|
"json_column_name: _c0\n" +
|
416
481
|
"root: $.\n" +
|
417
|
-
"time_zone: Asia/Tokyo\n" +
|
482
|
+
"default_timezone: Asia/Tokyo\n" +
|
418
483
|
"expanded_columns:\n" +
|
419
484
|
" - {name: _j0, type: boolean}\n" +
|
420
485
|
" - {name: _j1, type: long}\n" +
|
@@ -531,7 +596,7 @@ public class TestExpandJsonFilterPlugin
|
|
531
596
|
"stop_on_invalid_record: 1\n" +
|
532
597
|
"json_column_name: _c0\n" +
|
533
598
|
"root: $.\n" +
|
534
|
-
"time_zone: Asia/Tokyo\n" +
|
599
|
+
"default_timezone: Asia/Tokyo\n" +
|
535
600
|
"expanded_columns:\n" +
|
536
601
|
" - {name: _j0, type: " + ValidType + "}\n";
|
537
602
|
|
@@ -679,7 +744,7 @@ public class TestExpandJsonFilterPlugin
|
|
679
744
|
"type: expand_json\n" +
|
680
745
|
"json_column_name: _c0\n" +
|
681
746
|
"root: $.\n" +
|
682
|
-
"time_zone: Asia/Tokyo\n" +
|
747
|
+
"default_timezone: Asia/Tokyo\n" +
|
683
748
|
"expanded_columns:\n" +
|
684
749
|
" - {name: _j0, type: boolean}\n" +
|
685
750
|
" - {name: _j1, type: long}\n" +
|
@@ -863,7 +928,7 @@ public class TestExpandJsonFilterPlugin
|
|
863
928
|
"type: expand_json\n" +
|
864
929
|
"json_column_name: _c0\n" +
|
865
930
|
"root: $.\n" +
|
866
|
-
"time_zone: Asia/Tokyo\n" +
|
931
|
+
"default_timezone: Asia/Tokyo\n" +
|
867
932
|
"expanded_columns:\n" +
|
868
933
|
" - {name: _j0, type: string}\n";
|
869
934
|
ConfigSource config = getConfigFromYaml(configYaml);
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-expand_json
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.3
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Civitaspo
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-07-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -63,12 +63,12 @@ files:
|
|
63
63
|
- src/main/java/org/embulk/filter/expand_json/ExpandJsonFilterPlugin.java
|
64
64
|
- src/main/java/org/embulk/filter/expand_json/FilteredPageOutput.java
|
65
65
|
- src/test/java/org/embulk/filter/expand_json/TestExpandJsonFilterPlugin.java
|
66
|
-
- classpath/accessors-smart-1.
|
67
|
-
- classpath/asm-5.0.
|
68
|
-
- classpath/embulk-filter-expand_json-0.1.3.jar
|
69
|
-
- classpath/json-path-2.
|
70
|
-
- classpath/json-smart-2.
|
71
|
-
- classpath/slf4j-api-1.7.
|
66
|
+
- classpath/accessors-smart-1.2.jar
|
67
|
+
- classpath/asm-5.0.4.jar
|
68
|
+
- classpath/embulk-filter-expand_json-0.2.0.jar
|
69
|
+
- classpath/json-path-2.4.0.jar
|
70
|
+
- classpath/json-smart-2.3.jar
|
71
|
+
- classpath/slf4j-api-1.7.25.jar
|
72
72
|
homepage: https://github.com/civitaspo/embulk-filter-expand_json
|
73
73
|
licenses:
|
74
74
|
- MIT
|