embulk-filter-expand_json 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +23 -9
- data/build.gradle +1 -1
- data/classpath/embulk-filter-expand_json-0.0.2.jar +0 -0
- data/example/config.yml +10 -9
- data/src/main/java/org/embulk/filter/expand_json/ExpandJsonFilterPlugin.java +9 -183
- data/src/main/java/org/embulk/filter/expand_json/FilteredPageOutput.java +223 -0
- metadata +4 -3
- data/classpath/embulk-filter-expand_json-0.0.1.jar +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 021e96f2218ac3326f0c470f860bde498866eff9
|
4
|
+
data.tar.gz: dd085b630641bed9a2829b26af1bb03e5ad75680
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4d6a8fc2b5b53e7b31d8246b231a48ff796cb4d74fcc0cadb713ff22d990327f623e337e56a97c0e0c0ffeb3b1027171199a1193962a6d6854ca552c0de78a2a
|
7
|
+
data.tar.gz: d3ad561c853b5677aa1e968007086dd1bcaf7cf6e116119b0b31906d7dd5922bf8c9920bdbdd74f2c6fcfdaa6dc0c8ffcf88c2545611189b2b734d8fcf416756
|
data/README.md
CHANGED
@@ -9,27 +9,41 @@ expand columns having json into multiple columns
|
|
9
9
|
## Configuration
|
10
10
|
|
11
11
|
- **json_column_name**: a column name having json to be expanded (string, required)
|
12
|
+
- **root**: root property from which each entry is fetched, specified in [JsonPath](http://goessner.net/articles/JsonPath/) style (string, default: `\"$.\"`)
|
12
13
|
- **expanded_columns**: columns expanded into multiple columns (array of hash, required)
|
13
14
|
- **name**: name of the column. You can specify it in [JsonPath](http://goessner.net/articles/JsonPath/) style.
|
14
15
|
- **type**: type of the column (see below)
|
15
16
|
- **format**: format of the timestamp if type is timestamp
|
16
17
|
|
18
|
+
---
|
19
|
+
**type of the column**
|
20
|
+
|
21
|
+
|name|description|
|
22
|
+
|:---|:---|
|
23
|
+
|boolean|true or false|
|
24
|
+
|long|64-bit signed integers|
|
25
|
+
|timestamp|Date and time with nano-seconds precision|
|
26
|
+
|double|64-bit floating point numbers|
|
27
|
+
|string|Strings|
|
28
|
+
|
29
|
+
|
17
30
|
## Example
|
18
31
|
|
19
32
|
```yaml
|
20
33
|
filters:
|
21
34
|
- type: expand_json
|
22
35
|
json_column_name: json_payload
|
36
|
+
root: "$."
|
23
37
|
expanded_columns:
|
24
|
-
- {name: "
|
25
|
-
- {name: "
|
26
|
-
- {name: "
|
27
|
-
- {name: "
|
28
|
-
- {name: "
|
29
|
-
- {name: "
|
30
|
-
- {name: "
|
31
|
-
- {name: "
|
32
|
-
- {name: "
|
38
|
+
- {name: "phone_numbers", type: string}
|
39
|
+
- {name: "app_id", type: long}
|
40
|
+
- {name: "point", type: double}
|
41
|
+
- {name: "created_at", type: timestamp, format: "%Y-%m-%d"}
|
42
|
+
- {name: "profile.anniversary.et", type: string}
|
43
|
+
- {name: "profile.anniversary.voluptatem", type: string}
|
44
|
+
- {name: "profile.like_words[1]", type: string}
|
45
|
+
- {name: "profile.like_words[2]", type: string}
|
46
|
+
- {name: "profile.like_words[0]", type: string}
|
33
47
|
```
|
34
48
|
|
35
49
|
## Note
|
data/build.gradle
CHANGED
Binary file
|
data/example/config.yml
CHANGED
@@ -19,16 +19,17 @@ in:
|
|
19
19
|
filters:
|
20
20
|
- type: expand_json
|
21
21
|
json_column_name: json_payload
|
22
|
+
root: "$."
|
22
23
|
expanded_columns:
|
23
|
-
- {name: "
|
24
|
-
- {name: "
|
25
|
-
- {name: "
|
26
|
-
- {name: "
|
27
|
-
- {name: "
|
28
|
-
- {name: "
|
29
|
-
- {name: "
|
30
|
-
- {name: "
|
31
|
-
- {name: "
|
24
|
+
- {name: "phone_numbers", type: string}
|
25
|
+
- {name: "app_id", type: long}
|
26
|
+
- {name: "point", type: double}
|
27
|
+
- {name: "created_at", type: timestamp, format: "%Y-%m-%d"}
|
28
|
+
- {name: "profile.anniversary.et", type: string}
|
29
|
+
- {name: "profile.anniversary", type: string}
|
30
|
+
- {name: "profile.like_words[1]", type: string}
|
31
|
+
- {name: "profile.like_words[2]", type: string}
|
32
|
+
- {name: "profile.like_words", type: string}
|
32
33
|
|
33
34
|
out:
|
34
35
|
type: stdout
|
@@ -45,6 +45,10 @@ public class ExpandJsonFilterPlugin
|
|
45
45
|
@Config("json_column_name")
|
46
46
|
public String getJsonColumnName();
|
47
47
|
|
48
|
+
@Config("root")
|
49
|
+
@ConfigDefault("\"$.\"")
|
50
|
+
public String getRoot();
|
51
|
+
|
48
52
|
@Config("expanded_columns")
|
49
53
|
public List<ColumnConfig> getExpandedColumns();
|
50
54
|
|
@@ -52,10 +56,6 @@ public class ExpandJsonFilterPlugin
|
|
52
56
|
@ConfigDefault("\"UTC\"")
|
53
57
|
public String getTimeZone();
|
54
58
|
|
55
|
-
// TODO if needed: add the original column name as the prefix of expanded
|
56
|
-
// @Config("add_original_column_name_as_prefix")
|
57
|
-
// @ConfigDefault("false")
|
58
|
-
// public String getOption2();
|
59
59
|
}
|
60
60
|
|
61
61
|
@Override
|
@@ -63,11 +63,7 @@ public class ExpandJsonFilterPlugin
|
|
63
63
|
FilterPlugin.Control control)
|
64
64
|
{
|
65
65
|
PluginTask task = config.loadConfig(PluginTask.class);
|
66
|
-
|
67
|
-
Schema outputSchema = buildOutputSchema(inputSchema,
|
68
|
-
task.getJsonColumnName(),
|
69
|
-
task.getExpandedColumns());
|
70
|
-
|
66
|
+
Schema outputSchema = buildOutputSchema(task, inputSchema);
|
71
67
|
control.run(task.dump(), outputSchema);
|
72
68
|
}
|
73
69
|
|
@@ -76,173 +72,20 @@ public class ExpandJsonFilterPlugin
|
|
76
72
|
final Schema outputSchema, final PageOutput output)
|
77
73
|
{
|
78
74
|
final PluginTask task = taskSource.loadTask(PluginTask.class);
|
79
|
-
|
80
|
-
final List<Column> inputColumns = inputSchema.getColumns();
|
81
|
-
|
82
|
-
final List<Column> inputColumnsExceptExpandedJsonColumn = new ArrayList<>();
|
83
|
-
final List<Column> expandedJsonColumns = new ArrayList<>();
|
84
|
-
|
85
|
-
for (Column column : outputSchema.getColumns()) {
|
86
|
-
if (inputColumns.contains(column)) {
|
87
|
-
inputColumnsExceptExpandedJsonColumn.add(column);
|
88
|
-
}
|
89
|
-
else {
|
90
|
-
expandedJsonColumns.add(column);
|
91
|
-
}
|
92
|
-
}
|
93
|
-
|
94
|
-
Column temporaryJsonColumn = null;
|
95
|
-
for (Column column: inputColumns) {
|
96
|
-
if (column.getName().contentEquals(task.getJsonColumnName())) {
|
97
|
-
temporaryJsonColumn = column;
|
98
|
-
}
|
99
|
-
}
|
100
|
-
final Column jsonColumn = temporaryJsonColumn;
|
101
|
-
|
102
|
-
final HashMap<String, TimestampParser> timestampParserMap = buildTimestampParserMap(task.getJRuby(),
|
103
|
-
task.getExpandedColumns(),
|
104
|
-
task.getTimeZone());
|
105
|
-
return new PageOutput()
|
106
|
-
{
|
107
|
-
private PageReader pageReader = new PageReader(inputSchema);
|
108
|
-
|
109
|
-
@Override
|
110
|
-
public void add(Page page)
|
111
|
-
{
|
112
|
-
try (PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, output)) {
|
113
|
-
pageReader.setPage(page);
|
114
|
-
|
115
|
-
while (pageReader.nextRecord()) {
|
116
|
-
setInputColumnsExceptFlattenJsonColumns(pageBuilder, inputColumnsExceptExpandedJsonColumn);
|
117
|
-
setExpandedJsonColumns(pageBuilder, jsonColumn, expandedJsonColumns, timestampParserMap);
|
118
|
-
pageBuilder.addRecord();
|
119
|
-
}
|
120
|
-
pageBuilder.finish();
|
121
|
-
}
|
122
|
-
catch (JsonProcessingException e) {
|
123
|
-
logger.error(e.getMessage());
|
124
|
-
throw Throwables.propagate(e);
|
125
|
-
}
|
126
|
-
}
|
127
|
-
|
128
|
-
@Override
|
129
|
-
public void finish()
|
130
|
-
{
|
131
|
-
output.finish();
|
132
|
-
}
|
133
|
-
|
134
|
-
@Override
|
135
|
-
public void close()
|
136
|
-
{
|
137
|
-
pageReader.close();
|
138
|
-
output.close();
|
139
|
-
}
|
140
|
-
|
141
|
-
private void setInputColumnsExceptFlattenJsonColumns(PageBuilder pageBuilder, List<Column> inputColumnsExceptExpandedJsonColumn) {
|
142
|
-
for (Column inputColumn: inputColumnsExceptExpandedJsonColumn) {
|
143
|
-
if (pageReader.isNull(inputColumn)) {
|
144
|
-
pageBuilder.setNull(inputColumn);
|
145
|
-
continue;
|
146
|
-
}
|
147
|
-
|
148
|
-
if (Types.STRING.equals(inputColumn.getType())) {
|
149
|
-
pageBuilder.setString(inputColumn, pageReader.getString(inputColumn));
|
150
|
-
}
|
151
|
-
else if (Types.BOOLEAN.equals(inputColumn.getType())) {
|
152
|
-
pageBuilder.setBoolean(inputColumn, pageReader.getBoolean(inputColumn));
|
153
|
-
}
|
154
|
-
else if (Types.DOUBLE.equals(inputColumn.getType())) {
|
155
|
-
pageBuilder.setDouble(inputColumn, pageReader.getDouble(inputColumn));
|
156
|
-
}
|
157
|
-
else if (Types.LONG.equals(inputColumn.getType())) {
|
158
|
-
pageBuilder.setLong(inputColumn, pageReader.getLong(inputColumn));
|
159
|
-
}
|
160
|
-
else if (Types.TIMESTAMP.equals(inputColumn.getType())) {
|
161
|
-
pageBuilder.setTimestamp(inputColumn, pageReader.getTimestamp(inputColumn));
|
162
|
-
}
|
163
|
-
}
|
164
|
-
}
|
165
|
-
|
166
|
-
private void setExpandedJsonColumns(PageBuilder pageBuilder, Column originalJsonColumn, List<Column> expandedJsonColumns, HashMap<String, TimestampParser> timestampParserMap)
|
167
|
-
throws JsonProcessingException
|
168
|
-
{
|
169
|
-
final ReadContext json;
|
170
|
-
if (pageReader.isNull(originalJsonColumn)) {
|
171
|
-
json = null;
|
172
|
-
}
|
173
|
-
else {
|
174
|
-
String jsonObject = pageReader.getString(originalJsonColumn);
|
175
|
-
Configuration conf = Configuration.defaultConfiguration();
|
176
|
-
conf = conf.addOptions(Option.DEFAULT_PATH_LEAF_TO_NULL);
|
177
|
-
json = JsonPath.using(conf).parse(jsonObject);
|
178
|
-
}
|
179
|
-
|
180
|
-
for (Column expandedJsonColumn: expandedJsonColumns) {
|
181
|
-
if (json == null) {
|
182
|
-
pageBuilder.setNull(expandedJsonColumn);
|
183
|
-
continue;
|
184
|
-
}
|
185
|
-
|
186
|
-
Object value = json.read(expandedJsonColumn.getName());
|
187
|
-
final String finalValue = writeJsonPathValueAsString(value);
|
188
|
-
if (finalValue == null) {
|
189
|
-
pageBuilder.setNull(expandedJsonColumn);
|
190
|
-
continue;
|
191
|
-
}
|
192
|
-
|
193
|
-
if (Types.STRING.equals(expandedJsonColumn.getType())) {
|
194
|
-
pageBuilder.setString(expandedJsonColumn, finalValue);
|
195
|
-
}
|
196
|
-
else if (Types.BOOLEAN.equals(expandedJsonColumn.getType())) {
|
197
|
-
pageBuilder.setBoolean(expandedJsonColumn, Boolean.parseBoolean(finalValue));
|
198
|
-
}
|
199
|
-
else if (Types.DOUBLE.equals(expandedJsonColumn.getType())) {
|
200
|
-
pageBuilder.setDouble(expandedJsonColumn, Double.parseDouble(finalValue));
|
201
|
-
}
|
202
|
-
else if (Types.LONG.equals(expandedJsonColumn.getType())) {
|
203
|
-
pageBuilder.setLong(expandedJsonColumn, Long.parseLong(finalValue));
|
204
|
-
}
|
205
|
-
else if (Types.TIMESTAMP.equals(expandedJsonColumn.getType())) {
|
206
|
-
TimestampParser parser = timestampParserMap.get(expandedJsonColumn.getName());
|
207
|
-
pageBuilder.setTimestamp(expandedJsonColumn, parser.parse(finalValue));
|
208
|
-
}
|
209
|
-
}
|
210
|
-
}
|
211
|
-
|
212
|
-
private String writeJsonPathValueAsString(Object value)
|
213
|
-
throws JsonProcessingException
|
214
|
-
{
|
215
|
-
if (value == null) {
|
216
|
-
return null;
|
217
|
-
}
|
218
|
-
else if (value instanceof List) {
|
219
|
-
return new ObjectMapper().writeValueAsString(value);
|
220
|
-
}
|
221
|
-
else if (value instanceof Map) {
|
222
|
-
return new ObjectMapper().writeValueAsString(value);
|
223
|
-
}
|
224
|
-
else if (value instanceof String) {
|
225
|
-
return (String) value;
|
226
|
-
}
|
227
|
-
else {
|
228
|
-
return String.valueOf(value);
|
229
|
-
}
|
230
|
-
}
|
231
|
-
|
232
|
-
};
|
75
|
+
return new FilteredPageOutput(task, inputSchema, outputSchema, output);
|
233
76
|
}
|
234
77
|
|
235
|
-
private Schema buildOutputSchema(
|
78
|
+
private Schema buildOutputSchema(PluginTask task, Schema inputSchema)
|
236
79
|
{
|
237
80
|
ImmutableList.Builder<Column> builder = ImmutableList.builder();
|
238
81
|
|
239
82
|
int i = 0; // columns index
|
240
83
|
for (Column inputColumn: inputSchema.getColumns()) {
|
241
|
-
if (inputColumn.getName().contentEquals(
|
84
|
+
if (inputColumn.getName().contentEquals(task.getJsonColumnName())) {
|
242
85
|
logger.info("removed column: name: {}, type: {}",
|
243
86
|
inputColumn.getName(),
|
244
87
|
inputColumn.getType());
|
245
|
-
for (ColumnConfig expandedColumnConfig:
|
88
|
+
for (ColumnConfig expandedColumnConfig: task.getExpandedColumns()) {
|
246
89
|
logger.info("added column: name: {}, type: {}, options: {}",
|
247
90
|
expandedColumnConfig.getName(),
|
248
91
|
expandedColumnConfig.getType(),
|
@@ -264,21 +107,4 @@ public class ExpandJsonFilterPlugin
|
|
264
107
|
return new Schema(builder.build());
|
265
108
|
}
|
266
109
|
|
267
|
-
private HashMap<String, TimestampParser> buildTimestampParserMap(ScriptingContainer jruby, List<ColumnConfig> expandedColumnConfigs, String timeZone)
|
268
|
-
{
|
269
|
-
final HashMap<String, TimestampParser> timestampParserMap = Maps.newHashMap();
|
270
|
-
for (ColumnConfig expandedColumnConfig: expandedColumnConfigs) {
|
271
|
-
if (Types.TIMESTAMP.equals(expandedColumnConfig.getType())) {
|
272
|
-
String format = expandedColumnConfig.getOption().get(String.class, "format");
|
273
|
-
DateTimeZone timezone = DateTimeZone.forID(timeZone);
|
274
|
-
TimestampParser parser = new TimestampParser(jruby, format, timezone);
|
275
|
-
|
276
|
-
String columnName = expandedColumnConfig.getName();
|
277
|
-
|
278
|
-
timestampParserMap.put(columnName, parser);
|
279
|
-
}
|
280
|
-
}
|
281
|
-
|
282
|
-
return timestampParserMap;
|
283
|
-
}
|
284
110
|
}
|
@@ -0,0 +1,223 @@
|
|
1
|
+
package org.embulk.filter.expand_json;
|
2
|
+
|
3
|
+
import com.fasterxml.jackson.core.JsonProcessingException;
|
4
|
+
import com.fasterxml.jackson.databind.ObjectMapper;
|
5
|
+
import com.google.common.base.Throwables;
|
6
|
+
import com.google.common.collect.ImmutableList;
|
7
|
+
import com.google.common.collect.Maps;
|
8
|
+
import com.jayway.jsonpath.Configuration;
|
9
|
+
import com.jayway.jsonpath.JsonPath;
|
10
|
+
import com.jayway.jsonpath.Option;
|
11
|
+
import com.jayway.jsonpath.ReadContext;
|
12
|
+
import org.embulk.spi.Column;
|
13
|
+
import org.embulk.spi.ColumnConfig;
|
14
|
+
import org.embulk.spi.Exec;
|
15
|
+
import org.embulk.spi.Page;
|
16
|
+
import org.embulk.spi.PageBuilder;
|
17
|
+
import org.embulk.spi.PageOutput;
|
18
|
+
import org.embulk.spi.PageReader;
|
19
|
+
import org.embulk.spi.Schema;
|
20
|
+
import org.embulk.spi.time.TimestampParser;
|
21
|
+
import org.embulk.spi.type.Types;
|
22
|
+
import org.joda.time.DateTimeZone;
|
23
|
+
import org.slf4j.Logger;
|
24
|
+
|
25
|
+
import java.util.ArrayList;
|
26
|
+
import java.util.HashMap;
|
27
|
+
import java.util.List;
|
28
|
+
import java.util.Map;
|
29
|
+
|
30
|
+
import static org.embulk.filter.expand_json.ExpandJsonFilterPlugin.*;
|
31
|
+
|
32
|
+
/**
|
33
|
+
* Created by takahiro.nakayama on 10/19/15.
|
34
|
+
*/
|
35
|
+
public class FilteredPageOutput
|
36
|
+
implements PageOutput
|
37
|
+
{
|
38
|
+
private final Logger logger = Exec.getLogger(FilteredPageOutput.class);
|
39
|
+
private final String jsonPathRoot;
|
40
|
+
private final List<Column> inputColumnsExceptExpandedJsonColumn;
|
41
|
+
private final List<Column> expandedJsonColumns;
|
42
|
+
private final HashMap<String, TimestampParser> timestampParserHashMap;
|
43
|
+
private final Column jsonColumn;
|
44
|
+
private final PageReader pageReader;
|
45
|
+
private final Schema inputSchema;
|
46
|
+
private final Schema outputSchema;
|
47
|
+
private final PageOutput pageOutput;
|
48
|
+
|
49
|
+
FilteredPageOutput(PluginTask task, Schema inputSchema, Schema outputSchema, PageOutput pageOutput)
|
50
|
+
{
|
51
|
+
this.jsonPathRoot = task.getRoot();
|
52
|
+
|
53
|
+
ImmutableList.Builder<Column> inputColumnsExceptExpandedJsonColumnBuilder = ImmutableList.builder();
|
54
|
+
ImmutableList.Builder<Column> expandedJsonColumnsBuilder = ImmutableList.builder();
|
55
|
+
for (Column column : outputSchema.getColumns()) {
|
56
|
+
if (inputSchema.getColumns().contains(column)) {
|
57
|
+
inputColumnsExceptExpandedJsonColumnBuilder.add(column);
|
58
|
+
}
|
59
|
+
else {
|
60
|
+
expandedJsonColumnsBuilder.add(column);
|
61
|
+
}
|
62
|
+
}
|
63
|
+
this.inputColumnsExceptExpandedJsonColumn = inputColumnsExceptExpandedJsonColumnBuilder.build();
|
64
|
+
this.expandedJsonColumns = expandedJsonColumnsBuilder.build();
|
65
|
+
|
66
|
+
Column temporaryJsonColumn = null;
|
67
|
+
for (Column column: inputSchema.getColumns()) {
|
68
|
+
if (column.getName().contentEquals(task.getJsonColumnName())) {
|
69
|
+
temporaryJsonColumn = column;
|
70
|
+
}
|
71
|
+
}
|
72
|
+
this.jsonColumn = temporaryJsonColumn;
|
73
|
+
|
74
|
+
this.timestampParserHashMap = buildTimestampParserHashMap(task);
|
75
|
+
this.pageReader = new PageReader(inputSchema);
|
76
|
+
this.inputSchema = inputSchema;
|
77
|
+
this.outputSchema = outputSchema;
|
78
|
+
this.pageOutput = pageOutput;
|
79
|
+
}
|
80
|
+
|
81
|
+
@Override
|
82
|
+
public void add(Page page)
|
83
|
+
{
|
84
|
+
try (PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, pageOutput)) {
|
85
|
+
pageReader.setPage(page);
|
86
|
+
|
87
|
+
while (pageReader.nextRecord()) {
|
88
|
+
setInputColumnsExceptFlattenJsonColumns(pageBuilder, inputColumnsExceptExpandedJsonColumn);
|
89
|
+
setExpandedJsonColumns(pageBuilder, jsonColumn, expandedJsonColumns, timestampParserHashMap);
|
90
|
+
pageBuilder.addRecord();
|
91
|
+
}
|
92
|
+
pageBuilder.finish();
|
93
|
+
}
|
94
|
+
catch (JsonProcessingException e) {
|
95
|
+
logger.error(e.getMessage());
|
96
|
+
throw Throwables.propagate(e);
|
97
|
+
}
|
98
|
+
}
|
99
|
+
|
100
|
+
@Override
|
101
|
+
public void finish()
|
102
|
+
{
|
103
|
+
pageOutput.finish();
|
104
|
+
}
|
105
|
+
|
106
|
+
@Override
|
107
|
+
public void close()
|
108
|
+
{
|
109
|
+
pageReader.close();
|
110
|
+
pageOutput.close();
|
111
|
+
}
|
112
|
+
|
113
|
+
private HashMap<String, TimestampParser> buildTimestampParserHashMap(PluginTask task)
|
114
|
+
{
|
115
|
+
final HashMap<String, TimestampParser> timestampParserHashMap = Maps.newHashMap();
|
116
|
+
for (ColumnConfig expandedColumnConfig: task.getExpandedColumns()) {
|
117
|
+
if (Types.TIMESTAMP.equals(expandedColumnConfig.getType())) {
|
118
|
+
String format = expandedColumnConfig.getOption().get(String.class, "format");
|
119
|
+
DateTimeZone timezone = DateTimeZone.forID(task.getTimeZone());
|
120
|
+
TimestampParser parser = new TimestampParser(task.getJRuby(), format, timezone);
|
121
|
+
|
122
|
+
String columnName = expandedColumnConfig.getName();
|
123
|
+
|
124
|
+
timestampParserHashMap.put(columnName, parser);
|
125
|
+
}
|
126
|
+
}
|
127
|
+
|
128
|
+
return timestampParserHashMap;
|
129
|
+
}
|
130
|
+
|
131
|
+
private void setInputColumnsExceptFlattenJsonColumns(PageBuilder pageBuilder, List<Column> inputColumnsExceptExpandedJsonColumn) {
|
132
|
+
for (Column inputColumn: inputColumnsExceptExpandedJsonColumn) {
|
133
|
+
if (pageReader.isNull(inputColumn)) {
|
134
|
+
pageBuilder.setNull(inputColumn);
|
135
|
+
continue;
|
136
|
+
}
|
137
|
+
|
138
|
+
if (Types.STRING.equals(inputColumn.getType())) {
|
139
|
+
pageBuilder.setString(inputColumn, pageReader.getString(inputColumn));
|
140
|
+
}
|
141
|
+
else if (Types.BOOLEAN.equals(inputColumn.getType())) {
|
142
|
+
pageBuilder.setBoolean(inputColumn, pageReader.getBoolean(inputColumn));
|
143
|
+
}
|
144
|
+
else if (Types.DOUBLE.equals(inputColumn.getType())) {
|
145
|
+
pageBuilder.setDouble(inputColumn, pageReader.getDouble(inputColumn));
|
146
|
+
}
|
147
|
+
else if (Types.LONG.equals(inputColumn.getType())) {
|
148
|
+
pageBuilder.setLong(inputColumn, pageReader.getLong(inputColumn));
|
149
|
+
}
|
150
|
+
else if (Types.TIMESTAMP.equals(inputColumn.getType())) {
|
151
|
+
pageBuilder.setTimestamp(inputColumn, pageReader.getTimestamp(inputColumn));
|
152
|
+
}
|
153
|
+
}
|
154
|
+
}
|
155
|
+
|
156
|
+
private void setExpandedJsonColumns(PageBuilder pageBuilder, Column originalJsonColumn, List<Column> expandedJsonColumns, HashMap<String, TimestampParser> timestampParserMap)
|
157
|
+
throws JsonProcessingException
|
158
|
+
{
|
159
|
+
final ReadContext json;
|
160
|
+
if (pageReader.isNull(originalJsonColumn)) {
|
161
|
+
json = null;
|
162
|
+
}
|
163
|
+
else {
|
164
|
+
String jsonObject = pageReader.getString(originalJsonColumn);
|
165
|
+
Configuration conf = Configuration.defaultConfiguration();
|
166
|
+
conf = conf.addOptions(Option.DEFAULT_PATH_LEAF_TO_NULL);
|
167
|
+
conf = conf.addOptions(Option.SUPPRESS_EXCEPTIONS);
|
168
|
+
json = JsonPath.using(conf).parse(jsonObject);
|
169
|
+
}
|
170
|
+
|
171
|
+
for (Column expandedJsonColumn: expandedJsonColumns) {
|
172
|
+
if (json == null) {
|
173
|
+
pageBuilder.setNull(expandedJsonColumn);
|
174
|
+
continue;
|
175
|
+
}
|
176
|
+
|
177
|
+
Object value = json.read(jsonPathRoot + expandedJsonColumn.getName());
|
178
|
+
final String finalValue = writeJsonPathValueAsString(value);
|
179
|
+
if (finalValue == null) {
|
180
|
+
pageBuilder.setNull(expandedJsonColumn);
|
181
|
+
continue;
|
182
|
+
}
|
183
|
+
|
184
|
+
if (Types.STRING.equals(expandedJsonColumn.getType())) {
|
185
|
+
pageBuilder.setString(expandedJsonColumn, finalValue);
|
186
|
+
}
|
187
|
+
else if (Types.BOOLEAN.equals(expandedJsonColumn.getType())) {
|
188
|
+
pageBuilder.setBoolean(expandedJsonColumn, Boolean.parseBoolean(finalValue));
|
189
|
+
}
|
190
|
+
else if (Types.DOUBLE.equals(expandedJsonColumn.getType())) {
|
191
|
+
pageBuilder.setDouble(expandedJsonColumn, Double.parseDouble(finalValue));
|
192
|
+
}
|
193
|
+
else if (Types.LONG.equals(expandedJsonColumn.getType())) {
|
194
|
+
pageBuilder.setLong(expandedJsonColumn, Long.parseLong(finalValue));
|
195
|
+
}
|
196
|
+
else if (Types.TIMESTAMP.equals(expandedJsonColumn.getType())) {
|
197
|
+
TimestampParser parser = timestampParserMap.get(expandedJsonColumn.getName());
|
198
|
+
pageBuilder.setTimestamp(expandedJsonColumn, parser.parse(finalValue));
|
199
|
+
}
|
200
|
+
}
|
201
|
+
}
|
202
|
+
|
203
|
+
private String writeJsonPathValueAsString(Object value)
|
204
|
+
throws JsonProcessingException
|
205
|
+
{
|
206
|
+
if (value == null) {
|
207
|
+
return null;
|
208
|
+
}
|
209
|
+
else if (value instanceof List) {
|
210
|
+
return new ObjectMapper().writeValueAsString(value);
|
211
|
+
}
|
212
|
+
else if (value instanceof Map) {
|
213
|
+
return new ObjectMapper().writeValueAsString(value);
|
214
|
+
}
|
215
|
+
else if (value instanceof String) {
|
216
|
+
return (String) value;
|
217
|
+
}
|
218
|
+
else {
|
219
|
+
return String.valueOf(value);
|
220
|
+
}
|
221
|
+
}
|
222
|
+
|
223
|
+
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-expand_json
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Civitaspo
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-10-
|
11
|
+
date: 2015-10-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -57,10 +57,11 @@ files:
|
|
57
57
|
- gradlew.bat
|
58
58
|
- lib/embulk/filter/expand_json.rb
|
59
59
|
- src/main/java/org/embulk/filter/expand_json/ExpandJsonFilterPlugin.java
|
60
|
+
- src/main/java/org/embulk/filter/expand_json/FilteredPageOutput.java
|
60
61
|
- src/test/java/org/embulk/filter/expand_json/TestExpandJsonFilterPlugin.java
|
61
62
|
- classpath/asm-1.0.2.jar
|
62
63
|
- classpath/asm-3.3.1.jar
|
63
|
-
- classpath/embulk-filter-expand_json-0.0.1.jar
|
64
|
+
- classpath/embulk-filter-expand_json-0.0.2.jar
|
64
65
|
- classpath/json-path-2.0.0.jar
|
65
66
|
- classpath/json-smart-2.1.1.jar
|
66
67
|
homepage: https://github.com/civitaspo/embulk-filter-expand_json
|
Binary file
|