embulk-filter-expand_json 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +23 -9
- data/build.gradle +1 -1
- data/classpath/embulk-filter-expand_json-0.0.2.jar +0 -0
- data/example/config.yml +10 -9
- data/src/main/java/org/embulk/filter/expand_json/ExpandJsonFilterPlugin.java +9 -183
- data/src/main/java/org/embulk/filter/expand_json/FilteredPageOutput.java +223 -0
- metadata +4 -3
- data/classpath/embulk-filter-expand_json-0.0.1.jar +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 021e96f2218ac3326f0c470f860bde498866eff9
|
4
|
+
data.tar.gz: dd085b630641bed9a2829b26af1bb03e5ad75680
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4d6a8fc2b5b53e7b31d8246b231a48ff796cb4d74fcc0cadb713ff22d990327f623e337e56a97c0e0c0ffeb3b1027171199a1193962a6d6854ca552c0de78a2a
|
7
|
+
data.tar.gz: d3ad561c853b5677aa1e968007086dd1bcaf7cf6e116119b0b31906d7dd5922bf8c9920bdbdd74f2c6fcfdaa6dc0c8ffcf88c2545611189b2b734d8fcf416756
|
data/README.md
CHANGED
@@ -9,27 +9,41 @@ expand columns having json into multiple columns
|
|
9
9
|
## Configuration
|
10
10
|
|
11
11
|
- **json_column_name**: a column name having json to be expanded (string, required)
|
12
|
+
- **root**: root property from which to start fetching each entry, specified in [JsonPath](http://goessner.net/articles/JsonPath/) style (string, default: `\"$.\"`)
|
12
13
|
- **expanded_columns**: columns expanded into multiple columns (array of hash, required)
|
13
14
|
- **name**: name of the column. You can specify it in [JsonPath](http://goessner.net/articles/JsonPath/) style.
|
14
15
|
- **type**: type of the column (see below)
|
15
16
|
- **format**: format of the timestamp if type is timestamp
|
16
17
|
|
18
|
+
---
|
19
|
+
**type of the column**
|
20
|
+
|
21
|
+
|name|description|
|
22
|
+
|:---|:---|
|
23
|
+
|boolean|true or false|
|
24
|
+
|long|64-bit signed integers|
|
25
|
+
|timestamp|Date and time with nanosecond precision|
|
26
|
+
|double|64-bit floating point numbers|
|
27
|
+
|string|Strings|
|
28
|
+
|
29
|
+
|
17
30
|
## Example
|
18
31
|
|
19
32
|
```yaml
|
20
33
|
filters:
|
21
34
|
- type: expand_json
|
22
35
|
json_column_name: json_payload
|
36
|
+
root: "$."
|
23
37
|
expanded_columns:
|
24
|
-
- {name: "
|
25
|
-
- {name: "
|
26
|
-
- {name: "
|
27
|
-
- {name: "
|
28
|
-
- {name: "
|
29
|
-
- {name: "
|
30
|
-
- {name: "
|
31
|
-
- {name: "
|
32
|
-
- {name: "
|
38
|
+
- {name: "phone_numbers", type: string}
|
39
|
+
- {name: "app_id", type: long}
|
40
|
+
- {name: "point", type: double}
|
41
|
+
- {name: "created_at", type: timestamp, format: "%Y-%m-%d"}
|
42
|
+
- {name: "profile.anniversary.et", type: string}
|
43
|
+
- {name: "profile.anniversary.voluptatem", type: string}
|
44
|
+
- {name: "profile.like_words[1]", type: string}
|
45
|
+
- {name: "profile.like_words[2]", type: string}
|
46
|
+
- {name: "profile.like_words[0]", type: string}
|
33
47
|
```
|
34
48
|
|
35
49
|
## Note
|
data/build.gradle
CHANGED
Binary file
|
data/example/config.yml
CHANGED
@@ -19,16 +19,17 @@ in:
|
|
19
19
|
filters:
|
20
20
|
- type: expand_json
|
21
21
|
json_column_name: json_payload
|
22
|
+
root: "$."
|
22
23
|
expanded_columns:
|
23
|
-
- {name: "
|
24
|
-
- {name: "
|
25
|
-
- {name: "
|
26
|
-
- {name: "
|
27
|
-
- {name: "
|
28
|
-
- {name: "
|
29
|
-
- {name: "
|
30
|
-
- {name: "
|
31
|
-
- {name: "
|
24
|
+
- {name: "phone_numbers", type: string}
|
25
|
+
- {name: "app_id", type: long}
|
26
|
+
- {name: "point", type: double}
|
27
|
+
- {name: "created_at", type: timestamp, format: "%Y-%m-%d"}
|
28
|
+
- {name: "profile.anniversary.et", type: string}
|
29
|
+
- {name: "profile.anniversary", type: string}
|
30
|
+
- {name: "profile.like_words[1]", type: string}
|
31
|
+
- {name: "profile.like_words[2]", type: string}
|
32
|
+
- {name: "profile.like_words", type: string}
|
32
33
|
|
33
34
|
out:
|
34
35
|
type: stdout
|
@@ -45,6 +45,10 @@ public class ExpandJsonFilterPlugin
|
|
45
45
|
@Config("json_column_name")
|
46
46
|
public String getJsonColumnName();
|
47
47
|
|
48
|
+
@Config("root")
|
49
|
+
@ConfigDefault("\"$.\"")
|
50
|
+
public String getRoot();
|
51
|
+
|
48
52
|
@Config("expanded_columns")
|
49
53
|
public List<ColumnConfig> getExpandedColumns();
|
50
54
|
|
@@ -52,10 +56,6 @@ public class ExpandJsonFilterPlugin
|
|
52
56
|
@ConfigDefault("\"UTC\"")
|
53
57
|
public String getTimeZone();
|
54
58
|
|
55
|
-
// TODO if needed: add the original column name as the prefix of expanded
|
56
|
-
// @Config("add_original_column_name_as_prefix")
|
57
|
-
// @ConfigDefault("false")
|
58
|
-
// public String getOption2();
|
59
59
|
}
|
60
60
|
|
61
61
|
@Override
|
@@ -63,11 +63,7 @@ public class ExpandJsonFilterPlugin
|
|
63
63
|
FilterPlugin.Control control)
|
64
64
|
{
|
65
65
|
PluginTask task = config.loadConfig(PluginTask.class);
|
66
|
-
|
67
|
-
Schema outputSchema = buildOutputSchema(inputSchema,
|
68
|
-
task.getJsonColumnName(),
|
69
|
-
task.getExpandedColumns());
|
70
|
-
|
66
|
+
Schema outputSchema = buildOutputSchema(task, inputSchema);
|
71
67
|
control.run(task.dump(), outputSchema);
|
72
68
|
}
|
73
69
|
|
@@ -76,173 +72,20 @@ public class ExpandJsonFilterPlugin
|
|
76
72
|
final Schema outputSchema, final PageOutput output)
|
77
73
|
{
|
78
74
|
final PluginTask task = taskSource.loadTask(PluginTask.class);
|
79
|
-
|
80
|
-
final List<Column> inputColumns = inputSchema.getColumns();
|
81
|
-
|
82
|
-
final List<Column> inputColumnsExceptExpandedJsonColumn = new ArrayList<>();
|
83
|
-
final List<Column> expandedJsonColumns = new ArrayList<>();
|
84
|
-
|
85
|
-
for (Column column : outputSchema.getColumns()) {
|
86
|
-
if (inputColumns.contains(column)) {
|
87
|
-
inputColumnsExceptExpandedJsonColumn.add(column);
|
88
|
-
}
|
89
|
-
else {
|
90
|
-
expandedJsonColumns.add(column);
|
91
|
-
}
|
92
|
-
}
|
93
|
-
|
94
|
-
Column temporaryJsonColumn = null;
|
95
|
-
for (Column column: inputColumns) {
|
96
|
-
if (column.getName().contentEquals(task.getJsonColumnName())) {
|
97
|
-
temporaryJsonColumn = column;
|
98
|
-
}
|
99
|
-
}
|
100
|
-
final Column jsonColumn = temporaryJsonColumn;
|
101
|
-
|
102
|
-
final HashMap<String, TimestampParser> timestampParserMap = buildTimestampParserMap(task.getJRuby(),
|
103
|
-
task.getExpandedColumns(),
|
104
|
-
task.getTimeZone());
|
105
|
-
return new PageOutput()
|
106
|
-
{
|
107
|
-
private PageReader pageReader = new PageReader(inputSchema);
|
108
|
-
|
109
|
-
@Override
|
110
|
-
public void add(Page page)
|
111
|
-
{
|
112
|
-
try (PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, output)) {
|
113
|
-
pageReader.setPage(page);
|
114
|
-
|
115
|
-
while (pageReader.nextRecord()) {
|
116
|
-
setInputColumnsExceptFlattenJsonColumns(pageBuilder, inputColumnsExceptExpandedJsonColumn);
|
117
|
-
setExpandedJsonColumns(pageBuilder, jsonColumn, expandedJsonColumns, timestampParserMap);
|
118
|
-
pageBuilder.addRecord();
|
119
|
-
}
|
120
|
-
pageBuilder.finish();
|
121
|
-
}
|
122
|
-
catch (JsonProcessingException e) {
|
123
|
-
logger.error(e.getMessage());
|
124
|
-
throw Throwables.propagate(e);
|
125
|
-
}
|
126
|
-
}
|
127
|
-
|
128
|
-
@Override
|
129
|
-
public void finish()
|
130
|
-
{
|
131
|
-
output.finish();
|
132
|
-
}
|
133
|
-
|
134
|
-
@Override
|
135
|
-
public void close()
|
136
|
-
{
|
137
|
-
pageReader.close();
|
138
|
-
output.close();
|
139
|
-
}
|
140
|
-
|
141
|
-
private void setInputColumnsExceptFlattenJsonColumns(PageBuilder pageBuilder, List<Column> inputColumnsExceptExpandedJsonColumn) {
|
142
|
-
for (Column inputColumn: inputColumnsExceptExpandedJsonColumn) {
|
143
|
-
if (pageReader.isNull(inputColumn)) {
|
144
|
-
pageBuilder.setNull(inputColumn);
|
145
|
-
continue;
|
146
|
-
}
|
147
|
-
|
148
|
-
if (Types.STRING.equals(inputColumn.getType())) {
|
149
|
-
pageBuilder.setString(inputColumn, pageReader.getString(inputColumn));
|
150
|
-
}
|
151
|
-
else if (Types.BOOLEAN.equals(inputColumn.getType())) {
|
152
|
-
pageBuilder.setBoolean(inputColumn, pageReader.getBoolean(inputColumn));
|
153
|
-
}
|
154
|
-
else if (Types.DOUBLE.equals(inputColumn.getType())) {
|
155
|
-
pageBuilder.setDouble(inputColumn, pageReader.getDouble(inputColumn));
|
156
|
-
}
|
157
|
-
else if (Types.LONG.equals(inputColumn.getType())) {
|
158
|
-
pageBuilder.setLong(inputColumn, pageReader.getLong(inputColumn));
|
159
|
-
}
|
160
|
-
else if (Types.TIMESTAMP.equals(inputColumn.getType())) {
|
161
|
-
pageBuilder.setTimestamp(inputColumn, pageReader.getTimestamp(inputColumn));
|
162
|
-
}
|
163
|
-
}
|
164
|
-
}
|
165
|
-
|
166
|
-
private void setExpandedJsonColumns(PageBuilder pageBuilder, Column originalJsonColumn, List<Column> expandedJsonColumns, HashMap<String, TimestampParser> timestampParserMap)
|
167
|
-
throws JsonProcessingException
|
168
|
-
{
|
169
|
-
final ReadContext json;
|
170
|
-
if (pageReader.isNull(originalJsonColumn)) {
|
171
|
-
json = null;
|
172
|
-
}
|
173
|
-
else {
|
174
|
-
String jsonObject = pageReader.getString(originalJsonColumn);
|
175
|
-
Configuration conf = Configuration.defaultConfiguration();
|
176
|
-
conf = conf.addOptions(Option.DEFAULT_PATH_LEAF_TO_NULL);
|
177
|
-
json = JsonPath.using(conf).parse(jsonObject);
|
178
|
-
}
|
179
|
-
|
180
|
-
for (Column expandedJsonColumn: expandedJsonColumns) {
|
181
|
-
if (json == null) {
|
182
|
-
pageBuilder.setNull(expandedJsonColumn);
|
183
|
-
continue;
|
184
|
-
}
|
185
|
-
|
186
|
-
Object value = json.read(expandedJsonColumn.getName());
|
187
|
-
final String finalValue = writeJsonPathValueAsString(value);
|
188
|
-
if (finalValue == null) {
|
189
|
-
pageBuilder.setNull(expandedJsonColumn);
|
190
|
-
continue;
|
191
|
-
}
|
192
|
-
|
193
|
-
if (Types.STRING.equals(expandedJsonColumn.getType())) {
|
194
|
-
pageBuilder.setString(expandedJsonColumn, finalValue);
|
195
|
-
}
|
196
|
-
else if (Types.BOOLEAN.equals(expandedJsonColumn.getType())) {
|
197
|
-
pageBuilder.setBoolean(expandedJsonColumn, Boolean.parseBoolean(finalValue));
|
198
|
-
}
|
199
|
-
else if (Types.DOUBLE.equals(expandedJsonColumn.getType())) {
|
200
|
-
pageBuilder.setDouble(expandedJsonColumn, Double.parseDouble(finalValue));
|
201
|
-
}
|
202
|
-
else if (Types.LONG.equals(expandedJsonColumn.getType())) {
|
203
|
-
pageBuilder.setLong(expandedJsonColumn, Long.parseLong(finalValue));
|
204
|
-
}
|
205
|
-
else if (Types.TIMESTAMP.equals(expandedJsonColumn.getType())) {
|
206
|
-
TimestampParser parser = timestampParserMap.get(expandedJsonColumn.getName());
|
207
|
-
pageBuilder.setTimestamp(expandedJsonColumn, parser.parse(finalValue));
|
208
|
-
}
|
209
|
-
}
|
210
|
-
}
|
211
|
-
|
212
|
-
private String writeJsonPathValueAsString(Object value)
|
213
|
-
throws JsonProcessingException
|
214
|
-
{
|
215
|
-
if (value == null) {
|
216
|
-
return null;
|
217
|
-
}
|
218
|
-
else if (value instanceof List) {
|
219
|
-
return new ObjectMapper().writeValueAsString(value);
|
220
|
-
}
|
221
|
-
else if (value instanceof Map) {
|
222
|
-
return new ObjectMapper().writeValueAsString(value);
|
223
|
-
}
|
224
|
-
else if (value instanceof String) {
|
225
|
-
return (String) value;
|
226
|
-
}
|
227
|
-
else {
|
228
|
-
return String.valueOf(value);
|
229
|
-
}
|
230
|
-
}
|
231
|
-
|
232
|
-
};
|
75
|
+
return new FilteredPageOutput(task, inputSchema, outputSchema, output);
|
233
76
|
}
|
234
77
|
|
235
|
-
private Schema buildOutputSchema(
|
78
|
+
private Schema buildOutputSchema(PluginTask task, Schema inputSchema)
|
236
79
|
{
|
237
80
|
ImmutableList.Builder<Column> builder = ImmutableList.builder();
|
238
81
|
|
239
82
|
int i = 0; // columns index
|
240
83
|
for (Column inputColumn: inputSchema.getColumns()) {
|
241
|
-
if (inputColumn.getName().contentEquals(
|
84
|
+
if (inputColumn.getName().contentEquals(task.getJsonColumnName())) {
|
242
85
|
logger.info("removed column: name: {}, type: {}",
|
243
86
|
inputColumn.getName(),
|
244
87
|
inputColumn.getType());
|
245
|
-
for (ColumnConfig expandedColumnConfig:
|
88
|
+
for (ColumnConfig expandedColumnConfig: task.getExpandedColumns()) {
|
246
89
|
logger.info("added column: name: {}, type: {}, options: {}",
|
247
90
|
expandedColumnConfig.getName(),
|
248
91
|
expandedColumnConfig.getType(),
|
@@ -264,21 +107,4 @@ public class ExpandJsonFilterPlugin
|
|
264
107
|
return new Schema(builder.build());
|
265
108
|
}
|
266
109
|
|
267
|
-
private HashMap<String, TimestampParser> buildTimestampParserMap(ScriptingContainer jruby, List<ColumnConfig> expandedColumnConfigs, String timeZone)
|
268
|
-
{
|
269
|
-
final HashMap<String, TimestampParser> timestampParserMap = Maps.newHashMap();
|
270
|
-
for (ColumnConfig expandedColumnConfig: expandedColumnConfigs) {
|
271
|
-
if (Types.TIMESTAMP.equals(expandedColumnConfig.getType())) {
|
272
|
-
String format = expandedColumnConfig.getOption().get(String.class, "format");
|
273
|
-
DateTimeZone timezone = DateTimeZone.forID(timeZone);
|
274
|
-
TimestampParser parser = new TimestampParser(jruby, format, timezone);
|
275
|
-
|
276
|
-
String columnName = expandedColumnConfig.getName();
|
277
|
-
|
278
|
-
timestampParserMap.put(columnName, parser);
|
279
|
-
}
|
280
|
-
}
|
281
|
-
|
282
|
-
return timestampParserMap;
|
283
|
-
}
|
284
110
|
}
|
@@ -0,0 +1,223 @@
|
|
1
|
+
package org.embulk.filter.expand_json;
|
2
|
+
|
3
|
+
import com.fasterxml.jackson.core.JsonProcessingException;
|
4
|
+
import com.fasterxml.jackson.databind.ObjectMapper;
|
5
|
+
import com.google.common.base.Throwables;
|
6
|
+
import com.google.common.collect.ImmutableList;
|
7
|
+
import com.google.common.collect.Maps;
|
8
|
+
import com.jayway.jsonpath.Configuration;
|
9
|
+
import com.jayway.jsonpath.JsonPath;
|
10
|
+
import com.jayway.jsonpath.Option;
|
11
|
+
import com.jayway.jsonpath.ReadContext;
|
12
|
+
import org.embulk.spi.Column;
|
13
|
+
import org.embulk.spi.ColumnConfig;
|
14
|
+
import org.embulk.spi.Exec;
|
15
|
+
import org.embulk.spi.Page;
|
16
|
+
import org.embulk.spi.PageBuilder;
|
17
|
+
import org.embulk.spi.PageOutput;
|
18
|
+
import org.embulk.spi.PageReader;
|
19
|
+
import org.embulk.spi.Schema;
|
20
|
+
import org.embulk.spi.time.TimestampParser;
|
21
|
+
import org.embulk.spi.type.Types;
|
22
|
+
import org.joda.time.DateTimeZone;
|
23
|
+
import org.slf4j.Logger;
|
24
|
+
|
25
|
+
import java.util.ArrayList;
|
26
|
+
import java.util.HashMap;
|
27
|
+
import java.util.List;
|
28
|
+
import java.util.Map;
|
29
|
+
|
30
|
+
import static org.embulk.filter.expand_json.ExpandJsonFilterPlugin.*;
|
31
|
+
|
32
|
+
/**
|
33
|
+
* Created by takahiro.nakayama on 10/19/15.
|
34
|
+
*/
|
35
|
+
public class FilteredPageOutput
|
36
|
+
implements PageOutput
|
37
|
+
{
|
38
|
+
private final Logger logger = Exec.getLogger(FilteredPageOutput.class);
|
39
|
+
private final String jsonPathRoot;
|
40
|
+
private final List<Column> inputColumnsExceptExpandedJsonColumn;
|
41
|
+
private final List<Column> expandedJsonColumns;
|
42
|
+
private final HashMap<String, TimestampParser> timestampParserHashMap;
|
43
|
+
private final Column jsonColumn;
|
44
|
+
private final PageReader pageReader;
|
45
|
+
private final Schema inputSchema;
|
46
|
+
private final Schema outputSchema;
|
47
|
+
private final PageOutput pageOutput;
|
48
|
+
|
49
|
+
FilteredPageOutput(PluginTask task, Schema inputSchema, Schema outputSchema, PageOutput pageOutput)
|
50
|
+
{
|
51
|
+
this.jsonPathRoot = task.getRoot();
|
52
|
+
|
53
|
+
ImmutableList.Builder<Column> inputColumnsExceptExpandedJsonColumnBuilder = ImmutableList.builder();
|
54
|
+
ImmutableList.Builder<Column> expandedJsonColumnsBuilder = ImmutableList.builder();
|
55
|
+
for (Column column : outputSchema.getColumns()) {
|
56
|
+
if (inputSchema.getColumns().contains(column)) {
|
57
|
+
inputColumnsExceptExpandedJsonColumnBuilder.add(column);
|
58
|
+
}
|
59
|
+
else {
|
60
|
+
expandedJsonColumnsBuilder.add(column);
|
61
|
+
}
|
62
|
+
}
|
63
|
+
this.inputColumnsExceptExpandedJsonColumn = inputColumnsExceptExpandedJsonColumnBuilder.build();
|
64
|
+
this.expandedJsonColumns = expandedJsonColumnsBuilder.build();
|
65
|
+
|
66
|
+
Column temporaryJsonColumn = null;
|
67
|
+
for (Column column: inputSchema.getColumns()) {
|
68
|
+
if (column.getName().contentEquals(task.getJsonColumnName())) {
|
69
|
+
temporaryJsonColumn = column;
|
70
|
+
}
|
71
|
+
}
|
72
|
+
this.jsonColumn = temporaryJsonColumn;
|
73
|
+
|
74
|
+
this.timestampParserHashMap = buildTimestampParserHashMap(task);
|
75
|
+
this.pageReader = new PageReader(inputSchema);
|
76
|
+
this.inputSchema = inputSchema;
|
77
|
+
this.outputSchema = outputSchema;
|
78
|
+
this.pageOutput = pageOutput;
|
79
|
+
}
|
80
|
+
|
81
|
+
@Override
|
82
|
+
public void add(Page page)
|
83
|
+
{
|
84
|
+
try (PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, pageOutput)) {
|
85
|
+
pageReader.setPage(page);
|
86
|
+
|
87
|
+
while (pageReader.nextRecord()) {
|
88
|
+
setInputColumnsExceptFlattenJsonColumns(pageBuilder, inputColumnsExceptExpandedJsonColumn);
|
89
|
+
setExpandedJsonColumns(pageBuilder, jsonColumn, expandedJsonColumns, timestampParserHashMap);
|
90
|
+
pageBuilder.addRecord();
|
91
|
+
}
|
92
|
+
pageBuilder.finish();
|
93
|
+
}
|
94
|
+
catch (JsonProcessingException e) {
|
95
|
+
logger.error(e.getMessage());
|
96
|
+
throw Throwables.propagate(e);
|
97
|
+
}
|
98
|
+
}
|
99
|
+
|
100
|
+
@Override
|
101
|
+
public void finish()
|
102
|
+
{
|
103
|
+
pageOutput.finish();
|
104
|
+
}
|
105
|
+
|
106
|
+
@Override
|
107
|
+
public void close()
|
108
|
+
{
|
109
|
+
pageReader.close();
|
110
|
+
pageOutput.close();
|
111
|
+
}
|
112
|
+
|
113
|
+
private HashMap<String, TimestampParser> buildTimestampParserHashMap(PluginTask task)
|
114
|
+
{
|
115
|
+
final HashMap<String, TimestampParser> timestampParserHashMap = Maps.newHashMap();
|
116
|
+
for (ColumnConfig expandedColumnConfig: task.getExpandedColumns()) {
|
117
|
+
if (Types.TIMESTAMP.equals(expandedColumnConfig.getType())) {
|
118
|
+
String format = expandedColumnConfig.getOption().get(String.class, "format");
|
119
|
+
DateTimeZone timezone = DateTimeZone.forID(task.getTimeZone());
|
120
|
+
TimestampParser parser = new TimestampParser(task.getJRuby(), format, timezone);
|
121
|
+
|
122
|
+
String columnName = expandedColumnConfig.getName();
|
123
|
+
|
124
|
+
timestampParserHashMap.put(columnName, parser);
|
125
|
+
}
|
126
|
+
}
|
127
|
+
|
128
|
+
return timestampParserHashMap;
|
129
|
+
}
|
130
|
+
|
131
|
+
private void setInputColumnsExceptFlattenJsonColumns(PageBuilder pageBuilder, List<Column> inputColumnsExceptExpandedJsonColumn) {
|
132
|
+
for (Column inputColumn: inputColumnsExceptExpandedJsonColumn) {
|
133
|
+
if (pageReader.isNull(inputColumn)) {
|
134
|
+
pageBuilder.setNull(inputColumn);
|
135
|
+
continue;
|
136
|
+
}
|
137
|
+
|
138
|
+
if (Types.STRING.equals(inputColumn.getType())) {
|
139
|
+
pageBuilder.setString(inputColumn, pageReader.getString(inputColumn));
|
140
|
+
}
|
141
|
+
else if (Types.BOOLEAN.equals(inputColumn.getType())) {
|
142
|
+
pageBuilder.setBoolean(inputColumn, pageReader.getBoolean(inputColumn));
|
143
|
+
}
|
144
|
+
else if (Types.DOUBLE.equals(inputColumn.getType())) {
|
145
|
+
pageBuilder.setDouble(inputColumn, pageReader.getDouble(inputColumn));
|
146
|
+
}
|
147
|
+
else if (Types.LONG.equals(inputColumn.getType())) {
|
148
|
+
pageBuilder.setLong(inputColumn, pageReader.getLong(inputColumn));
|
149
|
+
}
|
150
|
+
else if (Types.TIMESTAMP.equals(inputColumn.getType())) {
|
151
|
+
pageBuilder.setTimestamp(inputColumn, pageReader.getTimestamp(inputColumn));
|
152
|
+
}
|
153
|
+
}
|
154
|
+
}
|
155
|
+
|
156
|
+
private void setExpandedJsonColumns(PageBuilder pageBuilder, Column originalJsonColumn, List<Column> expandedJsonColumns, HashMap<String, TimestampParser> timestampParserMap)
|
157
|
+
throws JsonProcessingException
|
158
|
+
{
|
159
|
+
final ReadContext json;
|
160
|
+
if (pageReader.isNull(originalJsonColumn)) {
|
161
|
+
json = null;
|
162
|
+
}
|
163
|
+
else {
|
164
|
+
String jsonObject = pageReader.getString(originalJsonColumn);
|
165
|
+
Configuration conf = Configuration.defaultConfiguration();
|
166
|
+
conf = conf.addOptions(Option.DEFAULT_PATH_LEAF_TO_NULL);
|
167
|
+
conf = conf.addOptions(Option.SUPPRESS_EXCEPTIONS);
|
168
|
+
json = JsonPath.using(conf).parse(jsonObject);
|
169
|
+
}
|
170
|
+
|
171
|
+
for (Column expandedJsonColumn: expandedJsonColumns) {
|
172
|
+
if (json == null) {
|
173
|
+
pageBuilder.setNull(expandedJsonColumn);
|
174
|
+
continue;
|
175
|
+
}
|
176
|
+
|
177
|
+
Object value = json.read(jsonPathRoot + expandedJsonColumn.getName());
|
178
|
+
final String finalValue = writeJsonPathValueAsString(value);
|
179
|
+
if (finalValue == null) {
|
180
|
+
pageBuilder.setNull(expandedJsonColumn);
|
181
|
+
continue;
|
182
|
+
}
|
183
|
+
|
184
|
+
if (Types.STRING.equals(expandedJsonColumn.getType())) {
|
185
|
+
pageBuilder.setString(expandedJsonColumn, finalValue);
|
186
|
+
}
|
187
|
+
else if (Types.BOOLEAN.equals(expandedJsonColumn.getType())) {
|
188
|
+
pageBuilder.setBoolean(expandedJsonColumn, Boolean.parseBoolean(finalValue));
|
189
|
+
}
|
190
|
+
else if (Types.DOUBLE.equals(expandedJsonColumn.getType())) {
|
191
|
+
pageBuilder.setDouble(expandedJsonColumn, Double.parseDouble(finalValue));
|
192
|
+
}
|
193
|
+
else if (Types.LONG.equals(expandedJsonColumn.getType())) {
|
194
|
+
pageBuilder.setLong(expandedJsonColumn, Long.parseLong(finalValue));
|
195
|
+
}
|
196
|
+
else if (Types.TIMESTAMP.equals(expandedJsonColumn.getType())) {
|
197
|
+
TimestampParser parser = timestampParserMap.get(expandedJsonColumn.getName());
|
198
|
+
pageBuilder.setTimestamp(expandedJsonColumn, parser.parse(finalValue));
|
199
|
+
}
|
200
|
+
}
|
201
|
+
}
|
202
|
+
|
203
|
+
private String writeJsonPathValueAsString(Object value)
|
204
|
+
throws JsonProcessingException
|
205
|
+
{
|
206
|
+
if (value == null) {
|
207
|
+
return null;
|
208
|
+
}
|
209
|
+
else if (value instanceof List) {
|
210
|
+
return new ObjectMapper().writeValueAsString(value);
|
211
|
+
}
|
212
|
+
else if (value instanceof Map) {
|
213
|
+
return new ObjectMapper().writeValueAsString(value);
|
214
|
+
}
|
215
|
+
else if (value instanceof String) {
|
216
|
+
return (String) value;
|
217
|
+
}
|
218
|
+
else {
|
219
|
+
return String.valueOf(value);
|
220
|
+
}
|
221
|
+
}
|
222
|
+
|
223
|
+
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-expand_json
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Civitaspo
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-10-
|
11
|
+
date: 2015-10-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -57,10 +57,11 @@ files:
|
|
57
57
|
- gradlew.bat
|
58
58
|
- lib/embulk/filter/expand_json.rb
|
59
59
|
- src/main/java/org/embulk/filter/expand_json/ExpandJsonFilterPlugin.java
|
60
|
+
- src/main/java/org/embulk/filter/expand_json/FilteredPageOutput.java
|
60
61
|
- src/test/java/org/embulk/filter/expand_json/TestExpandJsonFilterPlugin.java
|
61
62
|
- classpath/asm-1.0.2.jar
|
62
63
|
- classpath/asm-3.3.1.jar
|
63
|
-
- classpath/embulk-filter-expand_json-0.0.1.jar
|
64
|
+
- classpath/embulk-filter-expand_json-0.0.2.jar
|
64
65
|
- classpath/json-path-2.0.0.jar
|
65
66
|
- classpath/json-smart-2.1.1.jar
|
66
67
|
homepage: https://github.com/civitaspo/embulk-filter-expand_json
|
Binary file
|