embulk-filter-column 0.4.0 → 0.5.0.pre1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/CHANGELOG.md +6 -0
- data/README.md +23 -1
- data/build.gradle +1 -1
- data/example/columns.yml +2 -1
- data/example/example.yml +35 -0
- data/example/json_add_columns.yml +31 -0
- data/example/json_columns.yml +23 -0
- data/example/json_drop_columns.yml +22 -0
- data/gradlew +3 -3
- data/lib/embulk/filter/column.rb +1 -1
- data/settings.gradle +1 -0
- data/src/main/java/org/embulk/filter/column/ColumnFilterPlugin.java +260 -0
- data/src/main/java/org/embulk/filter/column/ColumnVisitorImpl.java +275 -0
- data/src/main/java/org/embulk/filter/column/JsonColumn.java +104 -0
- data/src/main/java/org/embulk/filter/column/JsonVisitor.java +328 -0
- metadata +14 -6
- data/src/main/java/org/embulk/filter/ColumnFilterPlugin.java +0 -462
@@ -0,0 +1,275 @@
|
|
1
|
+
package org.embulk.filter.column;
|
2
|
+
|
3
|
+
import com.google.common.base.Throwables;
|
4
|
+
|
5
|
+
import org.embulk.filter.column.ColumnFilterPlugin.ColumnConfig;
|
6
|
+
import org.embulk.filter.column.ColumnFilterPlugin.PluginTask;
|
7
|
+
|
8
|
+
import org.embulk.spi.Column;
|
9
|
+
import org.embulk.spi.ColumnVisitor;
|
10
|
+
import org.embulk.spi.Exec;
|
11
|
+
import org.embulk.spi.PageBuilder;
|
12
|
+
import org.embulk.spi.PageReader;
|
13
|
+
import org.embulk.spi.Schema;
|
14
|
+
import org.embulk.spi.SchemaConfigException;
|
15
|
+
import org.embulk.spi.json.JsonParser;
|
16
|
+
import org.embulk.spi.time.Timestamp;
|
17
|
+
import org.embulk.spi.time.TimestampParseException;
|
18
|
+
import org.embulk.spi.time.TimestampParser;
|
19
|
+
import org.embulk.spi.type.BooleanType;
|
20
|
+
import org.embulk.spi.type.DoubleType;
|
21
|
+
import org.embulk.spi.type.JsonType;
|
22
|
+
import org.embulk.spi.type.LongType;
|
23
|
+
import org.embulk.spi.type.StringType;
|
24
|
+
import org.embulk.spi.type.TimestampType;
|
25
|
+
import org.embulk.spi.type.Type;
|
26
|
+
|
27
|
+
import org.joda.time.DateTimeZone;
|
28
|
+
import org.msgpack.value.Value;
|
29
|
+
import org.slf4j.Logger;
|
30
|
+
|
31
|
+
import java.util.HashMap;
|
32
|
+
import java.util.List;
|
33
|
+
|
34
|
+
public class ColumnVisitorImpl implements ColumnVisitor
|
35
|
+
{
|
36
|
+
private static final Logger logger = Exec.getLogger(ColumnFilterPlugin.class);
|
37
|
+
private final PluginTask task;
|
38
|
+
private final Schema inputSchema;
|
39
|
+
private final Schema outputSchema;
|
40
|
+
private final PageReader pageReader;
|
41
|
+
private final PageBuilder pageBuilder;
|
42
|
+
private final HashMap<Column, Column> outputInputColumnMap = new HashMap<>();
|
43
|
+
private final HashMap<Column, Object> outputDefaultMap = new HashMap<>();
|
44
|
+
private final JsonVisitor jsonVisitor;
|
45
|
+
|
46
|
+
ColumnVisitorImpl(PluginTask task, Schema inputSchema, Schema outputSchema, PageReader pageReader, PageBuilder pageBuilder)
|
47
|
+
{
|
48
|
+
this.task = task;
|
49
|
+
this.inputSchema = inputSchema;
|
50
|
+
this.outputSchema = outputSchema;
|
51
|
+
this.pageReader = pageReader;
|
52
|
+
this.pageBuilder = pageBuilder;
|
53
|
+
buildOutputInputColumnMap();
|
54
|
+
buildOutputDefaultMap();
|
55
|
+
this.jsonVisitor = new JsonVisitor(task, inputSchema, outputSchema);
|
56
|
+
}
|
57
|
+
|
58
|
+
// Map outputColumn => inputColumn
|
59
|
+
private void buildOutputInputColumnMap()
|
60
|
+
{
|
61
|
+
for (Column outputColumn : outputSchema.getColumns()) {
|
62
|
+
String name = outputColumn.getName();
|
63
|
+
String srcName = getSrc(name, task.getColumns());
|
64
|
+
if (srcName == null) {
|
65
|
+
srcName = getSrc(name, task.getAddColumns());
|
66
|
+
}
|
67
|
+
if (srcName == null) {
|
68
|
+
srcName = name;
|
69
|
+
}
|
70
|
+
Column inputColumn;
|
71
|
+
try {
|
72
|
+
inputColumn = inputSchema.lookupColumn(srcName);
|
73
|
+
}
|
74
|
+
catch (SchemaConfigException ex) {
|
75
|
+
inputColumn = null;
|
76
|
+
}
|
77
|
+
outputInputColumnMap.put(outputColumn, inputColumn); // NOTE: inputColumn would be null
|
78
|
+
}
|
79
|
+
}
|
80
|
+
|
81
|
+
// Map outputColumn => default value if present
|
82
|
+
private void buildOutputDefaultMap()
|
83
|
+
{
|
84
|
+
for (Column outputColumn : outputSchema.getColumns()) {
|
85
|
+
String name = outputColumn.getName();
|
86
|
+
Type type = outputColumn.getType();
|
87
|
+
|
88
|
+
Object defaultValue = getDefault(task, name, type, task.getColumns());
|
89
|
+
if (defaultValue == null) {
|
90
|
+
defaultValue = getDefault(task, name, type, task.getAddColumns());
|
91
|
+
}
|
92
|
+
if (defaultValue != null) {
|
93
|
+
outputDefaultMap.put(outputColumn, defaultValue);
|
94
|
+
}
|
95
|
+
}
|
96
|
+
}
|
97
|
+
|
98
|
+
static String getSrc(String name, List<ColumnConfig> columnConfigs)
|
99
|
+
{
|
100
|
+
for (ColumnConfig columnConfig : columnConfigs) {
|
101
|
+
if (columnConfig.getName().equals(name) &&
|
102
|
+
columnConfig.getSrc().isPresent()) {
|
103
|
+
return columnConfig.getSrc().get();
|
104
|
+
}
|
105
|
+
}
|
106
|
+
return null;
|
107
|
+
}
|
108
|
+
|
109
|
+
static Object getDefault(PluginTask task, String name, Type type, List<ColumnConfig> columnConfigs)
|
110
|
+
{
|
111
|
+
for (ColumnConfig columnConfig : columnConfigs) {
|
112
|
+
if (columnConfig.getName().equals(name)) {
|
113
|
+
return getDefault(task, name, type, columnConfig);
|
114
|
+
}
|
115
|
+
}
|
116
|
+
return null;
|
117
|
+
}
|
118
|
+
|
119
|
+
static Object getDefault(PluginTask task, String name, Type type, ColumnConfig columnConfig)
|
120
|
+
{
|
121
|
+
if (type instanceof BooleanType) {
|
122
|
+
if (columnConfig.getDefault().isPresent()) {
|
123
|
+
return (Boolean) columnConfig.getDefault().get();
|
124
|
+
}
|
125
|
+
}
|
126
|
+
else if (type instanceof LongType) {
|
127
|
+
if (columnConfig.getDefault().isPresent()) {
|
128
|
+
return new Long(columnConfig.getDefault().get().toString());
|
129
|
+
}
|
130
|
+
}
|
131
|
+
else if (type instanceof DoubleType) {
|
132
|
+
if (columnConfig.getDefault().isPresent()) {
|
133
|
+
return new Double(columnConfig.getDefault().get().toString());
|
134
|
+
}
|
135
|
+
}
|
136
|
+
else if (type instanceof StringType) {
|
137
|
+
if (columnConfig.getDefault().isPresent()) {
|
138
|
+
return columnConfig.getDefault().get();
|
139
|
+
}
|
140
|
+
}
|
141
|
+
else if (type instanceof JsonType) {
|
142
|
+
if (columnConfig.getDefault().isPresent()) {
|
143
|
+
JsonParser parser = new JsonParser();
|
144
|
+
return parser.parse((String) columnConfig.getDefault().get());
|
145
|
+
}
|
146
|
+
}
|
147
|
+
else if (type instanceof TimestampType) {
|
148
|
+
if (columnConfig.getDefault().isPresent()) {
|
149
|
+
String time = (String) columnConfig.getDefault().get();
|
150
|
+
String format = columnConfig.getFormat().or(task.getDefaultTimestampFormat());
|
151
|
+
DateTimeZone timezone = columnConfig.getTimeZone().or(task.getDefaultTimeZone());
|
152
|
+
TimestampParser parser = new TimestampParser(task.getJRuby(), format, timezone);
|
153
|
+
try {
|
154
|
+
Timestamp defaultValue = parser.parse(time);
|
155
|
+
return defaultValue;
|
156
|
+
}
|
157
|
+
catch (TimestampParseException ex) {
|
158
|
+
throw Throwables.propagate(ex);
|
159
|
+
}
|
160
|
+
}
|
161
|
+
}
|
162
|
+
return null;
|
163
|
+
}
|
164
|
+
|
165
|
+
@Override
|
166
|
+
public void booleanColumn(Column outputColumn)
|
167
|
+
{
|
168
|
+
Column inputColumn = outputInputColumnMap.get(outputColumn);
|
169
|
+
if (inputColumn == null || pageReader.isNull(inputColumn)) {
|
170
|
+
Boolean defaultValue = (Boolean) outputDefaultMap.get(outputColumn);
|
171
|
+
if (defaultValue == null) {
|
172
|
+
pageBuilder.setNull(outputColumn);
|
173
|
+
}
|
174
|
+
else {
|
175
|
+
pageBuilder.setBoolean(outputColumn, defaultValue.booleanValue());
|
176
|
+
}
|
177
|
+
}
|
178
|
+
else {
|
179
|
+
pageBuilder.setBoolean(outputColumn, pageReader.getBoolean(inputColumn));
|
180
|
+
}
|
181
|
+
}
|
182
|
+
|
183
|
+
@Override
|
184
|
+
public void longColumn(Column outputColumn)
|
185
|
+
{
|
186
|
+
Column inputColumn = outputInputColumnMap.get(outputColumn);
|
187
|
+
if (inputColumn == null || pageReader.isNull(inputColumn)) {
|
188
|
+
Long defaultValue = (Long) outputDefaultMap.get(outputColumn);
|
189
|
+
if (defaultValue == null) {
|
190
|
+
pageBuilder.setNull(outputColumn);
|
191
|
+
}
|
192
|
+
else {
|
193
|
+
pageBuilder.setLong(outputColumn, defaultValue.longValue());
|
194
|
+
}
|
195
|
+
}
|
196
|
+
else {
|
197
|
+
pageBuilder.setLong(outputColumn, pageReader.getLong(inputColumn));
|
198
|
+
}
|
199
|
+
}
|
200
|
+
|
201
|
+
@Override
|
202
|
+
public void doubleColumn(Column outputColumn)
|
203
|
+
{
|
204
|
+
Column inputColumn = outputInputColumnMap.get(outputColumn);
|
205
|
+
if (inputColumn == null || pageReader.isNull(inputColumn)) {
|
206
|
+
Double defaultValue = (Double) outputDefaultMap.get(outputColumn);
|
207
|
+
if (defaultValue == null) {
|
208
|
+
pageBuilder.setNull(outputColumn);
|
209
|
+
}
|
210
|
+
else {
|
211
|
+
pageBuilder.setDouble(outputColumn, defaultValue.doubleValue());
|
212
|
+
}
|
213
|
+
}
|
214
|
+
else {
|
215
|
+
pageBuilder.setDouble(outputColumn, pageReader.getDouble(inputColumn));
|
216
|
+
}
|
217
|
+
}
|
218
|
+
|
219
|
+
@Override
|
220
|
+
public void stringColumn(Column outputColumn)
|
221
|
+
{
|
222
|
+
Column inputColumn = outputInputColumnMap.get(outputColumn);
|
223
|
+
if (inputColumn == null || pageReader.isNull(inputColumn)) {
|
224
|
+
String defaultValue = (String) outputDefaultMap.get(outputColumn);
|
225
|
+
if (defaultValue == null) {
|
226
|
+
pageBuilder.setNull(outputColumn);
|
227
|
+
}
|
228
|
+
else {
|
229
|
+
pageBuilder.setString(outputColumn, defaultValue);
|
230
|
+
}
|
231
|
+
}
|
232
|
+
else {
|
233
|
+
pageBuilder.setString(outputColumn, pageReader.getString(inputColumn));
|
234
|
+
}
|
235
|
+
}
|
236
|
+
|
237
|
+
@Override
|
238
|
+
public void jsonColumn(Column outputColumn)
|
239
|
+
{
|
240
|
+
Column inputColumn = outputInputColumnMap.get(outputColumn);
|
241
|
+
if (inputColumn == null || pageReader.isNull(inputColumn)) {
|
242
|
+
Value defaultValue = (Value) outputDefaultMap.get(outputColumn);
|
243
|
+
if (defaultValue == null) {
|
244
|
+
pageBuilder.setNull(outputColumn);
|
245
|
+
}
|
246
|
+
else {
|
247
|
+
String jsonPath = new StringBuilder("$.").append(inputColumn.getName()).toString();
|
248
|
+
pageBuilder.setJson(outputColumn, jsonVisitor.visit(jsonPath, defaultValue));
|
249
|
+
}
|
250
|
+
}
|
251
|
+
else {
|
252
|
+
Value value = pageReader.getJson(inputColumn);
|
253
|
+
String jsonPath = new StringBuilder("$.").append(inputColumn.getName()).toString();
|
254
|
+
pageBuilder.setJson(outputColumn, jsonVisitor.visit(jsonPath, value));
|
255
|
+
}
|
256
|
+
}
|
257
|
+
|
258
|
+
@Override
|
259
|
+
public void timestampColumn(Column outputColumn)
|
260
|
+
{
|
261
|
+
Column inputColumn = outputInputColumnMap.get(outputColumn);
|
262
|
+
if (inputColumn == null || pageReader.isNull(inputColumn)) {
|
263
|
+
Timestamp defaultValue = (Timestamp) outputDefaultMap.get(outputColumn);
|
264
|
+
if (defaultValue == null) {
|
265
|
+
pageBuilder.setNull(outputColumn);
|
266
|
+
}
|
267
|
+
else {
|
268
|
+
pageBuilder.setTimestamp(outputColumn, defaultValue);
|
269
|
+
}
|
270
|
+
}
|
271
|
+
else {
|
272
|
+
pageBuilder.setTimestamp(outputColumn, pageReader.getTimestamp(inputColumn));
|
273
|
+
}
|
274
|
+
}
|
275
|
+
}
|
@@ -0,0 +1,104 @@
|
|
1
|
+
package org.embulk.filter.column;
|
2
|
+
|
3
|
+
import org.embulk.spi.type.Type;
|
4
|
+
import org.msgpack.value.StringValue;
|
5
|
+
import org.msgpack.value.Value;
|
6
|
+
import org.msgpack.value.ValueFactory;
|
7
|
+
|
8
|
+
public class JsonColumn
|
9
|
+
{
|
10
|
+
private final String name;
|
11
|
+
private final Type type;
|
12
|
+
private final Value defaultValue;
|
13
|
+
private String objectPath = null; // object path (like directory) of json path
|
14
|
+
private String elementPath = null; // element path (like leaf) of json path
|
15
|
+
private StringValue nameValue = null;
|
16
|
+
private StringValue objectPathValue = null;
|
17
|
+
private StringValue elementPathValue = null;
|
18
|
+
|
19
|
+
public JsonColumn(
|
20
|
+
String name,
|
21
|
+
Type type)
|
22
|
+
{
|
23
|
+
this(name, type, null);
|
24
|
+
}
|
25
|
+
|
26
|
+
public JsonColumn(
|
27
|
+
String name,
|
28
|
+
Type type,
|
29
|
+
Value defaultValue)
|
30
|
+
{
|
31
|
+
this.name = name;
|
32
|
+
this.type = type;
|
33
|
+
this.defaultValue = (defaultValue == null ? ValueFactory.newNil() : defaultValue);
|
34
|
+
this.objectPath = objectPath(name);
|
35
|
+
this.elementPath = elementPath(name);
|
36
|
+
this.nameValue = ValueFactory.newString(name);
|
37
|
+
this.objectPathValue = ValueFactory.newString(objectPath);
|
38
|
+
this.elementPathValue = ValueFactory.newString(elementPath);
|
39
|
+
}
|
40
|
+
|
41
|
+
public String getName()
|
42
|
+
{
|
43
|
+
return name;
|
44
|
+
}
|
45
|
+
|
46
|
+
public Type getType()
|
47
|
+
{
|
48
|
+
return type;
|
49
|
+
}
|
50
|
+
|
51
|
+
public Value getDefaultValue()
|
52
|
+
{
|
53
|
+
return defaultValue;
|
54
|
+
}
|
55
|
+
|
56
|
+
public String getObjectPath()
|
57
|
+
{
|
58
|
+
return objectPath;
|
59
|
+
}
|
60
|
+
|
61
|
+
public String getElementPath()
|
62
|
+
{
|
63
|
+
return elementPath;
|
64
|
+
}
|
65
|
+
|
66
|
+
public StringValue getNameValue()
|
67
|
+
{
|
68
|
+
return nameValue;
|
69
|
+
}
|
70
|
+
|
71
|
+
public StringValue getObjectPathValue()
|
72
|
+
{
|
73
|
+
return objectPathValue;
|
74
|
+
}
|
75
|
+
|
76
|
+
public StringValue getElementPathValue()
|
77
|
+
{
|
78
|
+
return elementPathValue;
|
79
|
+
}
|
80
|
+
|
81
|
+
public static String objectPath(String path)
|
82
|
+
{
|
83
|
+
String[] parts = path.split("\\.");
|
84
|
+
StringBuilder builder = new StringBuilder();
|
85
|
+
builder.append(parts[0]);
|
86
|
+
for (int i = 1; i < parts.length - 1; i++) {
|
87
|
+
builder.append(".").append(parts[i]);
|
88
|
+
}
|
89
|
+
if (parts[parts.length - 1].contains("[")) {
|
90
|
+
String[] arrayParts = parts[parts.length - 1].split("\\[");
|
91
|
+
builder.append(".").append(arrayParts[0]);
|
92
|
+
for (int j = 1; j < arrayParts.length - 1; j++) {
|
93
|
+
builder.append("[").append(arrayParts[j]);
|
94
|
+
}
|
95
|
+
}
|
96
|
+
return builder.toString();
|
97
|
+
}
|
98
|
+
|
99
|
+
public static String elementPath(String path)
|
100
|
+
{
|
101
|
+
String[] parts = path.split("\\.");
|
102
|
+
return parts[parts.length - 1];
|
103
|
+
}
|
104
|
+
}
|
@@ -0,0 +1,328 @@
|
|
1
|
+
package org.embulk.filter.column;
|
2
|
+
|
3
|
+
import org.embulk.config.ConfigException;
|
4
|
+
import org.embulk.filter.column.ColumnFilterPlugin.ColumnConfig;
|
5
|
+
import org.embulk.filter.column.ColumnFilterPlugin.PluginTask;
|
6
|
+
|
7
|
+
import org.embulk.spi.Exec;
|
8
|
+
import org.embulk.spi.Schema;
|
9
|
+
import org.embulk.spi.SchemaConfigException;
|
10
|
+
import org.embulk.spi.type.BooleanType;
|
11
|
+
import org.embulk.spi.type.DoubleType;
|
12
|
+
import org.embulk.spi.type.JsonType;
|
13
|
+
import org.embulk.spi.type.LongType;
|
14
|
+
import org.embulk.spi.type.StringType;
|
15
|
+
import org.embulk.spi.type.TimestampType;
|
16
|
+
import org.embulk.spi.type.Type;
|
17
|
+
import org.msgpack.value.ArrayValue;
|
18
|
+
import org.msgpack.value.MapValue;
|
19
|
+
import org.msgpack.value.Value;
|
20
|
+
import org.msgpack.value.ValueFactory;
|
21
|
+
|
22
|
+
import org.slf4j.Logger;
|
23
|
+
|
24
|
+
import java.util.ArrayList;
|
25
|
+
import java.util.HashMap;
|
26
|
+
import java.util.HashSet;
|
27
|
+
import java.util.LinkedHashMap;
|
28
|
+
import java.util.List;
|
29
|
+
import java.util.Map;
|
30
|
+
|
31
|
+
public class JsonVisitor
|
32
|
+
{
|
33
|
+
private static final Logger logger = Exec.getLogger(ColumnFilterPlugin.class);
|
34
|
+
private final PluginTask task;
|
35
|
+
private final Schema inputSchema;
|
36
|
+
private final Schema outputSchema;
|
37
|
+
private final HashSet<String> shouldVisitSet = new HashSet<>();
|
38
|
+
private final HashMap<String, LinkedHashMap<String, JsonColumn>> jsonColumns = new HashMap<>();
|
39
|
+
private final HashMap<String, LinkedHashMap<String, JsonColumn>> jsonAddColumns = new HashMap<>();
|
40
|
+
private final HashMap<String, HashSet<String>> jsonDropColumns = new HashMap<>();
|
41
|
+
|
42
|
+
JsonVisitor(PluginTask task, Schema inputSchema, Schema outputSchema)
|
43
|
+
{
|
44
|
+
this.task = task;
|
45
|
+
this.inputSchema = inputSchema;
|
46
|
+
this.outputSchema = outputSchema;
|
47
|
+
|
48
|
+
buildShouldVisitSet();
|
49
|
+
buildJsonSchema();
|
50
|
+
}
|
51
|
+
|
52
|
+
static Value getDefault(PluginTask task, String name, Type type, ColumnConfig columnConfig)
|
53
|
+
{
|
54
|
+
Object defaultValue = ColumnVisitorImpl.getDefault(task, name, type, columnConfig);
|
55
|
+
if (defaultValue == null) {
|
56
|
+
return ValueFactory.newNil();
|
57
|
+
}
|
58
|
+
if (type instanceof BooleanType) {
|
59
|
+
return ValueFactory.newBoolean((Boolean) defaultValue);
|
60
|
+
}
|
61
|
+
else if (type instanceof LongType) {
|
62
|
+
return ValueFactory.newInteger((Long) defaultValue);
|
63
|
+
}
|
64
|
+
else if (type instanceof DoubleType) {
|
65
|
+
return ValueFactory.newFloat((Double) defaultValue);
|
66
|
+
}
|
67
|
+
else if (type instanceof StringType) {
|
68
|
+
return ValueFactory.newString((String) defaultValue.toString());
|
69
|
+
}
|
70
|
+
else if (type instanceof JsonType) {
|
71
|
+
return (Value) defaultValue;
|
72
|
+
}
|
73
|
+
else if (type instanceof TimestampType) {
|
74
|
+
throw new ConfigException("type: timestamp is not available in json path");
|
75
|
+
}
|
76
|
+
else {
|
77
|
+
throw new ConfigException(String.format("type: '%s' is not supported", type));
|
78
|
+
}
|
79
|
+
}
|
80
|
+
|
81
|
+
private void jsonColumnsPut(String path, JsonColumn value)
|
82
|
+
{
|
83
|
+
String objectPath = JsonColumn.objectPath(path);
|
84
|
+
if (! jsonColumns.containsKey(objectPath)) {
|
85
|
+
jsonColumns.put(objectPath, new LinkedHashMap<String, JsonColumn>());
|
86
|
+
}
|
87
|
+
jsonColumns.get(objectPath).put(path, value);
|
88
|
+
}
|
89
|
+
|
90
|
+
private void jsonAddColumnsPut(String path, JsonColumn value)
|
91
|
+
{
|
92
|
+
String objectPath = JsonColumn.objectPath(path);
|
93
|
+
if (! jsonAddColumns.containsKey(objectPath)) {
|
94
|
+
jsonAddColumns.put(objectPath, new LinkedHashMap<String, JsonColumn>());
|
95
|
+
}
|
96
|
+
jsonAddColumns.get(objectPath).put(path, value);
|
97
|
+
}
|
98
|
+
|
99
|
+
private void jsonDropColumnsPut(String path)
|
100
|
+
{
|
101
|
+
String objectPath = JsonColumn.objectPath(path);
|
102
|
+
if (! jsonDropColumns.containsKey(objectPath)) {
|
103
|
+
jsonDropColumns.put(objectPath, new HashSet<String>());
|
104
|
+
}
|
105
|
+
jsonDropColumns.get(objectPath).add(path);
|
106
|
+
}
|
107
|
+
|
108
|
+
// build jsonColumns, jsonAddColumns, and jsonDropColumns
|
109
|
+
private void buildJsonSchema()
|
110
|
+
{
|
111
|
+
List<ColumnConfig> columns = task.getColumns();
|
112
|
+
List<ColumnConfig> addColumns = task.getAddColumns();
|
113
|
+
List<ColumnConfig> dropColumns = task.getDropColumns();
|
114
|
+
|
115
|
+
int i = 0;
|
116
|
+
if (dropColumns.size() > 0) {
|
117
|
+
for (ColumnConfig dropColumn : dropColumns) {
|
118
|
+
String name = dropColumn.getName();
|
119
|
+
// skip NON json path notation to build output schema
|
120
|
+
if (! name.startsWith("$.")) {
|
121
|
+
continue;
|
122
|
+
}
|
123
|
+
jsonDropColumnsPut(name);
|
124
|
+
}
|
125
|
+
}
|
126
|
+
else if (columns.size() > 0) {
|
127
|
+
for (ColumnConfig column : columns) {
|
128
|
+
String name = column.getName();
|
129
|
+
// skip NON json path notation to build output schema
|
130
|
+
if (! name.startsWith("$.")) {
|
131
|
+
continue;
|
132
|
+
}
|
133
|
+
if (column.getSrc().isPresent()) {
|
134
|
+
throw new ConfigException(String.format("columns: src is not supported for json path yet: '%s'", name));
|
135
|
+
}
|
136
|
+
else if (column.getType().isPresent() && column.getDefault().isPresent()) { // add column
|
137
|
+
Type type = column.getType().get();
|
138
|
+
Value defaultValue = getDefault(task, name, type, column);
|
139
|
+
jsonColumnsPut(name, new JsonColumn(name, type, defaultValue));
|
140
|
+
}
|
141
|
+
else {
|
142
|
+
Type type = column.getType().isPresent() ? column.getType().get() : null;
|
143
|
+
jsonColumnsPut(name, new JsonColumn(name, type));
|
144
|
+
}
|
145
|
+
}
|
146
|
+
}
|
147
|
+
|
148
|
+
// Add columns to last. If you want to add to head or middle, you can use `columns` option
|
149
|
+
if (addColumns.size() > 0) {
|
150
|
+
for (ColumnConfig column : addColumns) {
|
151
|
+
String name = column.getName();
|
152
|
+
// skip NON json path notation to build output schema
|
153
|
+
if (! name.startsWith("$.")) {
|
154
|
+
continue;
|
155
|
+
}
|
156
|
+
if (column.getSrc().isPresent()) {
|
157
|
+
throw new ConfigException(String.format("add_columns: src is not supported for json path yet: '%s'", name));
|
158
|
+
}
|
159
|
+
else if (column.getType().isPresent() && column.getDefault().isPresent()) { // add column
|
160
|
+
Type type = column.getType().get();
|
161
|
+
Value defaultValue = getDefault(task, name, type, column);
|
162
|
+
jsonAddColumnsPut(name, new JsonColumn(name, type, defaultValue));
|
163
|
+
}
|
164
|
+
else {
|
165
|
+
throw new SchemaConfigException(String.format("add_columns: Column '%s' does not have \"type\" and \"default\"", name));
|
166
|
+
}
|
167
|
+
}
|
168
|
+
}
|
169
|
+
}
|
170
|
+
|
171
|
+
// json partial path => Boolean to avoid unnecessary type: json visit
|
172
|
+
private void buildShouldVisitSet()
|
173
|
+
{
|
174
|
+
ArrayList<ColumnConfig> columnConfigs = new ArrayList<>(task.getColumns());
|
175
|
+
columnConfigs.addAll(task.getAddColumns());
|
176
|
+
columnConfigs.addAll(task.getDropColumns());
|
177
|
+
|
178
|
+
for (ColumnConfig columnConfig : columnConfigs) {
|
179
|
+
String name = columnConfig.getName();
|
180
|
+
if (!name.startsWith("$.")) {
|
181
|
+
continue;
|
182
|
+
}
|
183
|
+
String[] parts = name.split("\\.");
|
184
|
+
StringBuilder partialPath = new StringBuilder("$");
|
185
|
+
for (int i = 1; i < parts.length; i++) {
|
186
|
+
if (parts[i].contains("[")) {
|
187
|
+
String[] arrayParts = parts[i].split("\\[");
|
188
|
+
partialPath.append(".").append(arrayParts[0]);
|
189
|
+
this.shouldVisitSet.add(partialPath.toString());
|
190
|
+
for (int j = 1; j < arrayParts.length; j++) {
|
191
|
+
// Supports both [0] and [*]
|
192
|
+
partialPath.append("[").append(arrayParts[j]);
|
193
|
+
this.shouldVisitSet.add(partialPath.toString());
|
194
|
+
}
|
195
|
+
}
|
196
|
+
else {
|
197
|
+
partialPath.append(".").append(parts[i]);
|
198
|
+
this.shouldVisitSet.add(partialPath.toString());
|
199
|
+
}
|
200
|
+
}
|
201
|
+
}
|
202
|
+
}
|
203
|
+
|
204
|
+
boolean shouldVisit(String jsonPath)
|
205
|
+
{
|
206
|
+
return shouldVisitSet.contains(jsonPath);
|
207
|
+
}
|
208
|
+
|
209
|
+
String newArrayJsonPath(String rootPath, int i)
|
210
|
+
{
|
211
|
+
String newPath = new StringBuilder(rootPath).append("[").append(Integer.toString(i)).append("]").toString();
|
212
|
+
if (! shouldVisit(newPath)) {
|
213
|
+
newPath = new StringBuilder(rootPath).append("[*]").toString(); // try [*] too
|
214
|
+
}
|
215
|
+
return newPath;
|
216
|
+
}
|
217
|
+
|
218
|
+
String newMapJsonPath(String rootPath, Value elementPathValue)
|
219
|
+
{
|
220
|
+
String elementPath = elementPathValue.asStringValue().asString();
|
221
|
+
String newPath = new StringBuilder(rootPath).append(".").append(elementPath).toString();
|
222
|
+
return newPath;
|
223
|
+
}
|
224
|
+
|
225
|
+
Value visitArray(String rootPath, ArrayValue arrayValue)
|
226
|
+
{
|
227
|
+
int size = arrayValue.size();
|
228
|
+
ArrayList<Value> newValue = new ArrayList<>(size);
|
229
|
+
int j = 0;
|
230
|
+
if (this.jsonDropColumns.containsKey(rootPath)) {
|
231
|
+
HashSet<String> jsonDropColumns = this.jsonDropColumns.get(rootPath);
|
232
|
+
for (int i = 0; i < size; i++) {
|
233
|
+
String newPath = newArrayJsonPath(rootPath, i);
|
234
|
+
if (! jsonDropColumns.contains(newPath)) {
|
235
|
+
newValue.add(j++, visit(newPath, arrayValue.get(i)));
|
236
|
+
}
|
237
|
+
}
|
238
|
+
}
|
239
|
+
else if (this.jsonColumns.containsKey(rootPath)) {
|
240
|
+
LinkedHashMap<String, JsonColumn> jsonColumns = this.jsonColumns.get(rootPath);
|
241
|
+
for (int i = 0; i < size; i++) {
|
242
|
+
String newPath = newArrayJsonPath(rootPath, i);
|
243
|
+
if (jsonColumns.containsKey(newPath)) {
|
244
|
+
newValue.add(j++, visit(newPath, arrayValue.get(i)));
|
245
|
+
}
|
246
|
+
}
|
247
|
+
}
|
248
|
+
else {
|
249
|
+
for (int i = 0; i < size; i++) {
|
250
|
+
String newPath = newArrayJsonPath(rootPath, i);
|
251
|
+
newValue.add(j++, visit(newPath, arrayValue.get(i)));
|
252
|
+
}
|
253
|
+
}
|
254
|
+
if (this.jsonAddColumns.containsKey(rootPath)) {
|
255
|
+
for (JsonColumn jsonColumn : this.jsonAddColumns.get(rootPath).values()) {
|
256
|
+
newValue.add(j++, jsonColumn.getDefaultValue());
|
257
|
+
}
|
258
|
+
}
|
259
|
+
return ValueFactory.newArray(newValue.toArray(new Value[0]), true);
|
260
|
+
}
|
261
|
+
|
262
|
+
Value visitMap(String rootPath, MapValue mapValue)
|
263
|
+
{
|
264
|
+
int size = mapValue.size();
|
265
|
+
int i = 0;
|
266
|
+
ArrayList<Value> newValue = new ArrayList<>(size * 2);
|
267
|
+
if (this.jsonDropColumns.containsKey(rootPath)) {
|
268
|
+
HashSet<String> jsonDropColumns = this.jsonDropColumns.get(rootPath);
|
269
|
+
for (Map.Entry<Value, Value> entry : mapValue.entrySet()) {
|
270
|
+
Value k = entry.getKey();
|
271
|
+
Value v = entry.getValue();
|
272
|
+
String newPath = newMapJsonPath(rootPath, k);
|
273
|
+
if (! jsonDropColumns.contains(newPath)) {
|
274
|
+
Value visited = visit(newPath, v);
|
275
|
+
newValue.add(i++, k);
|
276
|
+
newValue.add(i++, visited);
|
277
|
+
}
|
278
|
+
}
|
279
|
+
}
|
280
|
+
else if (this.jsonColumns.containsKey(rootPath)) {
|
281
|
+
Map<Value, Value> map = mapValue.map();
|
282
|
+
for (JsonColumn jsonColumn : jsonColumns.get(rootPath).values()) {
|
283
|
+
Value k = jsonColumn.getElementPathValue();
|
284
|
+
Value v = map.get(k);
|
285
|
+
String newPath = jsonColumn.getName();
|
286
|
+
Value visited = visit(newPath, v);
|
287
|
+
if (visited == null) {
|
288
|
+
visited = jsonColumn.getDefaultValue();
|
289
|
+
}
|
290
|
+
newValue.add(i++, k);
|
291
|
+
newValue.add(i++, visited);
|
292
|
+
}
|
293
|
+
}
|
294
|
+
else {
|
295
|
+
for (Map.Entry<Value, Value> entry : mapValue.entrySet()) {
|
296
|
+
Value k = entry.getKey();
|
297
|
+
Value v = entry.getValue();
|
298
|
+
String newPath = newMapJsonPath(rootPath, k);
|
299
|
+
Value visited = visit(newPath, v);
|
300
|
+
newValue.add(i++, k);
|
301
|
+
newValue.add(i++, visited);
|
302
|
+
}
|
303
|
+
}
|
304
|
+
if (this.jsonAddColumns.containsKey(rootPath)) {
|
305
|
+
for (JsonColumn jsonColumn : this.jsonAddColumns.get(rootPath).values()) {
|
306
|
+
newValue.add(i++, jsonColumn.getElementPathValue());
|
307
|
+
newValue.add(i++, jsonColumn.getDefaultValue());
|
308
|
+
}
|
309
|
+
}
|
310
|
+
return ValueFactory.newMap(newValue.toArray(new Value[0]), true);
|
311
|
+
}
|
312
|
+
|
313
|
+
public Value visit(String rootPath, Value value)
|
314
|
+
{
|
315
|
+
if (! shouldVisit(rootPath)) {
|
316
|
+
return value;
|
317
|
+
}
|
318
|
+
if (value.isArrayValue()) {
|
319
|
+
return visitArray(rootPath, value.asArrayValue());
|
320
|
+
}
|
321
|
+
else if (value.isMapValue()) {
|
322
|
+
return visitMap(rootPath, value.asMapValue());
|
323
|
+
}
|
324
|
+
else {
|
325
|
+
return value;
|
326
|
+
}
|
327
|
+
}
|
328
|
+
}
|