embulk-filter-typecast 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +0 -1
- data/CHANGELOG.md +6 -0
- data/build.gradle +1 -1
- data/example/example.csv +11 -0
- data/example/example2.yml +6 -2
- data/src/main/java/org/embulk/filter/typecast/{TypecastPageBuilder.java → ColumnCaster.java} +47 -14
- data/src/main/java/org/embulk/filter/typecast/ColumnVisitorImpl.java +36 -133
- data/src/main/java/org/embulk/filter/typecast/{TypecastJsonBuilder.java → JsonCaster.java} +31 -50
- data/src/main/java/org/embulk/filter/typecast/JsonVisitor.java +136 -0
- data/src/main/java/org/embulk/filter/typecast/TypecastFilterPlugin.java +11 -1
- metadata +7 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e8a2482d0bd6fc6109bb8763f9a1f9db6b0733db
|
4
|
+
data.tar.gz: b80c4d01e823e3a4b59594002277fc78e1ee0ef9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7473b57b158d8936e14015358dcd21d3bfe852f0b7c33de10da4ad38aa4553b3ddf39e5b41c9ed11df32713c42a33506aba22820b2236590ab124035a1056783
|
7
|
+
data.tar.gz: 354bf8ab52c2ee5ba124af046abea9e311213c4ebaa03991f432504cc87094a0e1d4815c3c94434ee843de836f6e2146331540fd23b286071c000011bb64c55a
|
data/.gitignore
CHANGED
data/CHANGELOG.md
CHANGED
data/build.gradle
CHANGED
data/example/example.csv
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
timestamp,null,long,string,double,json1,json2,boolean
|
2
|
+
2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,{"string":"0"},{"long":0},true
|
3
|
+
2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,{"string":"1"},{"long":1},true
|
4
|
+
2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,{"string":"2"},{"long":2},true
|
5
|
+
2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,{"string":"3"},{"long":3},true
|
6
|
+
2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,{"string":"4"},{"long":4},true
|
7
|
+
2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,{"string":"5"},{"long":5},false
|
8
|
+
2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,{"string":"6"},{"long":6},false
|
9
|
+
2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,{"string":"7"},{"long":7},false
|
10
|
+
2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,{"string":"8"},{"long":8},false
|
11
|
+
2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,{"string":"9"},{"long":9},false
|
data/example/example2.yml
CHANGED
@@ -14,7 +14,8 @@ in:
|
|
14
14
|
- {name: long, type: string}
|
15
15
|
- {name: string, type: string}
|
16
16
|
- {name: double, type: string}
|
17
|
-
- {name:
|
17
|
+
- {name: json1, type: string}
|
18
|
+
- {name: json2, type: string}
|
18
19
|
- {name: boolean, type: boolean}
|
19
20
|
filters:
|
20
21
|
- type: typecast
|
@@ -24,7 +25,10 @@ filters:
|
|
24
25
|
- {name: long, type: long}
|
25
26
|
- {name: string, type: string}
|
26
27
|
- {name: double, type: double}
|
27
|
-
- {name:
|
28
|
+
- {name: json1, type: json}
|
29
|
+
- {name: json2, type: json}
|
28
30
|
- {name: boolean, type: boolean}
|
31
|
+
- {name: "$.json1.string", type: long}
|
32
|
+
- {name: "$.json2.long", type: long}
|
29
33
|
out:
|
30
34
|
type: "null"
|
data/src/main/java/org/embulk/filter/typecast/{TypecastPageBuilder.java → ColumnCaster.java}
RENAMED
@@ -2,17 +2,45 @@ package org.embulk.filter.typecast;
|
|
2
2
|
|
3
3
|
import org.embulk.filter.typecast.cast.*;
|
4
4
|
|
5
|
+
import org.embulk.filter.typecast.TypecastFilterPlugin.PluginTask;
|
6
|
+
|
5
7
|
import org.embulk.spi.Column;
|
6
8
|
import org.embulk.spi.DataException;
|
9
|
+
import org.embulk.spi.Exec;
|
7
10
|
import org.embulk.spi.PageBuilder;
|
11
|
+
import org.embulk.spi.PageReader;
|
12
|
+
import org.embulk.spi.Schema;
|
8
13
|
import org.embulk.spi.time.Timestamp;
|
9
14
|
import org.embulk.spi.time.TimestampFormatter;
|
10
15
|
import org.embulk.spi.time.TimestampParser;
|
11
16
|
import org.embulk.spi.type.*;
|
12
17
|
import org.msgpack.value.Value;
|
13
18
|
|
14
|
-
|
15
|
-
|
19
|
+
import org.slf4j.Logger;
|
20
|
+
|
21
|
+
|
22
|
+
class ColumnCaster
|
23
|
+
{
|
24
|
+
private static final Logger logger = Exec.getLogger(TypecastFilterPlugin.class);
|
25
|
+
private final PluginTask task;
|
26
|
+
private final Schema inputSchema;
|
27
|
+
private final Schema outputSchema;
|
28
|
+
private final PageReader pageReader;
|
29
|
+
private final PageBuilder pageBuilder;
|
30
|
+
private final JsonVisitor jsonVisitor;
|
31
|
+
|
32
|
+
ColumnCaster(TypecastFilterPlugin.PluginTask task, Schema inputSchema, Schema outputSchema,
|
33
|
+
PageReader pageReader, PageBuilder pageBuilder)
|
34
|
+
{
|
35
|
+
this.task = task;
|
36
|
+
this.inputSchema = inputSchema;
|
37
|
+
this.outputSchema = outputSchema;
|
38
|
+
this.pageReader = pageReader;
|
39
|
+
this.pageBuilder = pageBuilder;
|
40
|
+
this.jsonVisitor = new JsonVisitor(task, inputSchema, outputSchema);
|
41
|
+
}
|
42
|
+
|
43
|
+
public void setFromBoolean(Column outputColumn, boolean value) {
|
16
44
|
Type outputType = outputColumn.getType();
|
17
45
|
if (outputType instanceof BooleanType) {
|
18
46
|
pageBuilder.setBoolean(outputColumn, BooleanCast.asBoolean(value));
|
@@ -31,7 +59,7 @@ class TypecastPageBuilder {
|
|
31
59
|
}
|
32
60
|
}
|
33
61
|
|
34
|
-
|
62
|
+
public void setFromLong(Column outputColumn, long value)
|
35
63
|
{
|
36
64
|
Type outputType = outputColumn.getType();
|
37
65
|
if (outputType instanceof BooleanType) {
|
@@ -51,7 +79,7 @@ class TypecastPageBuilder {
|
|
51
79
|
}
|
52
80
|
}
|
53
81
|
|
54
|
-
|
82
|
+
public void setFromDouble(Column outputColumn, double value)
|
55
83
|
{
|
56
84
|
try {
|
57
85
|
Type outputType = outputColumn.getType();
|
@@ -76,7 +104,7 @@ class TypecastPageBuilder {
|
|
76
104
|
}
|
77
105
|
}
|
78
106
|
|
79
|
-
|
107
|
+
public void setFromString(Column outputColumn, String value, TimestampParser timestampParser)
|
80
108
|
{
|
81
109
|
Type outputType = outputColumn.getType();
|
82
110
|
if (outputType instanceof BooleanType) {
|
@@ -90,13 +118,16 @@ class TypecastPageBuilder {
|
|
90
118
|
} else if (outputType instanceof TimestampType) {
|
91
119
|
pageBuilder.setTimestamp(outputColumn, StringCast.asTimestamp(value, timestampParser));
|
92
120
|
} else if (outputType instanceof JsonType) {
|
93
|
-
|
121
|
+
Value jsonValue = StringCast.asJson(value);
|
122
|
+
String jsonPath = new StringBuilder("$.").append(outputColumn.getName()).toString();
|
123
|
+
Value castedValue = jsonVisitor.visit(jsonPath, jsonValue);
|
124
|
+
pageBuilder.setJson(outputColumn, castedValue);
|
94
125
|
} else {
|
95
126
|
assert(false);
|
96
127
|
}
|
97
128
|
}
|
98
129
|
|
99
|
-
|
130
|
+
public void setFromTimestamp(Column outputColumn, Timestamp value, TimestampFormatter timestampFormatter)
|
100
131
|
{
|
101
132
|
Type outputType = outputColumn.getType();
|
102
133
|
if (outputType instanceof BooleanType) {
|
@@ -116,21 +147,23 @@ class TypecastPageBuilder {
|
|
116
147
|
}
|
117
148
|
}
|
118
149
|
|
119
|
-
|
150
|
+
public void setFromJson(Column outputColumn, Value value)
|
120
151
|
{
|
152
|
+
String jsonPath = new StringBuilder("$.").append(outputColumn.getName()).toString();
|
153
|
+
Value castedValue = jsonVisitor.visit(jsonPath, value);
|
121
154
|
Type outputType = outputColumn.getType();
|
122
155
|
if (outputType instanceof BooleanType) {
|
123
|
-
pageBuilder.setBoolean(outputColumn, JsonCast.asBoolean(
|
156
|
+
pageBuilder.setBoolean(outputColumn, JsonCast.asBoolean(castedValue));
|
124
157
|
} else if (outputType instanceof LongType) {
|
125
|
-
pageBuilder.setLong(outputColumn, JsonCast.asLong(
|
158
|
+
pageBuilder.setLong(outputColumn, JsonCast.asLong(castedValue));
|
126
159
|
} else if (outputType instanceof DoubleType) {
|
127
|
-
pageBuilder.setDouble(outputColumn, JsonCast.asDouble(
|
160
|
+
pageBuilder.setDouble(outputColumn, JsonCast.asDouble(castedValue));
|
128
161
|
} else if (outputType instanceof StringType) {
|
129
|
-
pageBuilder.setString(outputColumn, JsonCast.asString(
|
162
|
+
pageBuilder.setString(outputColumn, JsonCast.asString(castedValue));
|
130
163
|
} else if (outputType instanceof TimestampType) {
|
131
|
-
pageBuilder.setTimestamp(outputColumn, JsonCast.asTimestamp(
|
164
|
+
pageBuilder.setTimestamp(outputColumn, JsonCast.asTimestamp(castedValue));
|
132
165
|
} else if (outputType instanceof JsonType) {
|
133
|
-
pageBuilder.setJson(outputColumn, JsonCast.asJson(
|
166
|
+
pageBuilder.setJson(outputColumn, JsonCast.asJson(castedValue));
|
134
167
|
} else {
|
135
168
|
assert(false);
|
136
169
|
}
|
@@ -1,11 +1,8 @@
|
|
1
1
|
package org.embulk.filter.typecast;
|
2
2
|
|
3
3
|
import org.embulk.spi.*;
|
4
|
-
import org.embulk.spi.type.
|
5
|
-
import org.
|
6
|
-
import org.msgpack.value.MapValue;
|
7
|
-
import org.msgpack.value.Value;
|
8
|
-
import org.msgpack.value.ValueFactory;
|
4
|
+
import org.embulk.spi.type.StringType;
|
5
|
+
import org.embulk.spi.type.TimestampType;
|
9
6
|
|
10
7
|
import org.embulk.filter.typecast.TypecastFilterPlugin.ColumnConfig;
|
11
8
|
import org.embulk.filter.typecast.TypecastFilterPlugin.PluginTask;
|
@@ -16,8 +13,6 @@ import org.joda.time.DateTimeZone;
|
|
16
13
|
import org.slf4j.Logger;
|
17
14
|
|
18
15
|
import java.util.HashMap;
|
19
|
-
import java.util.HashSet;
|
20
|
-
import java.util.Map;
|
21
16
|
|
22
17
|
public class ColumnVisitorImpl
|
23
18
|
implements ColumnVisitor
|
@@ -31,11 +26,10 @@ public class ColumnVisitorImpl
|
|
31
26
|
private final HashMap<String, Column> outputColumnMap = new HashMap<>();
|
32
27
|
private final HashMap<String, TimestampParser> timestampParserMap = new HashMap<>();
|
33
28
|
private final HashMap<String, TimestampFormatter> timestampFormatterMap = new HashMap<>();
|
34
|
-
private final
|
35
|
-
private final HashMap<String, Type> jsonPathTypeMap = new HashMap<>();
|
29
|
+
private final ColumnCaster columnCaster;
|
36
30
|
|
37
31
|
ColumnVisitorImpl(PluginTask task, Schema inputSchema, Schema outputSchema,
|
38
|
-
|
32
|
+
PageReader pageReader, PageBuilder pageBuilder)
|
39
33
|
{
|
40
34
|
this.task = task;
|
41
35
|
this.inputSchema = inputSchema;
|
@@ -43,11 +37,11 @@ public class ColumnVisitorImpl
|
|
43
37
|
this.pageReader = pageReader;
|
44
38
|
this.pageBuilder = pageBuilder;
|
45
39
|
|
40
|
+
this.columnCaster = new ColumnCaster(task, inputSchema, outputSchema, pageReader, pageBuilder);
|
41
|
+
|
46
42
|
buildOutputColumnMap();
|
47
43
|
buildTimestampParserMap();
|
48
44
|
buildTimestampFormatterMap();
|
49
|
-
buildShouldVisitJsonPathSet();;
|
50
|
-
buildJsonPathTypeMap();
|
51
45
|
}
|
52
46
|
|
53
47
|
private void buildOutputColumnMap()
|
@@ -60,132 +54,46 @@ public class ColumnVisitorImpl
|
|
60
54
|
|
61
55
|
private void buildTimestampParserMap()
|
62
56
|
{
|
63
|
-
// columnName or jsonPath => TimestampParser
|
57
|
+
// columnName => TimestampParser
|
64
58
|
for (ColumnConfig columnConfig : task.getColumns()) {
|
65
|
-
|
66
|
-
|
67
|
-
}
|
68
|
-
}
|
69
|
-
|
70
|
-
private TimestampParser getTimestampParser(ColumnConfig columnConfig, PluginTask task)
|
71
|
-
{
|
72
|
-
DateTimeZone timezone = columnConfig.getTimeZone().or(task.getDefaultTimeZone());
|
73
|
-
String format = columnConfig.getFormat().or(task.getDefaultTimestampFormat());
|
74
|
-
return new TimestampParser(task.getJRuby(), format, timezone);
|
75
|
-
}
|
76
|
-
|
77
|
-
private void buildTimestampFormatterMap()
|
78
|
-
{
|
79
|
-
// columnName or jsonPath => TimestampFormatter
|
80
|
-
for (ColumnConfig columnConfig : task.getColumns()) {
|
81
|
-
TimestampFormatter parser = getTimestampFormatter(columnConfig, task);
|
82
|
-
this.timestampFormatterMap.put(columnConfig.getName(), parser);
|
83
|
-
}
|
84
|
-
}
|
85
|
-
|
86
|
-
private TimestampFormatter getTimestampFormatter(ColumnConfig columnConfig, PluginTask task)
|
87
|
-
{
|
88
|
-
String format = columnConfig.getFormat().or(task.getDefaultTimestampFormat());
|
89
|
-
DateTimeZone timezone = columnConfig.getTimeZone().or(task.getDefaultTimeZone());
|
90
|
-
return new TimestampFormatter(task.getJRuby(), format, timezone);
|
91
|
-
}
|
92
|
-
|
93
|
-
private void buildShouldVisitJsonPathSet()
|
94
|
-
{
|
95
|
-
// json partial path => Boolean to avoid unnecessary type: json visit
|
96
|
-
for (ColumnConfig columnConfig : task.getColumns()) {
|
97
|
-
String name = columnConfig.getName();
|
98
|
-
if (!name.startsWith("$.")) {
|
99
|
-
continue;
|
59
|
+
if (columnConfig.getName().startsWith("$.")) {
|
60
|
+
continue; // type: json columns do not support type: timestamp
|
100
61
|
}
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
String[] arrayParts = parts[i].split("\\[");
|
106
|
-
partialPath.append(".").append(arrayParts[0]);
|
107
|
-
this.shouldVisitJsonPathSet.add(partialPath.toString());
|
108
|
-
for (int j = 1; j < arrayParts.length; j++) {
|
109
|
-
partialPath.append("[").append(arrayParts[j]);
|
110
|
-
this.shouldVisitJsonPathSet.add(partialPath.toString());
|
111
|
-
}
|
112
|
-
}
|
113
|
-
else {
|
114
|
-
partialPath.append(".").append(parts[i]);
|
115
|
-
this.shouldVisitJsonPathSet.add(partialPath.toString());
|
116
|
-
}
|
62
|
+
Column inputColumn = inputSchema.lookupColumn(columnConfig.getName());
|
63
|
+
if (inputColumn.getType() instanceof StringType && columnConfig.getType() instanceof TimestampType) {
|
64
|
+
TimestampParser parser = getTimestampParser(columnConfig, task);
|
65
|
+
this.timestampParserMap.put(columnConfig.getName(), parser);
|
117
66
|
}
|
118
67
|
}
|
119
68
|
}
|
120
69
|
|
121
|
-
private void
|
70
|
+
private void buildTimestampFormatterMap()
|
122
71
|
{
|
123
|
-
//
|
72
|
+
// columnName => TimestampFormatter
|
124
73
|
for (ColumnConfig columnConfig : task.getColumns()) {
|
125
|
-
|
126
|
-
|
127
|
-
|
74
|
+
if (columnConfig.getName().startsWith("$.")) {
|
75
|
+
continue; // type: json columns do not have type: timestamp
|
76
|
+
}
|
77
|
+
Column inputColumn = inputSchema.lookupColumn(columnConfig.getName());
|
78
|
+
if (inputColumn.getType() instanceof TimestampType && columnConfig.getType() instanceof StringType) {
|
79
|
+
TimestampFormatter parser = getTimestampFormatter(columnConfig, task);
|
80
|
+
this.timestampFormatterMap.put(columnConfig.getName(), parser);
|
128
81
|
}
|
129
|
-
Type type = columnConfig.getType();
|
130
|
-
this.jsonPathTypeMap.put(name, type);
|
131
82
|
}
|
132
83
|
}
|
133
84
|
|
134
|
-
private
|
85
|
+
private TimestampParser getTimestampParser(ColumnConfig columnConfig, PluginTask task)
|
135
86
|
{
|
136
|
-
|
87
|
+
DateTimeZone timezone = columnConfig.getTimeZone().or(task.getDefaultTimeZone());
|
88
|
+
String format = columnConfig.getFormat().or(task.getDefaultTimestampFormat());
|
89
|
+
return new TimestampParser(task.getJRuby(), format, timezone);
|
137
90
|
}
|
138
91
|
|
139
|
-
private
|
92
|
+
private TimestampFormatter getTimestampFormatter(ColumnConfig columnConfig, PluginTask task)
|
140
93
|
{
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
if (value.isArrayValue()) {
|
145
|
-
ArrayValue arrayValue = value.asArrayValue();
|
146
|
-
int size = arrayValue.size();
|
147
|
-
Value[] newValue = new Value[size];
|
148
|
-
for (int i = 0; i < size; i++) {
|
149
|
-
String k = new StringBuilder(jsonPath).append("[").append(Integer.toString(i)).append("]").toString();
|
150
|
-
Value v = arrayValue.get(i);
|
151
|
-
newValue[i] = castJsonRecursively(task, k, v);
|
152
|
-
}
|
153
|
-
return ValueFactory.newArray(newValue, true);
|
154
|
-
}
|
155
|
-
else if (value.isMapValue()) {
|
156
|
-
MapValue mapValue = value.asMapValue();
|
157
|
-
int size = mapValue.size() * 2;
|
158
|
-
Value[] newValue = new Value[size];
|
159
|
-
int i = 0;
|
160
|
-
for (Map.Entry<Value, Value> entry : mapValue.entrySet()) {
|
161
|
-
Value k = entry.getKey();
|
162
|
-
Value v = entry.getValue();
|
163
|
-
String newPath = new StringBuilder(jsonPath).append(".").append(k.asStringValue().asString()).toString();
|
164
|
-
Value r = castJsonRecursively(task, newPath, v);
|
165
|
-
newValue[i++] = k;
|
166
|
-
newValue[i++] = r;
|
167
|
-
}
|
168
|
-
return ValueFactory.newMap(newValue, true);
|
169
|
-
}
|
170
|
-
else if (value.isBooleanValue()) {
|
171
|
-
Type outputType = jsonPathTypeMap.get(jsonPath);
|
172
|
-
return TypecastJsonBuilder.getFromBoolean(outputType, value.asBooleanValue().getBoolean());
|
173
|
-
}
|
174
|
-
else if (value.isIntegerValue()) {
|
175
|
-
Type outputType = jsonPathTypeMap.get(jsonPath);
|
176
|
-
return TypecastJsonBuilder.getFromLong(outputType, value.asIntegerValue().asLong());
|
177
|
-
}
|
178
|
-
else if (value.isFloatValue()) {
|
179
|
-
Type outputType = jsonPathTypeMap.get(jsonPath);
|
180
|
-
return TypecastJsonBuilder.getFromDouble(outputType, value.asFloatValue().toDouble());
|
181
|
-
}
|
182
|
-
else if (value.isStringValue()) {
|
183
|
-
Type outputType = jsonPathTypeMap.get(jsonPath);
|
184
|
-
return TypecastJsonBuilder.getFromString(outputType, value.asStringValue().asString());
|
185
|
-
}
|
186
|
-
else {
|
187
|
-
return value;
|
188
|
-
}
|
94
|
+
String format = columnConfig.getFormat().or(task.getDefaultTimestampFormat());
|
95
|
+
DateTimeZone timezone = columnConfig.getTimeZone().or(task.getDefaultTimeZone());
|
96
|
+
return new TimestampFormatter(task.getJRuby(), format, timezone);
|
189
97
|
}
|
190
98
|
|
191
99
|
private interface PageBuildable
|
@@ -217,7 +125,7 @@ public class ColumnVisitorImpl
|
|
217
125
|
final Column outputColumn = outputColumnMap.get(inputColumn.getName());
|
218
126
|
PageBuildable op = new PageBuildable() {
|
219
127
|
public void run() throws DataException {
|
220
|
-
|
128
|
+
columnCaster.setFromBoolean(outputColumn, pageReader.getBoolean(inputColumn));
|
221
129
|
}
|
222
130
|
};
|
223
131
|
withStopOnInvalidRecord(op, inputColumn, outputColumn);
|
@@ -229,7 +137,7 @@ public class ColumnVisitorImpl
|
|
229
137
|
final Column outputColumn = outputColumnMap.get(inputColumn.getName());
|
230
138
|
PageBuildable op = new PageBuildable() {
|
231
139
|
public void run() throws DataException {
|
232
|
-
|
140
|
+
columnCaster.setFromLong(outputColumn, pageReader.getLong(inputColumn));
|
233
141
|
}
|
234
142
|
};
|
235
143
|
withStopOnInvalidRecord(op, inputColumn, outputColumn);
|
@@ -241,7 +149,7 @@ public class ColumnVisitorImpl
|
|
241
149
|
final Column outputColumn = outputColumnMap.get(inputColumn.getName());
|
242
150
|
PageBuildable op = new PageBuildable() {
|
243
151
|
public void run() throws DataException {
|
244
|
-
|
152
|
+
columnCaster.setFromDouble(outputColumn, pageReader.getDouble(inputColumn));
|
245
153
|
}
|
246
154
|
};
|
247
155
|
withStopOnInvalidRecord(op, inputColumn, outputColumn);
|
@@ -254,8 +162,7 @@ public class ColumnVisitorImpl
|
|
254
162
|
final TimestampParser timestampParser = timestampParserMap.get(inputColumn.getName());
|
255
163
|
PageBuildable op = new PageBuildable() {
|
256
164
|
public void run() throws DataException {
|
257
|
-
|
258
|
-
pageBuilder, outputColumn, pageReader.getString(inputColumn), timestampParser);
|
165
|
+
columnCaster.setFromString(outputColumn, pageReader.getString(inputColumn), timestampParser);
|
259
166
|
}
|
260
167
|
};
|
261
168
|
withStopOnInvalidRecord(op, inputColumn, outputColumn);
|
@@ -268,8 +175,7 @@ public class ColumnVisitorImpl
|
|
268
175
|
final TimestampFormatter timestampFormatter = timestampFormatterMap.get(inputColumn.getName());
|
269
176
|
PageBuildable op = new PageBuildable() {
|
270
177
|
public void run() throws DataException {
|
271
|
-
|
272
|
-
pageBuilder, outputColumn, pageReader.getTimestamp(inputColumn), timestampFormatter);
|
178
|
+
columnCaster.setFromTimestamp(outputColumn, pageReader.getTimestamp(inputColumn), timestampFormatter);
|
273
179
|
}
|
274
180
|
};
|
275
181
|
withStopOnInvalidRecord(op, inputColumn, outputColumn);
|
@@ -278,13 +184,10 @@ public class ColumnVisitorImpl
|
|
278
184
|
@Override
|
279
185
|
public void jsonColumn(final Column inputColumn)
|
280
186
|
{
|
281
|
-
String jsonPath = new StringBuilder("$.").append(inputColumn.getName()).toString();
|
282
|
-
Value value = pageReader.getJson(inputColumn);
|
283
|
-
final Value castedValue = castJsonRecursively(task, jsonPath, value);
|
284
187
|
final Column outputColumn = outputColumnMap.get(inputColumn.getName());
|
285
188
|
PageBuildable op = new PageBuildable() {
|
286
189
|
public void run() throws DataException {
|
287
|
-
|
190
|
+
columnCaster.setFromJson(outputColumn, pageReader.getJson(inputColumn));
|
288
191
|
}
|
289
192
|
};
|
290
193
|
withStopOnInvalidRecord(op, inputColumn, outputColumn);
|
@@ -4,21 +4,28 @@ import org.embulk.filter.typecast.cast.*;
|
|
4
4
|
|
5
5
|
import org.embulk.spi.DataException;
|
6
6
|
import org.embulk.spi.type.*;
|
7
|
+
import org.msgpack.value.BooleanValue;
|
8
|
+
import org.msgpack.value.IntegerValue;
|
9
|
+
import org.msgpack.value.FloatValue;
|
10
|
+
import org.msgpack.value.StringValue;
|
7
11
|
import org.msgpack.value.Value;
|
8
12
|
import org.msgpack.value.ValueFactory;
|
9
13
|
|
10
|
-
class
|
11
|
-
|
14
|
+
class JsonCaster
|
15
|
+
{
|
16
|
+
public JsonCaster()
|
17
|
+
{
|
18
|
+
}
|
19
|
+
|
20
|
+
public Value fromBoolean(Type outputType, BooleanValue value) {
|
12
21
|
if (outputType instanceof BooleanType) {
|
13
|
-
return
|
22
|
+
return value;
|
14
23
|
} else if (outputType instanceof LongType) {
|
15
|
-
return ValueFactory.newInteger(BooleanCast.asLong(value));
|
24
|
+
return ValueFactory.newInteger(BooleanCast.asLong(value.getBoolean()));
|
16
25
|
} else if (outputType instanceof DoubleType) {
|
17
|
-
return ValueFactory.newFloat(BooleanCast.asDouble(value));
|
26
|
+
return ValueFactory.newFloat(BooleanCast.asDouble(value.getBoolean()));
|
18
27
|
} else if (outputType instanceof StringType) {
|
19
|
-
return ValueFactory.newString(BooleanCast.asString(value));
|
20
|
-
} else if (outputType instanceof TimestampType) {
|
21
|
-
throw new DataException(String.format("no timestamp type in json: \"%s\"", value));
|
28
|
+
return ValueFactory.newString(BooleanCast.asString(value.getBoolean()));
|
22
29
|
} else if (outputType instanceof JsonType) {
|
23
30
|
throw new DataException(String.format("cannot cast boolean to json: \"%s\"", value));
|
24
31
|
} else {
|
@@ -27,18 +34,16 @@ class TypecastJsonBuilder {
|
|
27
34
|
}
|
28
35
|
}
|
29
36
|
|
30
|
-
|
37
|
+
public Value fromLong(Type outputType, IntegerValue value)
|
31
38
|
{
|
32
39
|
if (outputType instanceof BooleanType) {
|
33
|
-
return ValueFactory.newBoolean(LongCast.asBoolean(value));
|
40
|
+
return ValueFactory.newBoolean(LongCast.asBoolean(value.asLong()));
|
34
41
|
} else if (outputType instanceof LongType) {
|
35
|
-
return
|
42
|
+
return value;
|
36
43
|
} else if (outputType instanceof DoubleType) {
|
37
|
-
return ValueFactory.newFloat(LongCast.asDouble(value));
|
44
|
+
return ValueFactory.newFloat(LongCast.asDouble(value.asLong()));
|
38
45
|
} else if (outputType instanceof StringType) {
|
39
|
-
return ValueFactory.newString(LongCast.asString(value));
|
40
|
-
} else if (outputType instanceof TimestampType) {
|
41
|
-
throw new DataException(String.format("no timestamp type in json: \"%s\"", value));
|
46
|
+
return ValueFactory.newString(LongCast.asString(value.asLong()));
|
42
47
|
} else if (outputType instanceof JsonType) {
|
43
48
|
throw new DataException(String.format("cannot cast long to json:: \"%s\"", value));
|
44
49
|
} else {
|
@@ -47,18 +52,16 @@ class TypecastJsonBuilder {
|
|
47
52
|
}
|
48
53
|
}
|
49
54
|
|
50
|
-
|
55
|
+
public Value fromDouble(Type outputType, FloatValue value)
|
51
56
|
{
|
52
57
|
if (outputType instanceof BooleanType) {
|
53
|
-
return ValueFactory.newBoolean(DoubleCast.asBoolean(value));
|
58
|
+
return ValueFactory.newBoolean(DoubleCast.asBoolean(value.toDouble()));
|
54
59
|
} else if (outputType instanceof LongType) {
|
55
|
-
return ValueFactory.newInteger(DoubleCast.asLong(value));
|
60
|
+
return ValueFactory.newInteger(DoubleCast.asLong(value.toDouble()));
|
56
61
|
} else if (outputType instanceof DoubleType) {
|
57
|
-
return
|
62
|
+
return value;
|
58
63
|
} else if (outputType instanceof StringType) {
|
59
|
-
return ValueFactory.newString(DoubleCast.asString(value));
|
60
|
-
} else if (outputType instanceof TimestampType) {
|
61
|
-
throw new DataException(String.format("no timestamp type in json: \"%s\"", value));
|
64
|
+
return ValueFactory.newString(DoubleCast.asString(value.toDouble()));
|
62
65
|
} else if (outputType instanceof JsonType) {
|
63
66
|
throw new DataException(String.format("cannot cast double to json:: \"%s\"", value));
|
64
67
|
} else {
|
@@ -67,40 +70,18 @@ class TypecastJsonBuilder {
|
|
67
70
|
}
|
68
71
|
}
|
69
72
|
|
70
|
-
|
71
|
-
{
|
72
|
-
if (outputType instanceof BooleanType) {
|
73
|
-
return ValueFactory.newBoolean(StringCast.asBoolean(value));
|
74
|
-
} else if (outputType instanceof LongType) {
|
75
|
-
return ValueFactory.newInteger(StringCast.asLong(value));
|
76
|
-
} else if (outputType instanceof DoubleType) {
|
77
|
-
return ValueFactory.newFloat(StringCast.asDouble(value));
|
78
|
-
} else if (outputType instanceof StringType) {
|
79
|
-
return ValueFactory.newString(StringCast.asString(value));
|
80
|
-
} else if (outputType instanceof TimestampType) {
|
81
|
-
throw new DataException(String.format("no timestamp type in json: \"%s\"", value));
|
82
|
-
} else if (outputType instanceof JsonType) {
|
83
|
-
return StringCast.asJson(value);
|
84
|
-
} else {
|
85
|
-
assert(false);
|
86
|
-
return null;
|
87
|
-
}
|
88
|
-
}
|
89
|
-
|
90
|
-
static Value getFromJson(Type outputType, Value value)
|
73
|
+
public Value fromString(Type outputType, StringValue value)
|
91
74
|
{
|
92
75
|
if (outputType instanceof BooleanType) {
|
93
|
-
return ValueFactory.newBoolean(
|
76
|
+
return ValueFactory.newBoolean(StringCast.asBoolean(value.asString()));
|
94
77
|
} else if (outputType instanceof LongType) {
|
95
|
-
return ValueFactory.newInteger(
|
78
|
+
return ValueFactory.newInteger(StringCast.asLong(value.asString()));
|
96
79
|
} else if (outputType instanceof DoubleType) {
|
97
|
-
return ValueFactory.newFloat(
|
80
|
+
return ValueFactory.newFloat(StringCast.asDouble(value.asString()));
|
98
81
|
} else if (outputType instanceof StringType) {
|
99
|
-
return ValueFactory.newString(JsonCast.asString(value));
|
100
|
-
} else if (outputType instanceof TimestampType) {
|
101
|
-
throw new DataException(String.format("no timestamp type in json: \"%s\"", value));
|
102
|
-
} else if (outputType instanceof JsonType) {
|
103
82
|
return value;
|
83
|
+
} else if (outputType instanceof JsonType) {
|
84
|
+
return StringCast.asJson(value.asString());
|
104
85
|
} else {
|
105
86
|
assert(false);
|
106
87
|
return null;
|
@@ -0,0 +1,136 @@
|
|
1
|
+
package org.embulk.filter.typecast;
|
2
|
+
|
3
|
+
import org.embulk.spi.*;
|
4
|
+
import org.embulk.spi.type.Type;
|
5
|
+
import org.msgpack.value.ArrayValue;
|
6
|
+
import org.msgpack.value.MapValue;
|
7
|
+
import org.msgpack.value.Value;
|
8
|
+
import org.msgpack.value.ValueFactory;
|
9
|
+
|
10
|
+
import org.embulk.filter.typecast.TypecastFilterPlugin.ColumnConfig;
|
11
|
+
import org.embulk.filter.typecast.TypecastFilterPlugin.PluginTask;
|
12
|
+
|
13
|
+
import org.slf4j.Logger;
|
14
|
+
|
15
|
+
import java.util.HashMap;
|
16
|
+
import java.util.HashSet;
|
17
|
+
import java.util.Map;
|
18
|
+
|
19
|
+
public class JsonVisitor
|
20
|
+
{
|
21
|
+
private static final Logger logger = Exec.getLogger(TypecastFilterPlugin.class);
|
22
|
+
private final PluginTask task;
|
23
|
+
private final Schema inputSchema;
|
24
|
+
private final Schema outputSchema;
|
25
|
+
private final HashSet<String> shouldVisitSet = new HashSet<>();
|
26
|
+
private final HashMap<String, Type> jsonPathTypeMap = new HashMap<>();
|
27
|
+
private final JsonCaster jsonCaster = new JsonCaster();
|
28
|
+
|
29
|
+
JsonVisitor(PluginTask task, Schema inputSchema, Schema outputSchema)
|
30
|
+
{
|
31
|
+
this.task = task;
|
32
|
+
this.inputSchema = inputSchema;
|
33
|
+
this.outputSchema = outputSchema;
|
34
|
+
|
35
|
+
buildShouldVisitSet();
|
36
|
+
buildJsonPathTypeMap();
|
37
|
+
}
|
38
|
+
|
39
|
+
private void buildJsonPathTypeMap()
|
40
|
+
{
|
41
|
+
// json path => Type
|
42
|
+
for (ColumnConfig columnConfig : task.getColumns()) {
|
43
|
+
String name = columnConfig.getName();
|
44
|
+
if (!name.startsWith("$.")) {
|
45
|
+
continue;
|
46
|
+
}
|
47
|
+
Type type = columnConfig.getType();
|
48
|
+
this.jsonPathTypeMap.put(name, type);
|
49
|
+
}
|
50
|
+
}
|
51
|
+
|
52
|
+
private void buildShouldVisitSet()
|
53
|
+
{
|
54
|
+
// json partial path => Boolean to avoid unnecessary type: json visit
|
55
|
+
for (ColumnConfig columnConfig : task.getColumns()) {
|
56
|
+
String name = columnConfig.getName();
|
57
|
+
if (!name.startsWith("$.")) {
|
58
|
+
continue;
|
59
|
+
}
|
60
|
+
String[] parts = name.split("\\.");
|
61
|
+
StringBuilder partialPath = new StringBuilder("$");
|
62
|
+
for (int i = 1; i < parts.length; i++) {
|
63
|
+
if (parts[i].contains("[")) {
|
64
|
+
String[] arrayParts = parts[i].split("\\[");
|
65
|
+
partialPath.append(".").append(arrayParts[0]);
|
66
|
+
this.shouldVisitSet.add(partialPath.toString());
|
67
|
+
for (int j = 1; j < arrayParts.length; j++) {
|
68
|
+
partialPath.append("[").append(arrayParts[j]);
|
69
|
+
this.shouldVisitSet.add(partialPath.toString());
|
70
|
+
}
|
71
|
+
}
|
72
|
+
else {
|
73
|
+
partialPath.append(".").append(parts[i]);
|
74
|
+
this.shouldVisitSet.add(partialPath.toString());
|
75
|
+
}
|
76
|
+
}
|
77
|
+
}
|
78
|
+
}
|
79
|
+
|
80
|
+
private boolean shouldVisit(String jsonPath)
|
81
|
+
{
|
82
|
+
return shouldVisitSet.contains(jsonPath);
|
83
|
+
}
|
84
|
+
|
85
|
+
public Value visit(String jsonPath, Value value)
|
86
|
+
{
|
87
|
+
if (!shouldVisit(jsonPath)) {
|
88
|
+
return value;
|
89
|
+
}
|
90
|
+
if (value.isArrayValue()) {
|
91
|
+
ArrayValue arrayValue = value.asArrayValue();
|
92
|
+
int size = arrayValue.size();
|
93
|
+
Value[] newValue = new Value[size];
|
94
|
+
for (int i = 0; i < size; i++) {
|
95
|
+
String k = new StringBuilder(jsonPath).append("[").append(Integer.toString(i)).append("]").toString();
|
96
|
+
Value v = arrayValue.get(i);
|
97
|
+
newValue[i] = visit(k, v);
|
98
|
+
}
|
99
|
+
return ValueFactory.newArray(newValue, true);
|
100
|
+
}
|
101
|
+
else if (value.isMapValue()) {
|
102
|
+
MapValue mapValue = value.asMapValue();
|
103
|
+
int size = mapValue.size() * 2;
|
104
|
+
Value[] newValue = new Value[size];
|
105
|
+
int i = 0;
|
106
|
+
for (Map.Entry<Value, Value> entry : mapValue.entrySet()) {
|
107
|
+
Value k = entry.getKey();
|
108
|
+
Value v = entry.getValue();
|
109
|
+
String newPath = new StringBuilder(jsonPath).append(".").append(k.asStringValue().asString()).toString();
|
110
|
+
Value r = visit(newPath, v);
|
111
|
+
newValue[i++] = k;
|
112
|
+
newValue[i++] = r;
|
113
|
+
}
|
114
|
+
return ValueFactory.newMap(newValue, true);
|
115
|
+
}
|
116
|
+
else if (value.isBooleanValue()) {
|
117
|
+
Type outputType = jsonPathTypeMap.get(jsonPath);
|
118
|
+
return jsonCaster.fromBoolean(outputType, value.asBooleanValue());
|
119
|
+
}
|
120
|
+
else if (value.isIntegerValue()) {
|
121
|
+
Type outputType = jsonPathTypeMap.get(jsonPath);
|
122
|
+
return jsonCaster.fromLong(outputType, value.asIntegerValue());
|
123
|
+
}
|
124
|
+
else if (value.isFloatValue()) {
|
125
|
+
Type outputType = jsonPathTypeMap.get(jsonPath);
|
126
|
+
return jsonCaster.fromDouble(outputType, value.asFloatValue());
|
127
|
+
}
|
128
|
+
else if (value.isStringValue()) {
|
129
|
+
Type outputType = jsonPathTypeMap.get(jsonPath);
|
130
|
+
return jsonCaster.fromString(outputType, value.asStringValue());
|
131
|
+
}
|
132
|
+
else {
|
133
|
+
return value;
|
134
|
+
}
|
135
|
+
}
|
136
|
+
}
|
@@ -4,15 +4,18 @@ import com.google.common.base.Optional;
|
|
4
4
|
import com.google.common.collect.ImmutableList;
|
5
5
|
import org.embulk.config.Config;
|
6
6
|
import org.embulk.config.ConfigDefault;
|
7
|
+
import org.embulk.config.ConfigException;
|
7
8
|
import org.embulk.config.ConfigInject;
|
8
9
|
import org.embulk.config.ConfigSource;
|
9
10
|
import org.embulk.config.Task;
|
10
11
|
import org.embulk.config.TaskSource;
|
11
12
|
|
12
13
|
import org.embulk.spi.*;
|
14
|
+
import org.embulk.spi.time.Timestamp;
|
13
15
|
import org.embulk.spi.time.TimestampFormatter;
|
14
16
|
import org.embulk.spi.time.TimestampParser;
|
15
17
|
|
18
|
+
import org.embulk.spi.type.TimestampType;
|
16
19
|
import org.embulk.spi.type.Type;
|
17
20
|
import org.joda.time.DateTimeZone;
|
18
21
|
import org.jruby.embed.ScriptingContainer;
|
@@ -86,7 +89,7 @@ public class TypecastFilterPlugin implements FilterPlugin
|
|
86
89
|
// throw if column does not exist
|
87
90
|
for (ColumnConfig columnConfig : columnConfigs) {
|
88
91
|
String name = columnConfig.getName();
|
89
|
-
if (name.startsWith("$.")) {
|
92
|
+
if (name.startsWith("$.")) { // check only top level column name
|
90
93
|
String firstName = name.split("\\.", 3)[1];
|
91
94
|
inputSchema.lookupColumn(firstName);
|
92
95
|
}
|
@@ -94,6 +97,13 @@ public class TypecastFilterPlugin implements FilterPlugin
|
|
94
97
|
inputSchema.lookupColumn(name);
|
95
98
|
}
|
96
99
|
}
|
100
|
+
// throw if timestamp is specified in json path
|
101
|
+
for (ColumnConfig columnConfig : columnConfigs) {
|
102
|
+
String name = columnConfig.getName();
|
103
|
+
if (name.startsWith("$.") && columnConfig.getType() instanceof TimestampType) {
|
104
|
+
throw new ConfigException(String.format("embulk-filter-typecast: timestamp type is not supported in json column: \"%s\"", name));
|
105
|
+
}
|
106
|
+
}
|
97
107
|
}
|
98
108
|
|
99
109
|
private Schema buildOuputSchema(final PluginTask task, final Schema inputSchema)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-typecast
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naotoshi Seo
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-04-
|
11
|
+
date: 2016-04-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -52,6 +52,7 @@ files:
|
|
52
52
|
- README.md
|
53
53
|
- build.gradle
|
54
54
|
- config/checkstyle/checkstyle.xml
|
55
|
+
- example/example.csv
|
55
56
|
- example/example.yml
|
56
57
|
- example/example2.yml
|
57
58
|
- gradle/wrapper/gradle-wrapper.jar
|
@@ -59,10 +60,11 @@ files:
|
|
59
60
|
- gradlew
|
60
61
|
- gradlew.bat
|
61
62
|
- lib/embulk/filter/typecast.rb
|
63
|
+
- src/main/java/org/embulk/filter/typecast/ColumnCaster.java
|
62
64
|
- src/main/java/org/embulk/filter/typecast/ColumnVisitorImpl.java
|
65
|
+
- src/main/java/org/embulk/filter/typecast/JsonCaster.java
|
66
|
+
- src/main/java/org/embulk/filter/typecast/JsonVisitor.java
|
63
67
|
- src/main/java/org/embulk/filter/typecast/TypecastFilterPlugin.java
|
64
|
-
- src/main/java/org/embulk/filter/typecast/TypecastJsonBuilder.java
|
65
|
-
- src/main/java/org/embulk/filter/typecast/TypecastPageBuilder.java
|
66
68
|
- src/main/java/org/embulk/filter/typecast/cast/BooleanCast.java
|
67
69
|
- src/main/java/org/embulk/filter/typecast/cast/DoubleCast.java
|
68
70
|
- src/main/java/org/embulk/filter/typecast/cast/JsonCast.java
|
@@ -70,7 +72,7 @@ files:
|
|
70
72
|
- src/main/java/org/embulk/filter/typecast/cast/StringCast.java
|
71
73
|
- src/main/java/org/embulk/filter/typecast/cast/TimestampCast.java
|
72
74
|
- src/test/java/org/embulk/filter/TestTypecastFilterPlugin.java
|
73
|
-
- classpath/embulk-filter-typecast-0.1.1.jar
|
75
|
+
- classpath/embulk-filter-typecast-0.1.2.jar
|
74
76
|
homepage: https://github.com/sonots/embulk-filter-typecast
|
75
77
|
licenses:
|
76
78
|
- MIT
|