embulk-filter-typecast 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +12 -0
- data/build.gradle +1 -1
- data/example/empty.yml +24 -0
- data/example/example.csv +11 -11
- data/example/example.yml +5 -0
- data/settings.gradle +1 -0
- data/src/main/java/org/embulk/filter/typecast/ColumnCaster.java +155 -60
- data/src/main/java/org/embulk/filter/typecast/ColumnVisitorImpl.java +30 -72
- data/src/main/java/org/embulk/filter/typecast/JsonCaster.java +56 -30
- data/src/main/java/org/embulk/filter/typecast/JsonVisitor.java +9 -4
- data/src/main/java/org/embulk/filter/typecast/TypecastFilterPlugin.java +11 -8
- data/src/main/java/org/embulk/filter/typecast/cast/BooleanCast.java +6 -3
- data/src/main/java/org/embulk/filter/typecast/cast/DoubleCast.java +7 -4
- data/src/main/java/org/embulk/filter/typecast/cast/JsonCast.java +5 -2
- data/src/main/java/org/embulk/filter/typecast/cast/LongCast.java +6 -3
- data/src/main/java/org/embulk/filter/typecast/cast/StringCast.java +5 -3
- data/src/main/java/org/embulk/filter/typecast/cast/TimestampCast.java +5 -2
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0ced2804053187863055f59aa064418cd0b8d7aa
|
4
|
+
data.tar.gz: 447a4328482d178a479b404bfb7af62c61a2c1f3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2833a31678457b155864532d95de1af40594ffa7d5ffee4d489bcc4d993b444dacd18adbbcac444b2ad6b2fe85b2fbb837fa559abbdadd13010a98009894ec1d
|
7
|
+
data.tar.gz: f668410813b7109c96a6277b6ecd0b576334f74261f2c5ce5d941ddc0b6b6e2b4e84d4daeedde49b3a86abc140cc99481fa7a5a0e0b330a9494efccd0cd5eaaf
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -19,6 +19,18 @@ A filter plugin for Embulk to cast column type.
|
|
19
19
|
|
20
20
|
See [example.csv](./example/example.csv) and [example.yml](./example/example.yml).
|
21
21
|
|
22
|
+
## JSONPath-like column names
|
23
|
+
|
24
|
+
For a `type: json` column, you can specify a [JSONPath](http://goessner.net/articles/JsonPath/) expression as the column's name, for example:
|
25
|
+
|
26
|
+
```
|
27
|
+
$.payload.key1
|
28
|
+
$.payload.array[0]
|
29
|
+
$.payload.array[*]
|
30
|
+
```
|
31
|
+
|
32
|
+
NOTE: JSONPath syntax is not fully supported
|
33
|
+
|
22
34
|
## ToDo
|
23
35
|
|
24
36
|
* Write test
|
data/build.gradle
CHANGED
data/example/empty.yml
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: example/example.csv
|
4
|
+
parser:
|
5
|
+
type: csv
|
6
|
+
charset: UTF-8
|
7
|
+
newline: CRLF
|
8
|
+
null_string: 'NULL'
|
9
|
+
skip_header_lines: 1
|
10
|
+
comment_line_marker: '#'
|
11
|
+
columns:
|
12
|
+
- {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S.%N", timezone: "+09:00"}
|
13
|
+
- {name: "null", type: string}
|
14
|
+
- {name: long, type: long}
|
15
|
+
- {name: string, type: string}
|
16
|
+
- {name: double, type: double}
|
17
|
+
- {name: json1, type: json}
|
18
|
+
- {name: json2, type: json}
|
19
|
+
- {name: ignore, type: timestamp, format: "%Y-%m-%d", timezone: "+09:00"}
|
20
|
+
- {name: boolean, type: boolean}
|
21
|
+
filters:
|
22
|
+
- type: typecast
|
23
|
+
out:
|
24
|
+
type: "null"
|
data/example/example.csv
CHANGED
@@ -1,11 +1,11 @@
|
|
1
|
-
timestamp,null,long,string,double,json1,json2,boolean
|
2
|
-
2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,{"string":"0"},{"long":0},true
|
3
|
-
2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,{"string":"1"},{"long":1},true
|
4
|
-
2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,{"string":"2"},{"long":2},true
|
5
|
-
2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,{"string":"3"},{"long":3},true
|
6
|
-
2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,{"string":"4"},{"long":4},true
|
7
|
-
2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,{"string":"5"},{"long":5},false
|
8
|
-
2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,{"string":"6"},{"long":6},false
|
9
|
-
2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,{"string":"7"},{"long":7},false
|
10
|
-
2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,{"string":"8"},{"long":8},false
|
11
|
-
2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,{"string":"9"},{"long":9},false
|
1
|
+
timestamp,null,long,string,double,json1,json2,array_str,array_int,ignore,boolean
|
2
|
+
2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,{"string":"0"},{"long":0},["0"],[0],2015-07-13,true
|
3
|
+
2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,{"string":"1"},{"long":1},["1"],[1],2015-07-13,true
|
4
|
+
2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,{"string":"2"},{"long":2},["2"],[2],2015-07-13,true
|
5
|
+
2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,{"string":"3"},{"long":3},["3"],[3],2015-07-13,true
|
6
|
+
2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,{"string":"4"},{"long":4},["4"],[4],2015-07-13,true
|
7
|
+
2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,{"string":"5"},{"long":5},["5"],[5],2015-07-13,false
|
8
|
+
2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,{"string":"6"},{"long":6},["6"],[6],2015-07-13,false
|
9
|
+
2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,{"string":"7"},{"long":7},["7"],[7],2015-07-13,false
|
10
|
+
2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,{"string":"8"},{"long":8},["8"],[8],2015-07-13,false
|
11
|
+
2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,{"string":"9"},{"long":9},["9"],[9],2015-07-13,false
|
data/example/example.yml
CHANGED
@@ -16,6 +16,9 @@ in:
|
|
16
16
|
- {name: double, type: double}
|
17
17
|
- {name: json1, type: json}
|
18
18
|
- {name: json2, type: json}
|
19
|
+
- {name: array_str, type: json}
|
20
|
+
- {name: array_int, type: json}
|
21
|
+
- {name: ignore, type: timestamp, format: "%Y-%m-%d", timezone: "+09:00"}
|
19
22
|
- {name: boolean, type: boolean}
|
20
23
|
filters:
|
21
24
|
- type: typecast
|
@@ -29,5 +32,7 @@ filters:
|
|
29
32
|
- {name: boolean, type: string}
|
30
33
|
- {name: "$.json1.string", type: string}
|
31
34
|
- {name: "$.json2.long", type: string}
|
35
|
+
- {name: "$.array_str[0]", type: long}
|
36
|
+
- {name: "$.array_int[*]", type: string}
|
32
37
|
out:
|
33
38
|
type: "null"
|
data/settings.gradle
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
rootProject.name = 'embulk-filter-typecast'
|
@@ -1,11 +1,15 @@
|
|
1
1
|
package org.embulk.filter.typecast;
|
2
2
|
|
3
|
-
import org.embulk.filter.typecast.
|
4
|
-
|
3
|
+
import org.embulk.filter.typecast.TypecastFilterPlugin.ColumnConfig;
|
5
4
|
import org.embulk.filter.typecast.TypecastFilterPlugin.PluginTask;
|
6
5
|
|
6
|
+
import org.embulk.filter.typecast.cast.BooleanCast;
|
7
|
+
import org.embulk.filter.typecast.cast.DoubleCast;
|
8
|
+
import org.embulk.filter.typecast.cast.JsonCast;
|
9
|
+
import org.embulk.filter.typecast.cast.LongCast;
|
10
|
+
import org.embulk.filter.typecast.cast.StringCast;
|
11
|
+
import org.embulk.filter.typecast.cast.TimestampCast;
|
7
12
|
import org.embulk.spi.Column;
|
8
|
-
import org.embulk.spi.DataException;
|
9
13
|
import org.embulk.spi.Exec;
|
10
14
|
import org.embulk.spi.PageBuilder;
|
11
15
|
import org.embulk.spi.PageReader;
|
@@ -13,11 +17,19 @@ import org.embulk.spi.Schema;
|
|
13
17
|
import org.embulk.spi.time.Timestamp;
|
14
18
|
import org.embulk.spi.time.TimestampFormatter;
|
15
19
|
import org.embulk.spi.time.TimestampParser;
|
16
|
-
import org.embulk.spi.type
|
20
|
+
import org.embulk.spi.type.BooleanType;
|
21
|
+
import org.embulk.spi.type.DoubleType;
|
22
|
+
import org.embulk.spi.type.JsonType;
|
23
|
+
import org.embulk.spi.type.LongType;
|
24
|
+
import org.embulk.spi.type.StringType;
|
25
|
+
import org.embulk.spi.type.TimestampType;
|
26
|
+
import org.embulk.spi.type.Type;
|
27
|
+
import org.joda.time.DateTimeZone;
|
17
28
|
import org.msgpack.value.Value;
|
18
29
|
|
19
30
|
import org.slf4j.Logger;
|
20
31
|
|
32
|
+
import java.util.HashMap;
|
21
33
|
|
22
34
|
class ColumnCaster
|
23
35
|
{
|
@@ -27,6 +39,8 @@ class ColumnCaster
|
|
27
39
|
private final Schema outputSchema;
|
28
40
|
private final PageReader pageReader;
|
29
41
|
private final PageBuilder pageBuilder;
|
42
|
+
private final HashMap<String, TimestampParser> timestampParserMap = new HashMap<>();
|
43
|
+
private final HashMap<String, TimestampFormatter> timestampFormatterMap = new HashMap<>();
|
30
44
|
private final JsonVisitor jsonVisitor;
|
31
45
|
|
32
46
|
ColumnCaster(TypecastFilterPlugin.PluginTask task, Schema inputSchema, Schema outputSchema,
|
@@ -37,24 +51,78 @@ class ColumnCaster
|
|
37
51
|
this.outputSchema = outputSchema;
|
38
52
|
this.pageReader = pageReader;
|
39
53
|
this.pageBuilder = pageBuilder;
|
54
|
+
|
55
|
+
buildTimestampParserMap();
|
56
|
+
buildTimestampFormatterMap();
|
40
57
|
this.jsonVisitor = new JsonVisitor(task, inputSchema, outputSchema);
|
41
58
|
}
|
42
59
|
|
43
|
-
|
60
|
+
private void buildTimestampParserMap()
|
61
|
+
{
|
62
|
+
// columnName => TimestampParser
|
63
|
+
for (ColumnConfig columnConfig : task.getColumns()) {
|
64
|
+
if (columnConfig.getName().startsWith("$.")) {
|
65
|
+
continue; // type: json columns do not support type: timestamp
|
66
|
+
}
|
67
|
+
Column inputColumn = inputSchema.lookupColumn(columnConfig.getName());
|
68
|
+
if (inputColumn.getType() instanceof StringType && columnConfig.getType() instanceof TimestampType) {
|
69
|
+
TimestampParser parser = getTimestampParser(columnConfig, task);
|
70
|
+
this.timestampParserMap.put(columnConfig.getName(), parser);
|
71
|
+
}
|
72
|
+
}
|
73
|
+
}
|
74
|
+
|
75
|
+
private void buildTimestampFormatterMap()
|
76
|
+
{
|
77
|
+
// columnName => TimestampFormatter
|
78
|
+
for (ColumnConfig columnConfig : task.getColumns()) {
|
79
|
+
if (columnConfig.getName().startsWith("$.")) {
|
80
|
+
continue; // type: json columns do not have type: timestamp
|
81
|
+
}
|
82
|
+
Column inputColumn = inputSchema.lookupColumn(columnConfig.getName());
|
83
|
+
if (inputColumn.getType() instanceof TimestampType && columnConfig.getType() instanceof StringType) {
|
84
|
+
TimestampFormatter parser = getTimestampFormatter(columnConfig, task);
|
85
|
+
this.timestampFormatterMap.put(columnConfig.getName(), parser);
|
86
|
+
}
|
87
|
+
}
|
88
|
+
}
|
89
|
+
|
90
|
+
private TimestampParser getTimestampParser(ColumnConfig columnConfig, PluginTask task)
|
91
|
+
{
|
92
|
+
DateTimeZone timezone = columnConfig.getTimeZone().or(task.getDefaultTimeZone());
|
93
|
+
String format = columnConfig.getFormat().or(task.getDefaultTimestampFormat());
|
94
|
+
return new TimestampParser(task.getJRuby(), format, timezone);
|
95
|
+
}
|
96
|
+
|
97
|
+
private TimestampFormatter getTimestampFormatter(ColumnConfig columnConfig, PluginTask task)
|
98
|
+
{
|
99
|
+
String format = columnConfig.getFormat().or(task.getDefaultTimestampFormat());
|
100
|
+
DateTimeZone timezone = columnConfig.getTimeZone().or(task.getDefaultTimeZone());
|
101
|
+
return new TimestampFormatter(task.getJRuby(), format, timezone);
|
102
|
+
}
|
103
|
+
|
104
|
+
public void setFromBoolean(Column outputColumn, boolean value)
|
105
|
+
{
|
44
106
|
Type outputType = outputColumn.getType();
|
45
107
|
if (outputType instanceof BooleanType) {
|
46
108
|
pageBuilder.setBoolean(outputColumn, BooleanCast.asBoolean(value));
|
47
|
-
}
|
109
|
+
}
|
110
|
+
else if (outputType instanceof LongType) {
|
48
111
|
pageBuilder.setLong(outputColumn, BooleanCast.asLong(value));
|
49
|
-
}
|
112
|
+
}
|
113
|
+
else if (outputType instanceof DoubleType) {
|
50
114
|
pageBuilder.setDouble(outputColumn, BooleanCast.asDouble(value));
|
51
|
-
}
|
115
|
+
}
|
116
|
+
else if (outputType instanceof StringType) {
|
52
117
|
pageBuilder.setString(outputColumn, BooleanCast.asString(value));
|
53
|
-
}
|
118
|
+
}
|
119
|
+
else if (outputType instanceof TimestampType) {
|
54
120
|
pageBuilder.setTimestamp(outputColumn, BooleanCast.asTimestamp(value));
|
55
|
-
}
|
121
|
+
}
|
122
|
+
else if (outputType instanceof JsonType) {
|
56
123
|
pageBuilder.setJson(outputColumn, BooleanCast.asJson(value));
|
57
|
-
}
|
124
|
+
}
|
125
|
+
else {
|
58
126
|
assert (false);
|
59
127
|
}
|
60
128
|
}
|
@@ -64,86 +132,107 @@ class ColumnCaster
|
|
64
132
|
Type outputType = outputColumn.getType();
|
65
133
|
if (outputType instanceof BooleanType) {
|
66
134
|
pageBuilder.setBoolean(outputColumn, LongCast.asBoolean(value));
|
67
|
-
}
|
135
|
+
}
|
136
|
+
else if (outputType instanceof LongType) {
|
68
137
|
pageBuilder.setLong(outputColumn, LongCast.asLong(value));
|
69
|
-
}
|
138
|
+
}
|
139
|
+
else if (outputType instanceof DoubleType) {
|
70
140
|
pageBuilder.setDouble(outputColumn, LongCast.asDouble(value));
|
71
|
-
}
|
141
|
+
}
|
142
|
+
else if (outputType instanceof StringType) {
|
72
143
|
pageBuilder.setString(outputColumn, LongCast.asString(value));
|
73
|
-
}
|
144
|
+
}
|
145
|
+
else if (outputType instanceof TimestampType) {
|
74
146
|
pageBuilder.setTimestamp(outputColumn, LongCast.asTimestamp(value));
|
75
|
-
}
|
147
|
+
}
|
148
|
+
else if (outputType instanceof JsonType) {
|
76
149
|
pageBuilder.setJson(outputColumn, LongCast.asJson(value));
|
77
|
-
}
|
78
|
-
|
150
|
+
}
|
151
|
+
else {
|
152
|
+
assert false;
|
79
153
|
}
|
80
154
|
}
|
81
155
|
|
82
156
|
public void setFromDouble(Column outputColumn, double value)
|
83
157
|
{
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
pageBuilder.setBoolean(outputColumn, DoubleCast.asBoolean(value));
|
88
|
-
} else if (outputType instanceof LongType) {
|
89
|
-
pageBuilder.setLong(outputColumn, DoubleCast.asLong(value));
|
90
|
-
} else if (outputType instanceof DoubleType) {
|
91
|
-
pageBuilder.setDouble(outputColumn, DoubleCast.asDouble(value));
|
92
|
-
} else if (outputType instanceof StringType) {
|
93
|
-
pageBuilder.setString(outputColumn, DoubleCast.asString(value));
|
94
|
-
} else if (outputType instanceof TimestampType) {
|
95
|
-
pageBuilder.setTimestamp(outputColumn, DoubleCast.asTimestamp(value));
|
96
|
-
} else if (outputType instanceof JsonType) {
|
97
|
-
pageBuilder.setJson(outputColumn, DoubleCast.asJson(value));
|
98
|
-
} else {
|
99
|
-
assert (false);
|
100
|
-
}
|
158
|
+
Type outputType = outputColumn.getType();
|
159
|
+
if (outputType instanceof BooleanType) {
|
160
|
+
pageBuilder.setBoolean(outputColumn, DoubleCast.asBoolean(value));
|
101
161
|
}
|
102
|
-
|
103
|
-
|
162
|
+
else if (outputType instanceof LongType) {
|
163
|
+
pageBuilder.setLong(outputColumn, DoubleCast.asLong(value));
|
164
|
+
}
|
165
|
+
else if (outputType instanceof DoubleType) {
|
166
|
+
pageBuilder.setDouble(outputColumn, DoubleCast.asDouble(value));
|
167
|
+
}
|
168
|
+
else if (outputType instanceof StringType) {
|
169
|
+
pageBuilder.setString(outputColumn, DoubleCast.asString(value));
|
170
|
+
}
|
171
|
+
else if (outputType instanceof TimestampType) {
|
172
|
+
pageBuilder.setTimestamp(outputColumn, DoubleCast.asTimestamp(value));
|
173
|
+
}
|
174
|
+
else if (outputType instanceof JsonType) {
|
175
|
+
pageBuilder.setJson(outputColumn, DoubleCast.asJson(value));
|
176
|
+
}
|
177
|
+
else {
|
178
|
+
assert false;
|
104
179
|
}
|
105
180
|
}
|
106
181
|
|
107
|
-
public void setFromString(Column outputColumn, String value
|
182
|
+
public void setFromString(Column outputColumn, String value)
|
108
183
|
{
|
109
184
|
Type outputType = outputColumn.getType();
|
110
185
|
if (outputType instanceof BooleanType) {
|
111
186
|
pageBuilder.setBoolean(outputColumn, StringCast.asBoolean(value));
|
112
|
-
}
|
187
|
+
}
|
188
|
+
else if (outputType instanceof LongType) {
|
113
189
|
pageBuilder.setLong(outputColumn, StringCast.asLong(value));
|
114
|
-
}
|
190
|
+
}
|
191
|
+
else if (outputType instanceof DoubleType) {
|
115
192
|
pageBuilder.setDouble(outputColumn, StringCast.asDouble(value));
|
116
|
-
}
|
193
|
+
}
|
194
|
+
else if (outputType instanceof StringType) {
|
117
195
|
pageBuilder.setString(outputColumn, StringCast.asString(value));
|
118
|
-
}
|
196
|
+
}
|
197
|
+
else if (outputType instanceof TimestampType) {
|
198
|
+
TimestampParser timestampParser = timestampParserMap.get(outputColumn.getName());
|
119
199
|
pageBuilder.setTimestamp(outputColumn, StringCast.asTimestamp(value, timestampParser));
|
120
|
-
}
|
200
|
+
}
|
201
|
+
else if (outputType instanceof JsonType) {
|
121
202
|
Value jsonValue = StringCast.asJson(value);
|
122
203
|
String jsonPath = new StringBuilder("$.").append(outputColumn.getName()).toString();
|
123
204
|
Value castedValue = jsonVisitor.visit(jsonPath, jsonValue);
|
124
205
|
pageBuilder.setJson(outputColumn, castedValue);
|
125
|
-
}
|
126
|
-
|
206
|
+
}
|
207
|
+
else {
|
208
|
+
assert false;
|
127
209
|
}
|
128
210
|
}
|
129
211
|
|
130
|
-
public void setFromTimestamp(Column outputColumn, Timestamp value
|
212
|
+
public void setFromTimestamp(Column outputColumn, Timestamp value)
|
131
213
|
{
|
132
214
|
Type outputType = outputColumn.getType();
|
133
215
|
if (outputType instanceof BooleanType) {
|
134
216
|
pageBuilder.setBoolean(outputColumn, TimestampCast.asBoolean(value));
|
135
|
-
}
|
217
|
+
}
|
218
|
+
else if (outputType instanceof LongType) {
|
136
219
|
pageBuilder.setLong(outputColumn, TimestampCast.asLong(value));
|
137
|
-
}
|
220
|
+
}
|
221
|
+
else if (outputType instanceof DoubleType) {
|
138
222
|
pageBuilder.setDouble(outputColumn, TimestampCast.asDouble(value));
|
139
|
-
}
|
223
|
+
}
|
224
|
+
else if (outputType instanceof StringType) {
|
225
|
+
TimestampFormatter timestampFormatter = timestampFormatterMap.get(outputColumn.getName());
|
140
226
|
pageBuilder.setString(outputColumn, TimestampCast.asString(value, timestampFormatter));
|
141
|
-
}
|
227
|
+
}
|
228
|
+
else if (outputType instanceof TimestampType) {
|
142
229
|
pageBuilder.setTimestamp(outputColumn, TimestampCast.asTimestamp(value));
|
143
|
-
}
|
230
|
+
}
|
231
|
+
else if (outputType instanceof JsonType) {
|
144
232
|
pageBuilder.setJson(outputColumn, TimestampCast.asJson(value));
|
145
|
-
}
|
146
|
-
|
233
|
+
}
|
234
|
+
else {
|
235
|
+
assert false;
|
147
236
|
}
|
148
237
|
}
|
149
238
|
|
@@ -154,18 +243,24 @@ class ColumnCaster
|
|
154
243
|
Type outputType = outputColumn.getType();
|
155
244
|
if (outputType instanceof BooleanType) {
|
156
245
|
pageBuilder.setBoolean(outputColumn, JsonCast.asBoolean(castedValue));
|
157
|
-
}
|
246
|
+
}
|
247
|
+
else if (outputType instanceof LongType) {
|
158
248
|
pageBuilder.setLong(outputColumn, JsonCast.asLong(castedValue));
|
159
|
-
}
|
249
|
+
}
|
250
|
+
else if (outputType instanceof DoubleType) {
|
160
251
|
pageBuilder.setDouble(outputColumn, JsonCast.asDouble(castedValue));
|
161
|
-
}
|
252
|
+
}
|
253
|
+
else if (outputType instanceof StringType) {
|
162
254
|
pageBuilder.setString(outputColumn, JsonCast.asString(castedValue));
|
163
|
-
}
|
255
|
+
}
|
256
|
+
else if (outputType instanceof TimestampType) {
|
164
257
|
pageBuilder.setTimestamp(outputColumn, JsonCast.asTimestamp(castedValue));
|
165
|
-
}
|
258
|
+
}
|
259
|
+
else if (outputType instanceof JsonType) {
|
166
260
|
pageBuilder.setJson(outputColumn, JsonCast.asJson(castedValue));
|
167
|
-
}
|
168
|
-
|
261
|
+
}
|
262
|
+
else {
|
263
|
+
assert false;
|
169
264
|
}
|
170
265
|
}
|
171
266
|
}
|
@@ -1,21 +1,19 @@
|
|
1
1
|
package org.embulk.filter.typecast;
|
2
2
|
|
3
|
-
import org.embulk.spi.*;
|
4
|
-
import org.embulk.spi.type.StringType;
|
5
|
-
import org.embulk.spi.type.TimestampType;
|
6
|
-
|
7
|
-
import org.embulk.filter.typecast.TypecastFilterPlugin.ColumnConfig;
|
8
3
|
import org.embulk.filter.typecast.TypecastFilterPlugin.PluginTask;
|
9
4
|
|
10
|
-
import org.embulk.spi.
|
11
|
-
import org.embulk.spi.
|
12
|
-
import org.
|
5
|
+
import org.embulk.spi.Column;
|
6
|
+
import org.embulk.spi.ColumnVisitor;
|
7
|
+
import org.embulk.spi.DataException;
|
8
|
+
import org.embulk.spi.Exec;
|
9
|
+
import org.embulk.spi.PageBuilder;
|
10
|
+
import org.embulk.spi.PageReader;
|
11
|
+
import org.embulk.spi.Schema;
|
13
12
|
import org.slf4j.Logger;
|
14
13
|
|
15
14
|
import java.util.HashMap;
|
16
15
|
|
17
|
-
|
18
|
-
implements ColumnVisitor
|
16
|
+
class ColumnVisitorImpl implements ColumnVisitor
|
19
17
|
{
|
20
18
|
private static final Logger logger = Exec.getLogger(TypecastFilterPlugin.class);
|
21
19
|
private final PluginTask task;
|
@@ -24,8 +22,6 @@ public class ColumnVisitorImpl
|
|
24
22
|
private final PageReader pageReader;
|
25
23
|
private final PageBuilder pageBuilder;
|
26
24
|
private final HashMap<String, Column> outputColumnMap = new HashMap<>();
|
27
|
-
private final HashMap<String, TimestampParser> timestampParserMap = new HashMap<>();
|
28
|
-
private final HashMap<String, TimestampFormatter> timestampFormatterMap = new HashMap<>();
|
29
25
|
private final ColumnCaster columnCaster;
|
30
26
|
|
31
27
|
ColumnVisitorImpl(PluginTask task, Schema inputSchema, Schema outputSchema,
|
@@ -40,8 +36,6 @@ public class ColumnVisitorImpl
|
|
40
36
|
this.columnCaster = new ColumnCaster(task, inputSchema, outputSchema, pageReader, pageBuilder);
|
41
37
|
|
42
38
|
buildOutputColumnMap();
|
43
|
-
buildTimestampParserMap();
|
44
|
-
buildTimestampFormatterMap();
|
45
39
|
}
|
46
40
|
|
47
41
|
private void buildOutputColumnMap()
|
@@ -52,66 +46,26 @@ public class ColumnVisitorImpl
|
|
52
46
|
}
|
53
47
|
}
|
54
48
|
|
55
|
-
private void buildTimestampParserMap()
|
56
|
-
{
|
57
|
-
// columnName => TimestampParser
|
58
|
-
for (ColumnConfig columnConfig : task.getColumns()) {
|
59
|
-
if (columnConfig.getName().startsWith("$.")) {
|
60
|
-
continue; // type: json columns do not support type: timestamp
|
61
|
-
}
|
62
|
-
Column inputColumn = inputSchema.lookupColumn(columnConfig.getName());
|
63
|
-
if (inputColumn.getType() instanceof StringType && columnConfig.getType() instanceof TimestampType) {
|
64
|
-
TimestampParser parser = getTimestampParser(columnConfig, task);
|
65
|
-
this.timestampParserMap.put(columnConfig.getName(), parser);
|
66
|
-
}
|
67
|
-
}
|
68
|
-
}
|
69
|
-
|
70
|
-
private void buildTimestampFormatterMap()
|
71
|
-
{
|
72
|
-
// columnName => TimestampFormatter
|
73
|
-
for (ColumnConfig columnConfig : task.getColumns()) {
|
74
|
-
if (columnConfig.getName().startsWith("$.")) {
|
75
|
-
continue; // type: json columns do not have type: timestamp
|
76
|
-
}
|
77
|
-
Column inputColumn = inputSchema.lookupColumn(columnConfig.getName());
|
78
|
-
if (inputColumn.getType() instanceof TimestampType && columnConfig.getType() instanceof StringType) {
|
79
|
-
TimestampFormatter parser = getTimestampFormatter(columnConfig, task);
|
80
|
-
this.timestampFormatterMap.put(columnConfig.getName(), parser);
|
81
|
-
}
|
82
|
-
}
|
83
|
-
}
|
84
|
-
|
85
|
-
private TimestampParser getTimestampParser(ColumnConfig columnConfig, PluginTask task)
|
86
|
-
{
|
87
|
-
DateTimeZone timezone = columnConfig.getTimeZone().or(task.getDefaultTimeZone());
|
88
|
-
String format = columnConfig.getFormat().or(task.getDefaultTimestampFormat());
|
89
|
-
return new TimestampParser(task.getJRuby(), format, timezone);
|
90
|
-
}
|
91
|
-
|
92
|
-
private TimestampFormatter getTimestampFormatter(ColumnConfig columnConfig, PluginTask task)
|
93
|
-
{
|
94
|
-
String format = columnConfig.getFormat().or(task.getDefaultTimestampFormat());
|
95
|
-
DateTimeZone timezone = columnConfig.getTimeZone().or(task.getDefaultTimeZone());
|
96
|
-
return new TimestampFormatter(task.getJRuby(), format, timezone);
|
97
|
-
}
|
98
|
-
|
99
49
|
private interface PageBuildable
|
100
50
|
{
|
101
|
-
|
51
|
+
void run() throws DataException;
|
102
52
|
}
|
103
53
|
|
104
|
-
private void withStopOnInvalidRecord(final PageBuildable op, final Column inputColumn, final Column outputColumn)
|
54
|
+
private void withStopOnInvalidRecord(final PageBuildable op, final Column inputColumn, final Column outputColumn)
|
55
|
+
throws DataException
|
56
|
+
{
|
105
57
|
if (pageReader.isNull(inputColumn)) {
|
106
58
|
pageBuilder.setNull(outputColumn);
|
107
59
|
}
|
108
60
|
else {
|
109
61
|
if (task.getStopOnInvalidRecord()) {
|
110
62
|
op.run();
|
111
|
-
}
|
63
|
+
}
|
64
|
+
else {
|
112
65
|
try {
|
113
66
|
op.run();
|
114
|
-
}
|
67
|
+
}
|
68
|
+
catch (final DataException ex) {
|
115
69
|
logger.warn(ex.getMessage());
|
116
70
|
pageBuilder.setNull(outputColumn);
|
117
71
|
}
|
@@ -124,7 +78,8 @@ public class ColumnVisitorImpl
|
|
124
78
|
{
|
125
79
|
final Column outputColumn = outputColumnMap.get(inputColumn.getName());
|
126
80
|
PageBuildable op = new PageBuildable() {
|
127
|
-
public void run() throws DataException
|
81
|
+
public void run() throws DataException
|
82
|
+
{
|
128
83
|
columnCaster.setFromBoolean(outputColumn, pageReader.getBoolean(inputColumn));
|
129
84
|
}
|
130
85
|
};
|
@@ -136,7 +91,8 @@ public class ColumnVisitorImpl
|
|
136
91
|
{
|
137
92
|
final Column outputColumn = outputColumnMap.get(inputColumn.getName());
|
138
93
|
PageBuildable op = new PageBuildable() {
|
139
|
-
public void run() throws DataException
|
94
|
+
public void run() throws DataException
|
95
|
+
{
|
140
96
|
columnCaster.setFromLong(outputColumn, pageReader.getLong(inputColumn));
|
141
97
|
}
|
142
98
|
};
|
@@ -148,7 +104,8 @@ public class ColumnVisitorImpl
|
|
148
104
|
{
|
149
105
|
final Column outputColumn = outputColumnMap.get(inputColumn.getName());
|
150
106
|
PageBuildable op = new PageBuildable() {
|
151
|
-
public void run() throws DataException
|
107
|
+
public void run() throws DataException
|
108
|
+
{
|
152
109
|
columnCaster.setFromDouble(outputColumn, pageReader.getDouble(inputColumn));
|
153
110
|
}
|
154
111
|
};
|
@@ -159,10 +116,10 @@ public class ColumnVisitorImpl
|
|
159
116
|
public void stringColumn(final Column inputColumn)
|
160
117
|
{
|
161
118
|
final Column outputColumn = outputColumnMap.get(inputColumn.getName());
|
162
|
-
final TimestampParser timestampParser = timestampParserMap.get(inputColumn.getName());
|
163
119
|
PageBuildable op = new PageBuildable() {
|
164
|
-
public void run() throws DataException
|
165
|
-
|
120
|
+
public void run() throws DataException
|
121
|
+
{
|
122
|
+
columnCaster.setFromString(outputColumn, pageReader.getString(inputColumn));
|
166
123
|
}
|
167
124
|
};
|
168
125
|
withStopOnInvalidRecord(op, inputColumn, outputColumn);
|
@@ -172,10 +129,10 @@ public class ColumnVisitorImpl
|
|
172
129
|
public void timestampColumn(final Column inputColumn)
|
173
130
|
{
|
174
131
|
final Column outputColumn = outputColumnMap.get(inputColumn.getName());
|
175
|
-
final TimestampFormatter timestampFormatter = timestampFormatterMap.get(inputColumn.getName());
|
176
132
|
PageBuildable op = new PageBuildable() {
|
177
|
-
public void run() throws DataException
|
178
|
-
|
133
|
+
public void run() throws DataException
|
134
|
+
{
|
135
|
+
columnCaster.setFromTimestamp(outputColumn, pageReader.getTimestamp(inputColumn));
|
179
136
|
}
|
180
137
|
};
|
181
138
|
withStopOnInvalidRecord(op, inputColumn, outputColumn);
|
@@ -186,7 +143,8 @@ public class ColumnVisitorImpl
|
|
186
143
|
{
|
187
144
|
final Column outputColumn = outputColumnMap.get(inputColumn.getName());
|
188
145
|
PageBuildable op = new PageBuildable() {
|
189
|
-
public void run() throws DataException
|
146
|
+
public void run() throws DataException
|
147
|
+
{
|
190
148
|
columnCaster.setFromJson(outputColumn, pageReader.getJson(inputColumn));
|
191
149
|
}
|
192
150
|
};
|
@@ -1,34 +1,45 @@
|
|
1
1
|
package org.embulk.filter.typecast;
|
2
2
|
|
3
|
-
import org.embulk.filter.typecast.cast
|
4
|
-
|
3
|
+
import org.embulk.filter.typecast.cast.BooleanCast;
|
4
|
+
import org.embulk.filter.typecast.cast.DoubleCast;
|
5
|
+
import org.embulk.filter.typecast.cast.LongCast;
|
6
|
+
import org.embulk.filter.typecast.cast.StringCast;
|
5
7
|
import org.embulk.spi.DataException;
|
6
|
-
import org.embulk.spi.type
|
8
|
+
import org.embulk.spi.type.BooleanType;
|
9
|
+
import org.embulk.spi.type.DoubleType;
|
10
|
+
import org.embulk.spi.type.JsonType;
|
11
|
+
import org.embulk.spi.type.LongType;
|
12
|
+
import org.embulk.spi.type.StringType;
|
13
|
+
import org.embulk.spi.type.Type;
|
7
14
|
import org.msgpack.value.BooleanValue;
|
8
|
-
import org.msgpack.value.IntegerValue;
|
9
15
|
import org.msgpack.value.FloatValue;
|
16
|
+
import org.msgpack.value.IntegerValue;
|
10
17
|
import org.msgpack.value.StringValue;
|
11
18
|
import org.msgpack.value.Value;
|
12
19
|
import org.msgpack.value.ValueFactory;
|
13
20
|
|
14
21
|
class JsonCaster
|
15
22
|
{
|
16
|
-
public JsonCaster()
|
17
|
-
{
|
18
|
-
}
|
23
|
+
public JsonCaster() {}
|
19
24
|
|
20
|
-
public Value fromBoolean(Type outputType, BooleanValue value)
|
25
|
+
public Value fromBoolean(Type outputType, BooleanValue value)
|
26
|
+
{
|
21
27
|
if (outputType instanceof BooleanType) {
|
22
28
|
return value;
|
23
|
-
}
|
29
|
+
}
|
30
|
+
else if (outputType instanceof LongType) {
|
24
31
|
return ValueFactory.newInteger(BooleanCast.asLong(value.getBoolean()));
|
25
|
-
}
|
32
|
+
}
|
33
|
+
else if (outputType instanceof DoubleType) {
|
26
34
|
return ValueFactory.newFloat(BooleanCast.asDouble(value.getBoolean()));
|
27
|
-
}
|
35
|
+
}
|
36
|
+
else if (outputType instanceof StringType) {
|
28
37
|
return ValueFactory.newString(BooleanCast.asString(value.getBoolean()));
|
29
|
-
}
|
38
|
+
}
|
39
|
+
else if (outputType instanceof JsonType) {
|
30
40
|
throw new DataException(String.format("cannot cast boolean to json: \"%s\"", value));
|
31
|
-
}
|
41
|
+
}
|
42
|
+
else {
|
32
43
|
assert (false);
|
33
44
|
return null;
|
34
45
|
}
|
@@ -38,16 +49,21 @@ class JsonCaster
|
|
38
49
|
{
|
39
50
|
if (outputType instanceof BooleanType) {
|
40
51
|
return ValueFactory.newBoolean(LongCast.asBoolean(value.asLong()));
|
41
|
-
}
|
52
|
+
}
|
53
|
+
else if (outputType instanceof LongType) {
|
42
54
|
return value;
|
43
|
-
}
|
55
|
+
}
|
56
|
+
else if (outputType instanceof DoubleType) {
|
44
57
|
return ValueFactory.newFloat(LongCast.asDouble(value.asLong()));
|
45
|
-
}
|
58
|
+
}
|
59
|
+
else if (outputType instanceof StringType) {
|
46
60
|
return ValueFactory.newString(LongCast.asString(value.asLong()));
|
47
|
-
}
|
61
|
+
}
|
62
|
+
else if (outputType instanceof JsonType) {
|
48
63
|
throw new DataException(String.format("cannot cast long to json:: \"%s\"", value));
|
49
|
-
}
|
50
|
-
|
64
|
+
}
|
65
|
+
else {
|
66
|
+
assert false;
|
51
67
|
return null;
|
52
68
|
}
|
53
69
|
}
|
@@ -56,15 +72,20 @@ class JsonCaster
|
|
56
72
|
{
|
57
73
|
if (outputType instanceof BooleanType) {
|
58
74
|
return ValueFactory.newBoolean(DoubleCast.asBoolean(value.toDouble()));
|
59
|
-
}
|
75
|
+
}
|
76
|
+
else if (outputType instanceof LongType) {
|
60
77
|
return ValueFactory.newInteger(DoubleCast.asLong(value.toDouble()));
|
61
|
-
}
|
78
|
+
}
|
79
|
+
else if (outputType instanceof DoubleType) {
|
62
80
|
return value;
|
63
|
-
}
|
81
|
+
}
|
82
|
+
else if (outputType instanceof StringType) {
|
64
83
|
return ValueFactory.newString(DoubleCast.asString(value.toDouble()));
|
65
|
-
}
|
84
|
+
}
|
85
|
+
else if (outputType instanceof JsonType) {
|
66
86
|
throw new DataException(String.format("cannot cast double to json:: \"%s\"", value));
|
67
|
-
}
|
87
|
+
}
|
88
|
+
else {
|
68
89
|
assert (false);
|
69
90
|
return null;
|
70
91
|
}
|
@@ -74,16 +95,21 @@ class JsonCaster
|
|
74
95
|
{
|
75
96
|
if (outputType instanceof BooleanType) {
|
76
97
|
return ValueFactory.newBoolean(StringCast.asBoolean(value.asString()));
|
77
|
-
}
|
98
|
+
}
|
99
|
+
else if (outputType instanceof LongType) {
|
78
100
|
return ValueFactory.newInteger(StringCast.asLong(value.asString()));
|
79
|
-
}
|
101
|
+
}
|
102
|
+
else if (outputType instanceof DoubleType) {
|
80
103
|
return ValueFactory.newFloat(StringCast.asDouble(value.asString()));
|
81
|
-
}
|
104
|
+
}
|
105
|
+
else if (outputType instanceof StringType) {
|
82
106
|
return value;
|
83
|
-
}
|
107
|
+
}
|
108
|
+
else if (outputType instanceof JsonType) {
|
84
109
|
return StringCast.asJson(value.asString());
|
85
|
-
}
|
86
|
-
|
110
|
+
}
|
111
|
+
else {
|
112
|
+
assert false;
|
87
113
|
return null;
|
88
114
|
}
|
89
115
|
}
|
@@ -1,15 +1,16 @@
|
|
1
1
|
package org.embulk.filter.typecast;
|
2
2
|
|
3
|
-
import org.embulk.
|
3
|
+
import org.embulk.filter.typecast.TypecastFilterPlugin.ColumnConfig;
|
4
|
+
import org.embulk.filter.typecast.TypecastFilterPlugin.PluginTask;
|
5
|
+
|
6
|
+
import org.embulk.spi.Exec;
|
7
|
+
import org.embulk.spi.Schema;
|
4
8
|
import org.embulk.spi.type.Type;
|
5
9
|
import org.msgpack.value.ArrayValue;
|
6
10
|
import org.msgpack.value.MapValue;
|
7
11
|
import org.msgpack.value.Value;
|
8
12
|
import org.msgpack.value.ValueFactory;
|
9
13
|
|
10
|
-
import org.embulk.filter.typecast.TypecastFilterPlugin.ColumnConfig;
|
11
|
-
import org.embulk.filter.typecast.TypecastFilterPlugin.PluginTask;
|
12
|
-
|
13
14
|
import org.slf4j.Logger;
|
14
15
|
|
15
16
|
import java.util.HashMap;
|
@@ -65,6 +66,7 @@ public class JsonVisitor
|
|
65
66
|
partialPath.append(".").append(arrayParts[0]);
|
66
67
|
this.shouldVisitSet.add(partialPath.toString());
|
67
68
|
for (int j = 1; j < arrayParts.length; j++) {
|
69
|
+
// Supports both [0] and [*]
|
68
70
|
partialPath.append("[").append(arrayParts[j]);
|
69
71
|
this.shouldVisitSet.add(partialPath.toString());
|
70
72
|
}
|
@@ -93,6 +95,9 @@ public class JsonVisitor
|
|
93
95
|
Value[] newValue = new Value[size];
|
94
96
|
for (int i = 0; i < size; i++) {
|
95
97
|
String k = new StringBuilder(jsonPath).append("[").append(Integer.toString(i)).append("]").toString();
|
98
|
+
if (!shouldVisit(k)) {
|
99
|
+
k = new StringBuilder(jsonPath).append("[*]").toString(); // try [*] too
|
100
|
+
}
|
96
101
|
Value v = arrayValue.get(i);
|
97
102
|
newValue[i] = visit(k, v);
|
98
103
|
}
|
@@ -10,11 +10,14 @@ import org.embulk.config.ConfigSource;
|
|
10
10
|
import org.embulk.config.Task;
|
11
11
|
import org.embulk.config.TaskSource;
|
12
12
|
|
13
|
-
import org.embulk.spi
|
14
|
-
import org.embulk.spi.
|
15
|
-
import org.embulk.spi.
|
16
|
-
import org.embulk.spi.
|
17
|
-
|
13
|
+
import org.embulk.spi.Column;
|
14
|
+
import org.embulk.spi.Exec;
|
15
|
+
import org.embulk.spi.FilterPlugin;
|
16
|
+
import org.embulk.spi.Page;
|
17
|
+
import org.embulk.spi.PageBuilder;
|
18
|
+
import org.embulk.spi.PageOutput;
|
19
|
+
import org.embulk.spi.PageReader;
|
20
|
+
import org.embulk.spi.Schema;
|
18
21
|
import org.embulk.spi.type.TimestampType;
|
19
22
|
import org.embulk.spi.type.Type;
|
20
23
|
import org.joda.time.DateTimeZone;
|
@@ -59,7 +62,6 @@ public class TypecastFilterPlugin implements FilterPlugin
|
|
59
62
|
@ConfigDefault("false")
|
60
63
|
Boolean getStopOnInvalidRecord();
|
61
64
|
|
62
|
-
|
63
65
|
@Config("default_timezone")
|
64
66
|
@ConfigDefault("\"UTC\"")
|
65
67
|
public DateTimeZone getDefaultTimeZone();
|
@@ -91,7 +93,8 @@ public class TypecastFilterPlugin implements FilterPlugin
|
|
91
93
|
String name = columnConfig.getName();
|
92
94
|
if (name.startsWith("$.")) { // check only top level column name
|
93
95
|
String firstName = name.split("\\.", 3)[1];
|
94
|
-
|
96
|
+
String firstNameWithoutArray = firstName.split("\\[")[0];
|
97
|
+
inputSchema.lookupColumn(firstNameWithoutArray);
|
95
98
|
}
|
96
99
|
else {
|
97
100
|
inputSchema.lookupColumn(name);
|
@@ -137,7 +140,7 @@ public class TypecastFilterPlugin implements FilterPlugin
|
|
137
140
|
|
138
141
|
@Override
|
139
142
|
public PageOutput open(final TaskSource taskSource, final Schema inputSchema,
|
140
|
-
|
143
|
+
final Schema outputSchema, final PageOutput output)
|
141
144
|
{
|
142
145
|
final PluginTask task = taskSource.loadTask(PluginTask.class);
|
143
146
|
|
@@ -1,13 +1,16 @@
|
|
1
1
|
package org.embulk.filter.typecast.cast;
|
2
2
|
|
3
|
-
import org.embulk.spi.time.Timestamp;
|
4
3
|
import org.embulk.spi.DataException;
|
4
|
+
import org.embulk.spi.time.Timestamp;
|
5
5
|
import org.msgpack.value.Value;
|
6
6
|
|
7
|
-
public class BooleanCast
|
7
|
+
public class BooleanCast
|
8
|
+
{
|
9
|
+
private BooleanCast() {}
|
10
|
+
|
8
11
|
private static String buildErrorMessage(String as, boolean value)
|
9
12
|
{
|
10
|
-
return String.format("cannot cast
|
13
|
+
return String.format("cannot cast boolean to %s: \"%s\"", as, value);
|
11
14
|
}
|
12
15
|
|
13
16
|
public static boolean asBoolean(boolean value) throws DataException
|
@@ -1,10 +1,13 @@
|
|
1
1
|
package org.embulk.filter.typecast.cast;
|
2
2
|
|
3
|
-
import org.embulk.spi.time.Timestamp;
|
4
3
|
import org.embulk.spi.DataException;
|
4
|
+
import org.embulk.spi.time.Timestamp;
|
5
5
|
import org.msgpack.value.Value;
|
6
6
|
|
7
|
-
public class DoubleCast
|
7
|
+
public class DoubleCast
|
8
|
+
{
|
9
|
+
private DoubleCast() {}
|
10
|
+
|
8
11
|
private static String buildErrorMessage(String as, double value)
|
9
12
|
{
|
10
13
|
return String.format("cannot cast double to %s: \"%s\"", as, value);
|
@@ -17,7 +20,7 @@ public class DoubleCast {
|
|
17
20
|
|
18
21
|
public static long asLong(double value) throws DataException
|
19
22
|
{
|
20
|
-
return (long)value;
|
23
|
+
return (long) value;
|
21
24
|
}
|
22
25
|
|
23
26
|
public static double asDouble(double value) throws DataException
|
@@ -37,7 +40,7 @@ public class DoubleCast {
|
|
37
40
|
|
38
41
|
public static Timestamp asTimestamp(double value) throws DataException
|
39
42
|
{
|
40
|
-
long epochSecond = (long)value;
|
43
|
+
long epochSecond = (long) value;
|
41
44
|
long nanoAdjustMent = (long) ((value - epochSecond) * 1000000000);
|
42
45
|
return Timestamp.ofEpochSecond(epochSecond, nanoAdjustMent);
|
43
46
|
}
|
@@ -4,10 +4,13 @@ import org.embulk.spi.DataException;
|
|
4
4
|
import org.embulk.spi.time.Timestamp;
|
5
5
|
import org.msgpack.value.Value;
|
6
6
|
|
7
|
-
public class JsonCast
|
7
|
+
public class JsonCast
|
8
|
+
{
|
9
|
+
private JsonCast() {}
|
10
|
+
|
8
11
|
private static String buildErrorMessage(String as, Value value)
|
9
12
|
{
|
10
|
-
return String.format("cannot cast
|
13
|
+
return String.format("cannot cast Json to %s: \"%s\"", as, value);
|
11
14
|
}
|
12
15
|
|
13
16
|
public static boolean asBoolean(Value value) throws DataException
|
@@ -1,10 +1,13 @@
|
|
1
1
|
package org.embulk.filter.typecast.cast;
|
2
2
|
|
3
|
-
import org.embulk.spi.time.Timestamp;
|
4
3
|
import org.embulk.spi.DataException;
|
4
|
+
import org.embulk.spi.time.Timestamp;
|
5
5
|
import org.msgpack.value.Value;
|
6
6
|
|
7
|
-
public class LongCast
|
7
|
+
public class LongCast
|
8
|
+
{
|
9
|
+
private LongCast() {}
|
10
|
+
|
8
11
|
private static String buildErrorMessage(String as, long value)
|
9
12
|
{
|
10
13
|
return String.format("cannot cast long to %s: \"%s\"", as, value);
|
@@ -30,7 +33,7 @@ public class LongCast {
|
|
30
33
|
|
31
34
|
public static double asDouble(long value) throws DataException
|
32
35
|
{
|
33
|
-
return (double)value;
|
36
|
+
return (double) value;
|
34
37
|
}
|
35
38
|
|
36
39
|
public static String asString(long value) throws DataException
|
@@ -2,14 +2,15 @@ package org.embulk.filter.typecast.cast;
|
|
2
2
|
|
3
3
|
import com.google.common.collect.ImmutableSet;
|
4
4
|
import org.embulk.spi.DataException;
|
5
|
-
import org.embulk.spi.json.JsonParser;
|
6
5
|
import org.embulk.spi.json.JsonParseException;
|
6
|
+
import org.embulk.spi.json.JsonParser;
|
7
7
|
import org.embulk.spi.time.Timestamp;
|
8
|
-
import org.embulk.spi.time.TimestampParser;
|
9
8
|
import org.embulk.spi.time.TimestampParseException;
|
9
|
+
import org.embulk.spi.time.TimestampParser;
|
10
10
|
import org.msgpack.value.Value;
|
11
11
|
|
12
|
-
public class StringCast
|
12
|
+
public class StringCast
|
13
|
+
{
|
13
14
|
private static final JsonParser jsonParser = new JsonParser();
|
14
15
|
|
15
16
|
// copy from csv plugin
|
@@ -29,6 +30,7 @@ public class StringCast {
|
|
29
30
|
"off", "Off", "OFF",
|
30
31
|
"0");
|
31
32
|
|
33
|
+
private StringCast() {}
|
32
34
|
|
33
35
|
private static String buildErrorMessage(String as, String value)
|
34
36
|
{
|
@@ -5,7 +5,10 @@ import org.embulk.spi.time.Timestamp;
|
|
5
5
|
import org.embulk.spi.time.TimestampFormatter;
|
6
6
|
import org.msgpack.value.Value;
|
7
7
|
|
8
|
-
public class TimestampCast
|
8
|
+
public class TimestampCast
|
9
|
+
{
|
10
|
+
private TimestampCast() {}
|
11
|
+
|
9
12
|
private static String buildErrorMessage(String as, Timestamp value)
|
10
13
|
{
|
11
14
|
return String.format("cannot cast Timestamp to %s: \"%s\"", as, value);
|
@@ -25,7 +28,7 @@ public class TimestampCast {
|
|
25
28
|
{
|
26
29
|
long epochSecond = value.getEpochSecond();
|
27
30
|
long nano = value.getNano();
|
28
|
-
return epochSecond + ((double)nano / 1000000000.0);
|
31
|
+
return epochSecond + ((double) nano / 1000000000.0);
|
29
32
|
}
|
30
33
|
|
31
34
|
public static String asString(Timestamp value, TimestampFormatter formatter) throws DataException
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-typecast
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naotoshi Seo
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-05-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -52,6 +52,7 @@ files:
|
|
52
52
|
- README.md
|
53
53
|
- build.gradle
|
54
54
|
- config/checkstyle/checkstyle.xml
|
55
|
+
- example/empty.yml
|
55
56
|
- example/example.csv
|
56
57
|
- example/example.yml
|
57
58
|
- example/example2.yml
|
@@ -60,6 +61,7 @@ files:
|
|
60
61
|
- gradlew
|
61
62
|
- gradlew.bat
|
62
63
|
- lib/embulk/filter/typecast.rb
|
64
|
+
- settings.gradle
|
63
65
|
- src/main/java/org/embulk/filter/typecast/ColumnCaster.java
|
64
66
|
- src/main/java/org/embulk/filter/typecast/ColumnVisitorImpl.java
|
65
67
|
- src/main/java/org/embulk/filter/typecast/JsonCaster.java
|
@@ -72,7 +74,7 @@ files:
|
|
72
74
|
- src/main/java/org/embulk/filter/typecast/cast/StringCast.java
|
73
75
|
- src/main/java/org/embulk/filter/typecast/cast/TimestampCast.java
|
74
76
|
- src/test/java/org/embulk/filter/TestTypecastFilterPlugin.java
|
75
|
-
- classpath/embulk-filter-typecast-0.1.
|
77
|
+
- classpath/embulk-filter-typecast-0.1.3.jar
|
76
78
|
homepage: https://github.com/sonots/embulk-filter-typecast
|
77
79
|
licenses:
|
78
80
|
- MIT
|