embulk-filter-typecast 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: db51b360ea765f3b96a301ec98144cc1f0033ebe
4
- data.tar.gz: e9c35bc3f9af842a370b655bd81f8253d49d56e9
3
+ metadata.gz: e8a2482d0bd6fc6109bb8763f9a1f9db6b0733db
4
+ data.tar.gz: b80c4d01e823e3a4b59594002277fc78e1ee0ef9
5
5
  SHA512:
6
- metadata.gz: 7dd4e6b33e658ad72e22b7ad65fe4738244505ac7ebd75d6b2333ff3f260bdd9ef979f5b800d3608836ff928fcd4e1d56e78290dab8cabc5c7a079660706097d
7
- data.tar.gz: 6d356bbf862ad960684b65eda40e8bcae43f8559515cd1ee36468f92fb997f8a0e225c3b88dfe2ec0d00216e41bae65b494fed58046b3a36e90907965ed78f6a
6
+ metadata.gz: 7473b57b158d8936e14015358dcd21d3bfe852f0b7c33de10da4ad38aa4553b3ddf39e5b41c9ed11df32713c42a33506aba22820b2236590ab124035a1056783
7
+ data.tar.gz: 354bf8ab52c2ee5ba124af046abea9e311213c4ebaa03991f432504cc87094a0e1d4815c3c94434ee843de836f6e2146331540fd23b286071c000011bb64c55a
data/.gitignore CHANGED
@@ -6,7 +6,6 @@
6
6
  /classpath/
7
7
  build/
8
8
  .idea
9
- *.csv
10
9
  .tags
11
10
  .ruby-version
12
11
  *.iml
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ # 0.1.2 (2016-04-28)
2
+
3
+ Enhancements:
4
+
5
+ * Support typecasting JSON path values inside JSON columns that were cast from string
6
+
1
7
  # 0.1.1 (2016-04-28)
2
8
 
3
9
  Enhancements:
data/build.gradle CHANGED
@@ -13,7 +13,7 @@ configurations {
13
13
  provided
14
14
  }
15
15
 
16
- version = "0.1.1"
16
+ version = "0.1.2"
17
17
  sourceCompatibility = 1.7
18
18
  targetCompatibility = 1.7
19
19
 
@@ -0,0 +1,11 @@
1
+ timestamp,null,long,string,double,json1,json2,boolean
2
+ 2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,{"string":"0"},{"long":0},true
3
+ 2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,{"string":"1"},{"long":1},true
4
+ 2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,{"string":"2"},{"long":2},true
5
+ 2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,{"string":"3"},{"long":3},true
6
+ 2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,{"string":"4"},{"long":4},true
7
+ 2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,{"string":"5"},{"long":5},false
8
+ 2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,{"string":"6"},{"long":6},false
9
+ 2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,{"string":"7"},{"long":7},false
10
+ 2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,{"string":"8"},{"long":8},false
11
+ 2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,{"string":"9"},{"long":9},false
data/example/example2.yml CHANGED
@@ -14,7 +14,8 @@ in:
14
14
  - {name: long, type: string}
15
15
  - {name: string, type: string}
16
16
  - {name: double, type: string}
17
- - {name: json, type: string}
17
+ - {name: json1, type: string}
18
+ - {name: json2, type: string}
18
19
  - {name: boolean, type: boolean}
19
20
  filters:
20
21
  - type: typecast
@@ -24,7 +25,10 @@ filters:
24
25
  - {name: long, type: long}
25
26
  - {name: string, type: string}
26
27
  - {name: double, type: double}
27
- - {name: json, type: json}
28
+ - {name: json1, type: json}
29
+ - {name: json2, type: json}
28
30
  - {name: boolean, type: boolean}
31
+ - {name: "$.json1.string", type: long}
32
+ - {name: "$.json2.long", type: long}
29
33
  out:
30
34
  type: "null"
@@ -2,17 +2,45 @@ package org.embulk.filter.typecast;
2
2
 
3
3
  import org.embulk.filter.typecast.cast.*;
4
4
 
5
+ import org.embulk.filter.typecast.TypecastFilterPlugin.PluginTask;
6
+
5
7
  import org.embulk.spi.Column;
6
8
  import org.embulk.spi.DataException;
9
+ import org.embulk.spi.Exec;
7
10
  import org.embulk.spi.PageBuilder;
11
+ import org.embulk.spi.PageReader;
12
+ import org.embulk.spi.Schema;
8
13
  import org.embulk.spi.time.Timestamp;
9
14
  import org.embulk.spi.time.TimestampFormatter;
10
15
  import org.embulk.spi.time.TimestampParser;
11
16
  import org.embulk.spi.type.*;
12
17
  import org.msgpack.value.Value;
13
18
 
14
- class TypecastPageBuilder {
15
- static void setFromBoolean(PageBuilder pageBuilder, Column outputColumn, boolean value) {
19
+ import org.slf4j.Logger;
20
+
21
+
22
+ class ColumnCaster
23
+ {
24
+ private static final Logger logger = Exec.getLogger(TypecastFilterPlugin.class);
25
+ private final PluginTask task;
26
+ private final Schema inputSchema;
27
+ private final Schema outputSchema;
28
+ private final PageReader pageReader;
29
+ private final PageBuilder pageBuilder;
30
+ private final JsonVisitor jsonVisitor;
31
+
32
+ ColumnCaster(TypecastFilterPlugin.PluginTask task, Schema inputSchema, Schema outputSchema,
33
+ PageReader pageReader, PageBuilder pageBuilder)
34
+ {
35
+ this.task = task;
36
+ this.inputSchema = inputSchema;
37
+ this.outputSchema = outputSchema;
38
+ this.pageReader = pageReader;
39
+ this.pageBuilder = pageBuilder;
40
+ this.jsonVisitor = new JsonVisitor(task, inputSchema, outputSchema);
41
+ }
42
+
43
+ public void setFromBoolean(Column outputColumn, boolean value) {
16
44
  Type outputType = outputColumn.getType();
17
45
  if (outputType instanceof BooleanType) {
18
46
  pageBuilder.setBoolean(outputColumn, BooleanCast.asBoolean(value));
@@ -31,7 +59,7 @@ class TypecastPageBuilder {
31
59
  }
32
60
  }
33
61
 
34
- static void setFromLong(PageBuilder pageBuilder, Column outputColumn, long value)
62
+ public void setFromLong(Column outputColumn, long value)
35
63
  {
36
64
  Type outputType = outputColumn.getType();
37
65
  if (outputType instanceof BooleanType) {
@@ -51,7 +79,7 @@ class TypecastPageBuilder {
51
79
  }
52
80
  }
53
81
 
54
- static void setFromDouble(PageBuilder pageBuilder, Column outputColumn, double value)
82
+ public void setFromDouble(Column outputColumn, double value)
55
83
  {
56
84
  try {
57
85
  Type outputType = outputColumn.getType();
@@ -76,7 +104,7 @@ class TypecastPageBuilder {
76
104
  }
77
105
  }
78
106
 
79
- static void setFromString(PageBuilder pageBuilder, Column outputColumn, String value, TimestampParser timestampParser)
107
+ public void setFromString(Column outputColumn, String value, TimestampParser timestampParser)
80
108
  {
81
109
  Type outputType = outputColumn.getType();
82
110
  if (outputType instanceof BooleanType) {
@@ -90,13 +118,16 @@ class TypecastPageBuilder {
90
118
  } else if (outputType instanceof TimestampType) {
91
119
  pageBuilder.setTimestamp(outputColumn, StringCast.asTimestamp(value, timestampParser));
92
120
  } else if (outputType instanceof JsonType) {
93
- pageBuilder.setJson(outputColumn, StringCast.asJson(value));
121
+ Value jsonValue = StringCast.asJson(value);
122
+ String jsonPath = new StringBuilder("$.").append(outputColumn.getName()).toString();
123
+ Value castedValue = jsonVisitor.visit(jsonPath, jsonValue);
124
+ pageBuilder.setJson(outputColumn, castedValue);
94
125
  } else {
95
126
  assert(false);
96
127
  }
97
128
  }
98
129
 
99
- static void setFromTimestamp(PageBuilder pageBuilder, Column outputColumn, Timestamp value, TimestampFormatter timestampFormatter)
130
+ public void setFromTimestamp(Column outputColumn, Timestamp value, TimestampFormatter timestampFormatter)
100
131
  {
101
132
  Type outputType = outputColumn.getType();
102
133
  if (outputType instanceof BooleanType) {
@@ -116,21 +147,23 @@ class TypecastPageBuilder {
116
147
  }
117
148
  }
118
149
 
119
- static void setFromJson(PageBuilder pageBuilder, Column outputColumn, Value value)
150
+ public void setFromJson(Column outputColumn, Value value)
120
151
  {
152
+ String jsonPath = new StringBuilder("$.").append(outputColumn.getName()).toString();
153
+ Value castedValue = jsonVisitor.visit(jsonPath, value);
121
154
  Type outputType = outputColumn.getType();
122
155
  if (outputType instanceof BooleanType) {
123
- pageBuilder.setBoolean(outputColumn, JsonCast.asBoolean(value));
156
+ pageBuilder.setBoolean(outputColumn, JsonCast.asBoolean(castedValue));
124
157
  } else if (outputType instanceof LongType) {
125
- pageBuilder.setLong(outputColumn, JsonCast.asLong(value));
158
+ pageBuilder.setLong(outputColumn, JsonCast.asLong(castedValue));
126
159
  } else if (outputType instanceof DoubleType) {
127
- pageBuilder.setDouble(outputColumn, JsonCast.asDouble(value));
160
+ pageBuilder.setDouble(outputColumn, JsonCast.asDouble(castedValue));
128
161
  } else if (outputType instanceof StringType) {
129
- pageBuilder.setString(outputColumn, JsonCast.asString(value));
162
+ pageBuilder.setString(outputColumn, JsonCast.asString(castedValue));
130
163
  } else if (outputType instanceof TimestampType) {
131
- pageBuilder.setTimestamp(outputColumn, JsonCast.asTimestamp(value));
164
+ pageBuilder.setTimestamp(outputColumn, JsonCast.asTimestamp(castedValue));
132
165
  } else if (outputType instanceof JsonType) {
133
- pageBuilder.setJson(outputColumn, JsonCast.asJson(value));
166
+ pageBuilder.setJson(outputColumn, JsonCast.asJson(castedValue));
134
167
  } else {
135
168
  assert(false);
136
169
  }
@@ -1,11 +1,8 @@
1
1
  package org.embulk.filter.typecast;
2
2
 
3
3
  import org.embulk.spi.*;
4
- import org.embulk.spi.type.Type;
5
- import org.msgpack.value.ArrayValue;
6
- import org.msgpack.value.MapValue;
7
- import org.msgpack.value.Value;
8
- import org.msgpack.value.ValueFactory;
4
+ import org.embulk.spi.type.StringType;
5
+ import org.embulk.spi.type.TimestampType;
9
6
 
10
7
  import org.embulk.filter.typecast.TypecastFilterPlugin.ColumnConfig;
11
8
  import org.embulk.filter.typecast.TypecastFilterPlugin.PluginTask;
@@ -16,8 +13,6 @@ import org.joda.time.DateTimeZone;
16
13
  import org.slf4j.Logger;
17
14
 
18
15
  import java.util.HashMap;
19
- import java.util.HashSet;
20
- import java.util.Map;
21
16
 
22
17
  public class ColumnVisitorImpl
23
18
  implements ColumnVisitor
@@ -31,11 +26,10 @@ public class ColumnVisitorImpl
31
26
  private final HashMap<String, Column> outputColumnMap = new HashMap<>();
32
27
  private final HashMap<String, TimestampParser> timestampParserMap = new HashMap<>();
33
28
  private final HashMap<String, TimestampFormatter> timestampFormatterMap = new HashMap<>();
34
- private final HashSet<String> shouldVisitJsonPathSet = new HashSet<>();
35
- private final HashMap<String, Type> jsonPathTypeMap = new HashMap<>();
29
+ private final ColumnCaster columnCaster;
36
30
 
37
31
  ColumnVisitorImpl(PluginTask task, Schema inputSchema, Schema outputSchema,
38
- PageReader pageReader, PageBuilder pageBuilder)
32
+ PageReader pageReader, PageBuilder pageBuilder)
39
33
  {
40
34
  this.task = task;
41
35
  this.inputSchema = inputSchema;
@@ -43,11 +37,11 @@ public class ColumnVisitorImpl
43
37
  this.pageReader = pageReader;
44
38
  this.pageBuilder = pageBuilder;
45
39
 
40
+ this.columnCaster = new ColumnCaster(task, inputSchema, outputSchema, pageReader, pageBuilder);
41
+
46
42
  buildOutputColumnMap();
47
43
  buildTimestampParserMap();
48
44
  buildTimestampFormatterMap();
49
- buildShouldVisitJsonPathSet();;
50
- buildJsonPathTypeMap();
51
45
  }
52
46
 
53
47
  private void buildOutputColumnMap()
@@ -60,132 +54,46 @@ public class ColumnVisitorImpl
60
54
 
61
55
  private void buildTimestampParserMap()
62
56
  {
63
- // columnName or jsonPath => TimestampParser
57
+ // columnName => TimestampParser
64
58
  for (ColumnConfig columnConfig : task.getColumns()) {
65
- TimestampParser parser = getTimestampParser(columnConfig, task);
66
- this.timestampParserMap.put(columnConfig.getName(), parser);
67
- }
68
- }
69
-
70
- private TimestampParser getTimestampParser(ColumnConfig columnConfig, PluginTask task)
71
- {
72
- DateTimeZone timezone = columnConfig.getTimeZone().or(task.getDefaultTimeZone());
73
- String format = columnConfig.getFormat().or(task.getDefaultTimestampFormat());
74
- return new TimestampParser(task.getJRuby(), format, timezone);
75
- }
76
-
77
- private void buildTimestampFormatterMap()
78
- {
79
- // columnName or jsonPath => TimestampFormatter
80
- for (ColumnConfig columnConfig : task.getColumns()) {
81
- TimestampFormatter parser = getTimestampFormatter(columnConfig, task);
82
- this.timestampFormatterMap.put(columnConfig.getName(), parser);
83
- }
84
- }
85
-
86
- private TimestampFormatter getTimestampFormatter(ColumnConfig columnConfig, PluginTask task)
87
- {
88
- String format = columnConfig.getFormat().or(task.getDefaultTimestampFormat());
89
- DateTimeZone timezone = columnConfig.getTimeZone().or(task.getDefaultTimeZone());
90
- return new TimestampFormatter(task.getJRuby(), format, timezone);
91
- }
92
-
93
- private void buildShouldVisitJsonPathSet()
94
- {
95
- // json partial path => Boolean to avoid unnecessary type: json visit
96
- for (ColumnConfig columnConfig : task.getColumns()) {
97
- String name = columnConfig.getName();
98
- if (!name.startsWith("$.")) {
99
- continue;
59
+ if (columnConfig.getName().startsWith("$.")) {
60
+ continue; // type: json columns do not support type: timestamp
100
61
  }
101
- String[] parts = name.split("\\.");
102
- StringBuilder partialPath = new StringBuilder("$");
103
- for (int i = 1; i < parts.length; i++) {
104
- if (parts[i].contains("[")) {
105
- String[] arrayParts = parts[i].split("\\[");
106
- partialPath.append(".").append(arrayParts[0]);
107
- this.shouldVisitJsonPathSet.add(partialPath.toString());
108
- for (int j = 1; j < arrayParts.length; j++) {
109
- partialPath.append("[").append(arrayParts[j]);
110
- this.shouldVisitJsonPathSet.add(partialPath.toString());
111
- }
112
- }
113
- else {
114
- partialPath.append(".").append(parts[i]);
115
- this.shouldVisitJsonPathSet.add(partialPath.toString());
116
- }
62
+ Column inputColumn = inputSchema.lookupColumn(columnConfig.getName());
63
+ if (inputColumn.getType() instanceof StringType && columnConfig.getType() instanceof TimestampType) {
64
+ TimestampParser parser = getTimestampParser(columnConfig, task);
65
+ this.timestampParserMap.put(columnConfig.getName(), parser);
117
66
  }
118
67
  }
119
68
  }
120
69
 
121
- private void buildJsonPathTypeMap()
70
+ private void buildTimestampFormatterMap()
122
71
  {
123
- // json path => Type
72
+ // columnName => TimestampFormatter
124
73
  for (ColumnConfig columnConfig : task.getColumns()) {
125
- String name = columnConfig.getName();
126
- if (!name.startsWith("$.")) {
127
- continue;
74
+ if (columnConfig.getName().startsWith("$.")) {
75
+ continue; // type: json columns do not have type: timestamp
76
+ }
77
+ Column inputColumn = inputSchema.lookupColumn(columnConfig.getName());
78
+ if (inputColumn.getType() instanceof TimestampType && columnConfig.getType() instanceof StringType) {
79
+ TimestampFormatter parser = getTimestampFormatter(columnConfig, task);
80
+ this.timestampFormatterMap.put(columnConfig.getName(), parser);
128
81
  }
129
- Type type = columnConfig.getType();
130
- this.jsonPathTypeMap.put(name, type);
131
82
  }
132
83
  }
133
84
 
134
- private boolean shouldVisitJsonPath(String jsonPath)
85
+ private TimestampParser getTimestampParser(ColumnConfig columnConfig, PluginTask task)
135
86
  {
136
- return shouldVisitJsonPathSet.contains(jsonPath);
87
+ DateTimeZone timezone = columnConfig.getTimeZone().or(task.getDefaultTimeZone());
88
+ String format = columnConfig.getFormat().or(task.getDefaultTimestampFormat());
89
+ return new TimestampParser(task.getJRuby(), format, timezone);
137
90
  }
138
91
 
139
- private Value castJsonRecursively(PluginTask task, String jsonPath, Value value)
92
+ private TimestampFormatter getTimestampFormatter(ColumnConfig columnConfig, PluginTask task)
140
93
  {
141
- if (!shouldVisitJsonPath(jsonPath)) {
142
- return value;
143
- }
144
- if (value.isArrayValue()) {
145
- ArrayValue arrayValue = value.asArrayValue();
146
- int size = arrayValue.size();
147
- Value[] newValue = new Value[size];
148
- for (int i = 0; i < size; i++) {
149
- String k = new StringBuilder(jsonPath).append("[").append(Integer.toString(i)).append("]").toString();
150
- Value v = arrayValue.get(i);
151
- newValue[i] = castJsonRecursively(task, k, v);
152
- }
153
- return ValueFactory.newArray(newValue, true);
154
- }
155
- else if (value.isMapValue()) {
156
- MapValue mapValue = value.asMapValue();
157
- int size = mapValue.size() * 2;
158
- Value[] newValue = new Value[size];
159
- int i = 0;
160
- for (Map.Entry<Value, Value> entry : mapValue.entrySet()) {
161
- Value k = entry.getKey();
162
- Value v = entry.getValue();
163
- String newPath = new StringBuilder(jsonPath).append(".").append(k.asStringValue().asString()).toString();
164
- Value r = castJsonRecursively(task, newPath, v);
165
- newValue[i++] = k;
166
- newValue[i++] = r;
167
- }
168
- return ValueFactory.newMap(newValue, true);
169
- }
170
- else if (value.isBooleanValue()) {
171
- Type outputType = jsonPathTypeMap.get(jsonPath);
172
- return TypecastJsonBuilder.getFromBoolean(outputType, value.asBooleanValue().getBoolean());
173
- }
174
- else if (value.isIntegerValue()) {
175
- Type outputType = jsonPathTypeMap.get(jsonPath);
176
- return TypecastJsonBuilder.getFromLong(outputType, value.asIntegerValue().asLong());
177
- }
178
- else if (value.isFloatValue()) {
179
- Type outputType = jsonPathTypeMap.get(jsonPath);
180
- return TypecastJsonBuilder.getFromDouble(outputType, value.asFloatValue().toDouble());
181
- }
182
- else if (value.isStringValue()) {
183
- Type outputType = jsonPathTypeMap.get(jsonPath);
184
- return TypecastJsonBuilder.getFromString(outputType, value.asStringValue().asString());
185
- }
186
- else {
187
- return value;
188
- }
94
+ String format = columnConfig.getFormat().or(task.getDefaultTimestampFormat());
95
+ DateTimeZone timezone = columnConfig.getTimeZone().or(task.getDefaultTimeZone());
96
+ return new TimestampFormatter(task.getJRuby(), format, timezone);
189
97
  }
190
98
 
191
99
  private interface PageBuildable
@@ -217,7 +125,7 @@ public class ColumnVisitorImpl
217
125
  final Column outputColumn = outputColumnMap.get(inputColumn.getName());
218
126
  PageBuildable op = new PageBuildable() {
219
127
  public void run() throws DataException {
220
- TypecastPageBuilder.setFromBoolean(pageBuilder, outputColumn, pageReader.getBoolean(inputColumn));
128
+ columnCaster.setFromBoolean(outputColumn, pageReader.getBoolean(inputColumn));
221
129
  }
222
130
  };
223
131
  withStopOnInvalidRecord(op, inputColumn, outputColumn);
@@ -229,7 +137,7 @@ public class ColumnVisitorImpl
229
137
  final Column outputColumn = outputColumnMap.get(inputColumn.getName());
230
138
  PageBuildable op = new PageBuildable() {
231
139
  public void run() throws DataException {
232
- TypecastPageBuilder.setFromLong(pageBuilder, outputColumn, pageReader.getLong(inputColumn));
140
+ columnCaster.setFromLong(outputColumn, pageReader.getLong(inputColumn));
233
141
  }
234
142
  };
235
143
  withStopOnInvalidRecord(op, inputColumn, outputColumn);
@@ -241,7 +149,7 @@ public class ColumnVisitorImpl
241
149
  final Column outputColumn = outputColumnMap.get(inputColumn.getName());
242
150
  PageBuildable op = new PageBuildable() {
243
151
  public void run() throws DataException {
244
- TypecastPageBuilder.setFromDouble(pageBuilder, outputColumn, pageReader.getDouble(inputColumn));
152
+ columnCaster.setFromDouble(outputColumn, pageReader.getDouble(inputColumn));
245
153
  }
246
154
  };
247
155
  withStopOnInvalidRecord(op, inputColumn, outputColumn);
@@ -254,8 +162,7 @@ public class ColumnVisitorImpl
254
162
  final TimestampParser timestampParser = timestampParserMap.get(inputColumn.getName());
255
163
  PageBuildable op = new PageBuildable() {
256
164
  public void run() throws DataException {
257
- TypecastPageBuilder.setFromString(
258
- pageBuilder, outputColumn, pageReader.getString(inputColumn), timestampParser);
165
+ columnCaster.setFromString(outputColumn, pageReader.getString(inputColumn), timestampParser);
259
166
  }
260
167
  };
261
168
  withStopOnInvalidRecord(op, inputColumn, outputColumn);
@@ -268,8 +175,7 @@ public class ColumnVisitorImpl
268
175
  final TimestampFormatter timestampFormatter = timestampFormatterMap.get(inputColumn.getName());
269
176
  PageBuildable op = new PageBuildable() {
270
177
  public void run() throws DataException {
271
- TypecastPageBuilder.setFromTimestamp(
272
- pageBuilder, outputColumn, pageReader.getTimestamp(inputColumn), timestampFormatter);
178
+ columnCaster.setFromTimestamp(outputColumn, pageReader.getTimestamp(inputColumn), timestampFormatter);
273
179
  }
274
180
  };
275
181
  withStopOnInvalidRecord(op, inputColumn, outputColumn);
@@ -278,13 +184,10 @@ public class ColumnVisitorImpl
278
184
  @Override
279
185
  public void jsonColumn(final Column inputColumn)
280
186
  {
281
- String jsonPath = new StringBuilder("$.").append(inputColumn.getName()).toString();
282
- Value value = pageReader.getJson(inputColumn);
283
- final Value castedValue = castJsonRecursively(task, jsonPath, value);
284
187
  final Column outputColumn = outputColumnMap.get(inputColumn.getName());
285
188
  PageBuildable op = new PageBuildable() {
286
189
  public void run() throws DataException {
287
- TypecastPageBuilder.setFromJson(pageBuilder, outputColumn, castedValue);
190
+ columnCaster.setFromJson(outputColumn, pageReader.getJson(inputColumn));
288
191
  }
289
192
  };
290
193
  withStopOnInvalidRecord(op, inputColumn, outputColumn);
@@ -4,21 +4,28 @@ import org.embulk.filter.typecast.cast.*;
4
4
 
5
5
  import org.embulk.spi.DataException;
6
6
  import org.embulk.spi.type.*;
7
+ import org.msgpack.value.BooleanValue;
8
+ import org.msgpack.value.IntegerValue;
9
+ import org.msgpack.value.FloatValue;
10
+ import org.msgpack.value.StringValue;
7
11
  import org.msgpack.value.Value;
8
12
  import org.msgpack.value.ValueFactory;
9
13
 
10
- class TypecastJsonBuilder {
11
- static Value getFromBoolean(Type outputType, boolean value) {
14
+ class JsonCaster
15
+ {
16
+ public JsonCaster()
17
+ {
18
+ }
19
+
20
+ public Value fromBoolean(Type outputType, BooleanValue value) {
12
21
  if (outputType instanceof BooleanType) {
13
- return ValueFactory.newBoolean(value);
22
+ return value;
14
23
  } else if (outputType instanceof LongType) {
15
- return ValueFactory.newInteger(BooleanCast.asLong(value));
24
+ return ValueFactory.newInteger(BooleanCast.asLong(value.getBoolean()));
16
25
  } else if (outputType instanceof DoubleType) {
17
- return ValueFactory.newFloat(BooleanCast.asDouble(value));
26
+ return ValueFactory.newFloat(BooleanCast.asDouble(value.getBoolean()));
18
27
  } else if (outputType instanceof StringType) {
19
- return ValueFactory.newString(BooleanCast.asString(value));
20
- } else if (outputType instanceof TimestampType) {
21
- throw new DataException(String.format("no timestamp type in json: \"%s\"", value));
28
+ return ValueFactory.newString(BooleanCast.asString(value.getBoolean()));
22
29
  } else if (outputType instanceof JsonType) {
23
30
  throw new DataException(String.format("cannot cast boolean to json: \"%s\"", value));
24
31
  } else {
@@ -27,18 +34,16 @@ class TypecastJsonBuilder {
27
34
  }
28
35
  }
29
36
 
30
- static Value getFromLong(Type outputType, long value)
37
+ public Value fromLong(Type outputType, IntegerValue value)
31
38
  {
32
39
  if (outputType instanceof BooleanType) {
33
- return ValueFactory.newBoolean(LongCast.asBoolean(value));
40
+ return ValueFactory.newBoolean(LongCast.asBoolean(value.asLong()));
34
41
  } else if (outputType instanceof LongType) {
35
- return ValueFactory.newInteger(value);
42
+ return value;
36
43
  } else if (outputType instanceof DoubleType) {
37
- return ValueFactory.newFloat(LongCast.asDouble(value));
44
+ return ValueFactory.newFloat(LongCast.asDouble(value.asLong()));
38
45
  } else if (outputType instanceof StringType) {
39
- return ValueFactory.newString(LongCast.asString(value));
40
- } else if (outputType instanceof TimestampType) {
41
- throw new DataException(String.format("no timestamp type in json: \"%s\"", value));
46
+ return ValueFactory.newString(LongCast.asString(value.asLong()));
42
47
  } else if (outputType instanceof JsonType) {
43
48
  throw new DataException(String.format("cannot cast long to json:: \"%s\"", value));
44
49
  } else {
@@ -47,18 +52,16 @@ class TypecastJsonBuilder {
47
52
  }
48
53
  }
49
54
 
50
- static Value getFromDouble(Type outputType, double value)
55
+ public Value fromDouble(Type outputType, FloatValue value)
51
56
  {
52
57
  if (outputType instanceof BooleanType) {
53
- return ValueFactory.newBoolean(DoubleCast.asBoolean(value));
58
+ return ValueFactory.newBoolean(DoubleCast.asBoolean(value.toDouble()));
54
59
  } else if (outputType instanceof LongType) {
55
- return ValueFactory.newInteger(DoubleCast.asLong(value));
60
+ return ValueFactory.newInteger(DoubleCast.asLong(value.toDouble()));
56
61
  } else if (outputType instanceof DoubleType) {
57
- return ValueFactory.newFloat(DoubleCast.asDouble(value));
62
+ return value;
58
63
  } else if (outputType instanceof StringType) {
59
- return ValueFactory.newString(DoubleCast.asString(value));
60
- } else if (outputType instanceof TimestampType) {
61
- throw new DataException(String.format("no timestamp type in json: \"%s\"", value));
64
+ return ValueFactory.newString(DoubleCast.asString(value.toDouble()));
62
65
  } else if (outputType instanceof JsonType) {
63
66
  throw new DataException(String.format("cannot cast double to json:: \"%s\"", value));
64
67
  } else {
@@ -67,40 +70,18 @@ class TypecastJsonBuilder {
67
70
  }
68
71
  }
69
72
 
70
- static Value getFromString(Type outputType, String value)
71
- {
72
- if (outputType instanceof BooleanType) {
73
- return ValueFactory.newBoolean(StringCast.asBoolean(value));
74
- } else if (outputType instanceof LongType) {
75
- return ValueFactory.newInteger(StringCast.asLong(value));
76
- } else if (outputType instanceof DoubleType) {
77
- return ValueFactory.newFloat(StringCast.asDouble(value));
78
- } else if (outputType instanceof StringType) {
79
- return ValueFactory.newString(StringCast.asString(value));
80
- } else if (outputType instanceof TimestampType) {
81
- throw new DataException(String.format("no timestamp type in json: \"%s\"", value));
82
- } else if (outputType instanceof JsonType) {
83
- return StringCast.asJson(value);
84
- } else {
85
- assert(false);
86
- return null;
87
- }
88
- }
89
-
90
- static Value getFromJson(Type outputType, Value value)
73
+ public Value fromString(Type outputType, StringValue value)
91
74
  {
92
75
  if (outputType instanceof BooleanType) {
93
- return ValueFactory.newBoolean(JsonCast.asBoolean(value));
76
+ return ValueFactory.newBoolean(StringCast.asBoolean(value.asString()));
94
77
  } else if (outputType instanceof LongType) {
95
- return ValueFactory.newInteger(JsonCast.asLong(value));
78
+ return ValueFactory.newInteger(StringCast.asLong(value.asString()));
96
79
  } else if (outputType instanceof DoubleType) {
97
- return ValueFactory.newFloat(JsonCast.asDouble(value));
80
+ return ValueFactory.newFloat(StringCast.asDouble(value.asString()));
98
81
  } else if (outputType instanceof StringType) {
99
- return ValueFactory.newString(JsonCast.asString(value));
100
- } else if (outputType instanceof TimestampType) {
101
- throw new DataException(String.format("no timestamp type in json: \"%s\"", value));
102
- } else if (outputType instanceof JsonType) {
103
82
  return value;
83
+ } else if (outputType instanceof JsonType) {
84
+ return StringCast.asJson(value.asString());
104
85
  } else {
105
86
  assert(false);
106
87
  return null;
@@ -0,0 +1,136 @@
1
+ package org.embulk.filter.typecast;
2
+
3
+ import org.embulk.spi.*;
4
+ import org.embulk.spi.type.Type;
5
+ import org.msgpack.value.ArrayValue;
6
+ import org.msgpack.value.MapValue;
7
+ import org.msgpack.value.Value;
8
+ import org.msgpack.value.ValueFactory;
9
+
10
+ import org.embulk.filter.typecast.TypecastFilterPlugin.ColumnConfig;
11
+ import org.embulk.filter.typecast.TypecastFilterPlugin.PluginTask;
12
+
13
+ import org.slf4j.Logger;
14
+
15
+ import java.util.HashMap;
16
+ import java.util.HashSet;
17
+ import java.util.Map;
18
+
19
+ public class JsonVisitor
20
+ {
21
+ private static final Logger logger = Exec.getLogger(TypecastFilterPlugin.class);
22
+ private final PluginTask task;
23
+ private final Schema inputSchema;
24
+ private final Schema outputSchema;
25
+ private final HashSet<String> shouldVisitSet = new HashSet<>();
26
+ private final HashMap<String, Type> jsonPathTypeMap = new HashMap<>();
27
+ private final JsonCaster jsonCaster = new JsonCaster();
28
+
29
+ JsonVisitor(PluginTask task, Schema inputSchema, Schema outputSchema)
30
+ {
31
+ this.task = task;
32
+ this.inputSchema = inputSchema;
33
+ this.outputSchema = outputSchema;
34
+
35
+ buildShouldVisitSet();
36
+ buildJsonPathTypeMap();
37
+ }
38
+
39
+ private void buildJsonPathTypeMap()
40
+ {
41
+ // json path => Type
42
+ for (ColumnConfig columnConfig : task.getColumns()) {
43
+ String name = columnConfig.getName();
44
+ if (!name.startsWith("$.")) {
45
+ continue;
46
+ }
47
+ Type type = columnConfig.getType();
48
+ this.jsonPathTypeMap.put(name, type);
49
+ }
50
+ }
51
+
52
+ private void buildShouldVisitSet()
53
+ {
54
+ // json partial path => Boolean to avoid unnecessary type: json visit
55
+ for (ColumnConfig columnConfig : task.getColumns()) {
56
+ String name = columnConfig.getName();
57
+ if (!name.startsWith("$.")) {
58
+ continue;
59
+ }
60
+ String[] parts = name.split("\\.");
61
+ StringBuilder partialPath = new StringBuilder("$");
62
+ for (int i = 1; i < parts.length; i++) {
63
+ if (parts[i].contains("[")) {
64
+ String[] arrayParts = parts[i].split("\\[");
65
+ partialPath.append(".").append(arrayParts[0]);
66
+ this.shouldVisitSet.add(partialPath.toString());
67
+ for (int j = 1; j < arrayParts.length; j++) {
68
+ partialPath.append("[").append(arrayParts[j]);
69
+ this.shouldVisitSet.add(partialPath.toString());
70
+ }
71
+ }
72
+ else {
73
+ partialPath.append(".").append(parts[i]);
74
+ this.shouldVisitSet.add(partialPath.toString());
75
+ }
76
+ }
77
+ }
78
+ }
79
+
80
+ private boolean shouldVisit(String jsonPath)
81
+ {
82
+ return shouldVisitSet.contains(jsonPath);
83
+ }
84
+
85
+ public Value visit(String jsonPath, Value value)
86
+ {
87
+ if (!shouldVisit(jsonPath)) {
88
+ return value;
89
+ }
90
+ if (value.isArrayValue()) {
91
+ ArrayValue arrayValue = value.asArrayValue();
92
+ int size = arrayValue.size();
93
+ Value[] newValue = new Value[size];
94
+ for (int i = 0; i < size; i++) {
95
+ String k = new StringBuilder(jsonPath).append("[").append(Integer.toString(i)).append("]").toString();
96
+ Value v = arrayValue.get(i);
97
+ newValue[i] = visit(k, v);
98
+ }
99
+ return ValueFactory.newArray(newValue, true);
100
+ }
101
+ else if (value.isMapValue()) {
102
+ MapValue mapValue = value.asMapValue();
103
+ int size = mapValue.size() * 2;
104
+ Value[] newValue = new Value[size];
105
+ int i = 0;
106
+ for (Map.Entry<Value, Value> entry : mapValue.entrySet()) {
107
+ Value k = entry.getKey();
108
+ Value v = entry.getValue();
109
+ String newPath = new StringBuilder(jsonPath).append(".").append(k.asStringValue().asString()).toString();
110
+ Value r = visit(newPath, v);
111
+ newValue[i++] = k;
112
+ newValue[i++] = r;
113
+ }
114
+ return ValueFactory.newMap(newValue, true);
115
+ }
116
+ else if (value.isBooleanValue()) {
117
+ Type outputType = jsonPathTypeMap.get(jsonPath);
118
+ return jsonCaster.fromBoolean(outputType, value.asBooleanValue());
119
+ }
120
+ else if (value.isIntegerValue()) {
121
+ Type outputType = jsonPathTypeMap.get(jsonPath);
122
+ return jsonCaster.fromLong(outputType, value.asIntegerValue());
123
+ }
124
+ else if (value.isFloatValue()) {
125
+ Type outputType = jsonPathTypeMap.get(jsonPath);
126
+ return jsonCaster.fromDouble(outputType, value.asFloatValue());
127
+ }
128
+ else if (value.isStringValue()) {
129
+ Type outputType = jsonPathTypeMap.get(jsonPath);
130
+ return jsonCaster.fromString(outputType, value.asStringValue());
131
+ }
132
+ else {
133
+ return value;
134
+ }
135
+ }
136
+ }
@@ -4,15 +4,18 @@ import com.google.common.base.Optional;
4
4
  import com.google.common.collect.ImmutableList;
5
5
  import org.embulk.config.Config;
6
6
  import org.embulk.config.ConfigDefault;
7
+ import org.embulk.config.ConfigException;
7
8
  import org.embulk.config.ConfigInject;
8
9
  import org.embulk.config.ConfigSource;
9
10
  import org.embulk.config.Task;
10
11
  import org.embulk.config.TaskSource;
11
12
 
12
13
  import org.embulk.spi.*;
14
+ import org.embulk.spi.time.Timestamp;
13
15
  import org.embulk.spi.time.TimestampFormatter;
14
16
  import org.embulk.spi.time.TimestampParser;
15
17
 
18
+ import org.embulk.spi.type.TimestampType;
16
19
  import org.embulk.spi.type.Type;
17
20
  import org.joda.time.DateTimeZone;
18
21
  import org.jruby.embed.ScriptingContainer;
@@ -86,7 +89,7 @@ public class TypecastFilterPlugin implements FilterPlugin
86
89
  // throw if column does not exist
87
90
  for (ColumnConfig columnConfig : columnConfigs) {
88
91
  String name = columnConfig.getName();
89
- if (name.startsWith("$.")) {
92
+ if (name.startsWith("$.")) { // check only top level column name
90
93
  String firstName = name.split("\\.", 3)[1];
91
94
  inputSchema.lookupColumn(firstName);
92
95
  }
@@ -94,6 +97,13 @@ public class TypecastFilterPlugin implements FilterPlugin
94
97
  inputSchema.lookupColumn(name);
95
98
  }
96
99
  }
100
+ // throw if timestamp is specified in json path
101
+ for (ColumnConfig columnConfig : columnConfigs) {
102
+ String name = columnConfig.getName();
103
+ if (name.startsWith("$.") && columnConfig.getType() instanceof TimestampType) {
104
+ throw new ConfigException(String.format("embulk-filter-typecast: timestamp type is not supported in json column: \"%s\"", name));
105
+ }
106
+ }
97
107
  }
98
108
 
99
109
  private Schema buildOuputSchema(final PluginTask task, final Schema inputSchema)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-typecast
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naotoshi Seo
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-04-27 00:00:00.000000000 Z
11
+ date: 2016-04-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -52,6 +52,7 @@ files:
52
52
  - README.md
53
53
  - build.gradle
54
54
  - config/checkstyle/checkstyle.xml
55
+ - example/example.csv
55
56
  - example/example.yml
56
57
  - example/example2.yml
57
58
  - gradle/wrapper/gradle-wrapper.jar
@@ -59,10 +60,11 @@ files:
59
60
  - gradlew
60
61
  - gradlew.bat
61
62
  - lib/embulk/filter/typecast.rb
63
+ - src/main/java/org/embulk/filter/typecast/ColumnCaster.java
62
64
  - src/main/java/org/embulk/filter/typecast/ColumnVisitorImpl.java
65
+ - src/main/java/org/embulk/filter/typecast/JsonCaster.java
66
+ - src/main/java/org/embulk/filter/typecast/JsonVisitor.java
63
67
  - src/main/java/org/embulk/filter/typecast/TypecastFilterPlugin.java
64
- - src/main/java/org/embulk/filter/typecast/TypecastJsonBuilder.java
65
- - src/main/java/org/embulk/filter/typecast/TypecastPageBuilder.java
66
68
  - src/main/java/org/embulk/filter/typecast/cast/BooleanCast.java
67
69
  - src/main/java/org/embulk/filter/typecast/cast/DoubleCast.java
68
70
  - src/main/java/org/embulk/filter/typecast/cast/JsonCast.java
@@ -70,7 +72,7 @@ files:
70
72
  - src/main/java/org/embulk/filter/typecast/cast/StringCast.java
71
73
  - src/main/java/org/embulk/filter/typecast/cast/TimestampCast.java
72
74
  - src/test/java/org/embulk/filter/TestTypecastFilterPlugin.java
73
- - classpath/embulk-filter-typecast-0.1.1.jar
75
+ - classpath/embulk-filter-typecast-0.1.2.jar
74
76
  homepage: https://github.com/sonots/embulk-filter-typecast
75
77
  licenses:
76
78
  - MIT