embulk-filter-typecast 0.1.1 → 0.1.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: db51b360ea765f3b96a301ec98144cc1f0033ebe
4
- data.tar.gz: e9c35bc3f9af842a370b655bd81f8253d49d56e9
3
+ metadata.gz: e8a2482d0bd6fc6109bb8763f9a1f9db6b0733db
4
+ data.tar.gz: b80c4d01e823e3a4b59594002277fc78e1ee0ef9
5
5
  SHA512:
6
- metadata.gz: 7dd4e6b33e658ad72e22b7ad65fe4738244505ac7ebd75d6b2333ff3f260bdd9ef979f5b800d3608836ff928fcd4e1d56e78290dab8cabc5c7a079660706097d
7
- data.tar.gz: 6d356bbf862ad960684b65eda40e8bcae43f8559515cd1ee36468f92fb997f8a0e225c3b88dfe2ec0d00216e41bae65b494fed58046b3a36e90907965ed78f6a
6
+ metadata.gz: 7473b57b158d8936e14015358dcd21d3bfe852f0b7c33de10da4ad38aa4553b3ddf39e5b41c9ed11df32713c42a33506aba22820b2236590ab124035a1056783
7
+ data.tar.gz: 354bf8ab52c2ee5ba124af046abea9e311213c4ebaa03991f432504cc87094a0e1d4815c3c94434ee843de836f6e2146331540fd23b286071c000011bb64c55a
data/.gitignore CHANGED
@@ -6,7 +6,6 @@
6
6
  /classpath/
7
7
  build/
8
8
  .idea
9
- *.csv
10
9
  .tags
11
10
  .ruby-version
12
11
  *.iml
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ # 0.1.2 (2016-04-28)
2
+
3
+ Enhancements:
4
+
5
+ * Support typecast in json path for casted json (from string)
6
+
1
7
  # 0.1.1 (2016-04-28)
2
8
 
3
9
  Enhancements:
data/build.gradle CHANGED
@@ -13,7 +13,7 @@ configurations {
13
13
  provided
14
14
  }
15
15
 
16
- version = "0.1.1"
16
+ version = "0.1.2"
17
17
  sourceCompatibility = 1.7
18
18
  targetCompatibility = 1.7
19
19
 
@@ -0,0 +1,11 @@
1
+ timestamp,null,long,string,double,json1,json2,boolean
2
+ 2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,{"string":"0"},{"long":0},true
3
+ 2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,{"string":"1"},{"long":1},true
4
+ 2015-07-13 00:00:00.100000,,92,0hgDRI_m,810.9,{"string":"2"},{"long":2},true
5
+ 2015-07-13 00:00:00.100000,,93,KjCRAc-A,477.4,{"string":"3"},{"long":3},true
6
+ 2015-07-13 00:00:00.100000,,94,fyQVGlT8,725.3,{"string":"4"},{"long":4},true
7
+ 2015-07-13 00:00:00.100000,,95,FpBYRPWK,316.6,{"string":"5"},{"long":5},false
8
+ 2015-07-13 00:00:00.100000,,96,9ikvnUqp,369.5,{"string":"6"},{"long":6},false
9
+ 2015-07-13 00:00:00.100000,,97,RRNYDAzK,506.5,{"string":"7"},{"long":7},false
10
+ 2015-07-13 00:00:00.100000,,90,l6lTsvxd,903.4,{"string":"8"},{"long":8},false
11
+ 2015-07-13 00:00:00.100000,,91,XoALSEQg,394.5,{"string":"9"},{"long":9},false
data/example/example2.yml CHANGED
@@ -14,7 +14,8 @@ in:
14
14
  - {name: long, type: string}
15
15
  - {name: string, type: string}
16
16
  - {name: double, type: string}
17
- - {name: json, type: string}
17
+ - {name: json1, type: string}
18
+ - {name: json2, type: string}
18
19
  - {name: boolean, type: boolean}
19
20
  filters:
20
21
  - type: typecast
@@ -24,7 +25,10 @@ filters:
24
25
  - {name: long, type: long}
25
26
  - {name: string, type: string}
26
27
  - {name: double, type: double}
27
- - {name: json, type: json}
28
+ - {name: json1, type: json}
29
+ - {name: json2, type: json}
28
30
  - {name: boolean, type: boolean}
31
+ - {name: "$.json1.string", type: long}
32
+ - {name: "$.json2.long", type: long}
29
33
  out:
30
34
  type: "null"
@@ -2,17 +2,45 @@ package org.embulk.filter.typecast;
2
2
 
3
3
  import org.embulk.filter.typecast.cast.*;
4
4
 
5
+ import org.embulk.filter.typecast.TypecastFilterPlugin.PluginTask;
6
+
5
7
  import org.embulk.spi.Column;
6
8
  import org.embulk.spi.DataException;
9
+ import org.embulk.spi.Exec;
7
10
  import org.embulk.spi.PageBuilder;
11
+ import org.embulk.spi.PageReader;
12
+ import org.embulk.spi.Schema;
8
13
  import org.embulk.spi.time.Timestamp;
9
14
  import org.embulk.spi.time.TimestampFormatter;
10
15
  import org.embulk.spi.time.TimestampParser;
11
16
  import org.embulk.spi.type.*;
12
17
  import org.msgpack.value.Value;
13
18
 
14
- class TypecastPageBuilder {
15
- static void setFromBoolean(PageBuilder pageBuilder, Column outputColumn, boolean value) {
19
+ import org.slf4j.Logger;
20
+
21
+
22
+ class ColumnCaster
23
+ {
24
+ private static final Logger logger = Exec.getLogger(TypecastFilterPlugin.class);
25
+ private final PluginTask task;
26
+ private final Schema inputSchema;
27
+ private final Schema outputSchema;
28
+ private final PageReader pageReader;
29
+ private final PageBuilder pageBuilder;
30
+ private final JsonVisitor jsonVisitor;
31
+
32
+ ColumnCaster(TypecastFilterPlugin.PluginTask task, Schema inputSchema, Schema outputSchema,
33
+ PageReader pageReader, PageBuilder pageBuilder)
34
+ {
35
+ this.task = task;
36
+ this.inputSchema = inputSchema;
37
+ this.outputSchema = outputSchema;
38
+ this.pageReader = pageReader;
39
+ this.pageBuilder = pageBuilder;
40
+ this.jsonVisitor = new JsonVisitor(task, inputSchema, outputSchema);
41
+ }
42
+
43
+ public void setFromBoolean(Column outputColumn, boolean value) {
16
44
  Type outputType = outputColumn.getType();
17
45
  if (outputType instanceof BooleanType) {
18
46
  pageBuilder.setBoolean(outputColumn, BooleanCast.asBoolean(value));
@@ -31,7 +59,7 @@ class TypecastPageBuilder {
31
59
  }
32
60
  }
33
61
 
34
- static void setFromLong(PageBuilder pageBuilder, Column outputColumn, long value)
62
+ public void setFromLong(Column outputColumn, long value)
35
63
  {
36
64
  Type outputType = outputColumn.getType();
37
65
  if (outputType instanceof BooleanType) {
@@ -51,7 +79,7 @@ class TypecastPageBuilder {
51
79
  }
52
80
  }
53
81
 
54
- static void setFromDouble(PageBuilder pageBuilder, Column outputColumn, double value)
82
+ public void setFromDouble(Column outputColumn, double value)
55
83
  {
56
84
  try {
57
85
  Type outputType = outputColumn.getType();
@@ -76,7 +104,7 @@ class TypecastPageBuilder {
76
104
  }
77
105
  }
78
106
 
79
- static void setFromString(PageBuilder pageBuilder, Column outputColumn, String value, TimestampParser timestampParser)
107
+ public void setFromString(Column outputColumn, String value, TimestampParser timestampParser)
80
108
  {
81
109
  Type outputType = outputColumn.getType();
82
110
  if (outputType instanceof BooleanType) {
@@ -90,13 +118,16 @@ class TypecastPageBuilder {
90
118
  } else if (outputType instanceof TimestampType) {
91
119
  pageBuilder.setTimestamp(outputColumn, StringCast.asTimestamp(value, timestampParser));
92
120
  } else if (outputType instanceof JsonType) {
93
- pageBuilder.setJson(outputColumn, StringCast.asJson(value));
121
+ Value jsonValue = StringCast.asJson(value);
122
+ String jsonPath = new StringBuilder("$.").append(outputColumn.getName()).toString();
123
+ Value castedValue = jsonVisitor.visit(jsonPath, jsonValue);
124
+ pageBuilder.setJson(outputColumn, castedValue);
94
125
  } else {
95
126
  assert(false);
96
127
  }
97
128
  }
98
129
 
99
- static void setFromTimestamp(PageBuilder pageBuilder, Column outputColumn, Timestamp value, TimestampFormatter timestampFormatter)
130
+ public void setFromTimestamp(Column outputColumn, Timestamp value, TimestampFormatter timestampFormatter)
100
131
  {
101
132
  Type outputType = outputColumn.getType();
102
133
  if (outputType instanceof BooleanType) {
@@ -116,21 +147,23 @@ class TypecastPageBuilder {
116
147
  }
117
148
  }
118
149
 
119
- static void setFromJson(PageBuilder pageBuilder, Column outputColumn, Value value)
150
+ public void setFromJson(Column outputColumn, Value value)
120
151
  {
152
+ String jsonPath = new StringBuilder("$.").append(outputColumn.getName()).toString();
153
+ Value castedValue = jsonVisitor.visit(jsonPath, value);
121
154
  Type outputType = outputColumn.getType();
122
155
  if (outputType instanceof BooleanType) {
123
- pageBuilder.setBoolean(outputColumn, JsonCast.asBoolean(value));
156
+ pageBuilder.setBoolean(outputColumn, JsonCast.asBoolean(castedValue));
124
157
  } else if (outputType instanceof LongType) {
125
- pageBuilder.setLong(outputColumn, JsonCast.asLong(value));
158
+ pageBuilder.setLong(outputColumn, JsonCast.asLong(castedValue));
126
159
  } else if (outputType instanceof DoubleType) {
127
- pageBuilder.setDouble(outputColumn, JsonCast.asDouble(value));
160
+ pageBuilder.setDouble(outputColumn, JsonCast.asDouble(castedValue));
128
161
  } else if (outputType instanceof StringType) {
129
- pageBuilder.setString(outputColumn, JsonCast.asString(value));
162
+ pageBuilder.setString(outputColumn, JsonCast.asString(castedValue));
130
163
  } else if (outputType instanceof TimestampType) {
131
- pageBuilder.setTimestamp(outputColumn, JsonCast.asTimestamp(value));
164
+ pageBuilder.setTimestamp(outputColumn, JsonCast.asTimestamp(castedValue));
132
165
  } else if (outputType instanceof JsonType) {
133
- pageBuilder.setJson(outputColumn, JsonCast.asJson(value));
166
+ pageBuilder.setJson(outputColumn, JsonCast.asJson(castedValue));
134
167
  } else {
135
168
  assert(false);
136
169
  }
@@ -1,11 +1,8 @@
1
1
  package org.embulk.filter.typecast;
2
2
 
3
3
  import org.embulk.spi.*;
4
- import org.embulk.spi.type.Type;
5
- import org.msgpack.value.ArrayValue;
6
- import org.msgpack.value.MapValue;
7
- import org.msgpack.value.Value;
8
- import org.msgpack.value.ValueFactory;
4
+ import org.embulk.spi.type.StringType;
5
+ import org.embulk.spi.type.TimestampType;
9
6
 
10
7
  import org.embulk.filter.typecast.TypecastFilterPlugin.ColumnConfig;
11
8
  import org.embulk.filter.typecast.TypecastFilterPlugin.PluginTask;
@@ -16,8 +13,6 @@ import org.joda.time.DateTimeZone;
16
13
  import org.slf4j.Logger;
17
14
 
18
15
  import java.util.HashMap;
19
- import java.util.HashSet;
20
- import java.util.Map;
21
16
 
22
17
  public class ColumnVisitorImpl
23
18
  implements ColumnVisitor
@@ -31,11 +26,10 @@ public class ColumnVisitorImpl
31
26
  private final HashMap<String, Column> outputColumnMap = new HashMap<>();
32
27
  private final HashMap<String, TimestampParser> timestampParserMap = new HashMap<>();
33
28
  private final HashMap<String, TimestampFormatter> timestampFormatterMap = new HashMap<>();
34
- private final HashSet<String> shouldVisitJsonPathSet = new HashSet<>();
35
- private final HashMap<String, Type> jsonPathTypeMap = new HashMap<>();
29
+ private final ColumnCaster columnCaster;
36
30
 
37
31
  ColumnVisitorImpl(PluginTask task, Schema inputSchema, Schema outputSchema,
38
- PageReader pageReader, PageBuilder pageBuilder)
32
+ PageReader pageReader, PageBuilder pageBuilder)
39
33
  {
40
34
  this.task = task;
41
35
  this.inputSchema = inputSchema;
@@ -43,11 +37,11 @@ public class ColumnVisitorImpl
43
37
  this.pageReader = pageReader;
44
38
  this.pageBuilder = pageBuilder;
45
39
 
40
+ this.columnCaster = new ColumnCaster(task, inputSchema, outputSchema, pageReader, pageBuilder);
41
+
46
42
  buildOutputColumnMap();
47
43
  buildTimestampParserMap();
48
44
  buildTimestampFormatterMap();
49
- buildShouldVisitJsonPathSet();;
50
- buildJsonPathTypeMap();
51
45
  }
52
46
 
53
47
  private void buildOutputColumnMap()
@@ -60,132 +54,46 @@ public class ColumnVisitorImpl
60
54
 
61
55
  private void buildTimestampParserMap()
62
56
  {
63
- // columnName or jsonPath => TimestampParser
57
+ // columnName => TimestampParser
64
58
  for (ColumnConfig columnConfig : task.getColumns()) {
65
- TimestampParser parser = getTimestampParser(columnConfig, task);
66
- this.timestampParserMap.put(columnConfig.getName(), parser);
67
- }
68
- }
69
-
70
- private TimestampParser getTimestampParser(ColumnConfig columnConfig, PluginTask task)
71
- {
72
- DateTimeZone timezone = columnConfig.getTimeZone().or(task.getDefaultTimeZone());
73
- String format = columnConfig.getFormat().or(task.getDefaultTimestampFormat());
74
- return new TimestampParser(task.getJRuby(), format, timezone);
75
- }
76
-
77
- private void buildTimestampFormatterMap()
78
- {
79
- // columnName or jsonPath => TimestampFormatter
80
- for (ColumnConfig columnConfig : task.getColumns()) {
81
- TimestampFormatter parser = getTimestampFormatter(columnConfig, task);
82
- this.timestampFormatterMap.put(columnConfig.getName(), parser);
83
- }
84
- }
85
-
86
- private TimestampFormatter getTimestampFormatter(ColumnConfig columnConfig, PluginTask task)
87
- {
88
- String format = columnConfig.getFormat().or(task.getDefaultTimestampFormat());
89
- DateTimeZone timezone = columnConfig.getTimeZone().or(task.getDefaultTimeZone());
90
- return new TimestampFormatter(task.getJRuby(), format, timezone);
91
- }
92
-
93
- private void buildShouldVisitJsonPathSet()
94
- {
95
- // json partial path => Boolean to avoid unnecessary type: json visit
96
- for (ColumnConfig columnConfig : task.getColumns()) {
97
- String name = columnConfig.getName();
98
- if (!name.startsWith("$.")) {
99
- continue;
59
+ if (columnConfig.getName().startsWith("$.")) {
60
+ continue; // type: json columns do not support type: timestamp
100
61
  }
101
- String[] parts = name.split("\\.");
102
- StringBuilder partialPath = new StringBuilder("$");
103
- for (int i = 1; i < parts.length; i++) {
104
- if (parts[i].contains("[")) {
105
- String[] arrayParts = parts[i].split("\\[");
106
- partialPath.append(".").append(arrayParts[0]);
107
- this.shouldVisitJsonPathSet.add(partialPath.toString());
108
- for (int j = 1; j < arrayParts.length; j++) {
109
- partialPath.append("[").append(arrayParts[j]);
110
- this.shouldVisitJsonPathSet.add(partialPath.toString());
111
- }
112
- }
113
- else {
114
- partialPath.append(".").append(parts[i]);
115
- this.shouldVisitJsonPathSet.add(partialPath.toString());
116
- }
62
+ Column inputColumn = inputSchema.lookupColumn(columnConfig.getName());
63
+ if (inputColumn.getType() instanceof StringType && columnConfig.getType() instanceof TimestampType) {
64
+ TimestampParser parser = getTimestampParser(columnConfig, task);
65
+ this.timestampParserMap.put(columnConfig.getName(), parser);
117
66
  }
118
67
  }
119
68
  }
120
69
 
121
- private void buildJsonPathTypeMap()
70
+ private void buildTimestampFormatterMap()
122
71
  {
123
- // json path => Type
72
+ // columnName => TimestampFormatter
124
73
  for (ColumnConfig columnConfig : task.getColumns()) {
125
- String name = columnConfig.getName();
126
- if (!name.startsWith("$.")) {
127
- continue;
74
+ if (columnConfig.getName().startsWith("$.")) {
75
+ continue; // type: json columns do not have type: timestamp
76
+ }
77
+ Column inputColumn = inputSchema.lookupColumn(columnConfig.getName());
78
+ if (inputColumn.getType() instanceof TimestampType && columnConfig.getType() instanceof StringType) {
79
+ TimestampFormatter parser = getTimestampFormatter(columnConfig, task);
80
+ this.timestampFormatterMap.put(columnConfig.getName(), parser);
128
81
  }
129
- Type type = columnConfig.getType();
130
- this.jsonPathTypeMap.put(name, type);
131
82
  }
132
83
  }
133
84
 
134
- private boolean shouldVisitJsonPath(String jsonPath)
85
+ private TimestampParser getTimestampParser(ColumnConfig columnConfig, PluginTask task)
135
86
  {
136
- return shouldVisitJsonPathSet.contains(jsonPath);
87
+ DateTimeZone timezone = columnConfig.getTimeZone().or(task.getDefaultTimeZone());
88
+ String format = columnConfig.getFormat().or(task.getDefaultTimestampFormat());
89
+ return new TimestampParser(task.getJRuby(), format, timezone);
137
90
  }
138
91
 
139
- private Value castJsonRecursively(PluginTask task, String jsonPath, Value value)
92
+ private TimestampFormatter getTimestampFormatter(ColumnConfig columnConfig, PluginTask task)
140
93
  {
141
- if (!shouldVisitJsonPath(jsonPath)) {
142
- return value;
143
- }
144
- if (value.isArrayValue()) {
145
- ArrayValue arrayValue = value.asArrayValue();
146
- int size = arrayValue.size();
147
- Value[] newValue = new Value[size];
148
- for (int i = 0; i < size; i++) {
149
- String k = new StringBuilder(jsonPath).append("[").append(Integer.toString(i)).append("]").toString();
150
- Value v = arrayValue.get(i);
151
- newValue[i] = castJsonRecursively(task, k, v);
152
- }
153
- return ValueFactory.newArray(newValue, true);
154
- }
155
- else if (value.isMapValue()) {
156
- MapValue mapValue = value.asMapValue();
157
- int size = mapValue.size() * 2;
158
- Value[] newValue = new Value[size];
159
- int i = 0;
160
- for (Map.Entry<Value, Value> entry : mapValue.entrySet()) {
161
- Value k = entry.getKey();
162
- Value v = entry.getValue();
163
- String newPath = new StringBuilder(jsonPath).append(".").append(k.asStringValue().asString()).toString();
164
- Value r = castJsonRecursively(task, newPath, v);
165
- newValue[i++] = k;
166
- newValue[i++] = r;
167
- }
168
- return ValueFactory.newMap(newValue, true);
169
- }
170
- else if (value.isBooleanValue()) {
171
- Type outputType = jsonPathTypeMap.get(jsonPath);
172
- return TypecastJsonBuilder.getFromBoolean(outputType, value.asBooleanValue().getBoolean());
173
- }
174
- else if (value.isIntegerValue()) {
175
- Type outputType = jsonPathTypeMap.get(jsonPath);
176
- return TypecastJsonBuilder.getFromLong(outputType, value.asIntegerValue().asLong());
177
- }
178
- else if (value.isFloatValue()) {
179
- Type outputType = jsonPathTypeMap.get(jsonPath);
180
- return TypecastJsonBuilder.getFromDouble(outputType, value.asFloatValue().toDouble());
181
- }
182
- else if (value.isStringValue()) {
183
- Type outputType = jsonPathTypeMap.get(jsonPath);
184
- return TypecastJsonBuilder.getFromString(outputType, value.asStringValue().asString());
185
- }
186
- else {
187
- return value;
188
- }
94
+ String format = columnConfig.getFormat().or(task.getDefaultTimestampFormat());
95
+ DateTimeZone timezone = columnConfig.getTimeZone().or(task.getDefaultTimeZone());
96
+ return new TimestampFormatter(task.getJRuby(), format, timezone);
189
97
  }
190
98
 
191
99
  private interface PageBuildable
@@ -217,7 +125,7 @@ public class ColumnVisitorImpl
217
125
  final Column outputColumn = outputColumnMap.get(inputColumn.getName());
218
126
  PageBuildable op = new PageBuildable() {
219
127
  public void run() throws DataException {
220
- TypecastPageBuilder.setFromBoolean(pageBuilder, outputColumn, pageReader.getBoolean(inputColumn));
128
+ columnCaster.setFromBoolean(outputColumn, pageReader.getBoolean(inputColumn));
221
129
  }
222
130
  };
223
131
  withStopOnInvalidRecord(op, inputColumn, outputColumn);
@@ -229,7 +137,7 @@ public class ColumnVisitorImpl
229
137
  final Column outputColumn = outputColumnMap.get(inputColumn.getName());
230
138
  PageBuildable op = new PageBuildable() {
231
139
  public void run() throws DataException {
232
- TypecastPageBuilder.setFromLong(pageBuilder, outputColumn, pageReader.getLong(inputColumn));
140
+ columnCaster.setFromLong(outputColumn, pageReader.getLong(inputColumn));
233
141
  }
234
142
  };
235
143
  withStopOnInvalidRecord(op, inputColumn, outputColumn);
@@ -241,7 +149,7 @@ public class ColumnVisitorImpl
241
149
  final Column outputColumn = outputColumnMap.get(inputColumn.getName());
242
150
  PageBuildable op = new PageBuildable() {
243
151
  public void run() throws DataException {
244
- TypecastPageBuilder.setFromDouble(pageBuilder, outputColumn, pageReader.getDouble(inputColumn));
152
+ columnCaster.setFromDouble(outputColumn, pageReader.getDouble(inputColumn));
245
153
  }
246
154
  };
247
155
  withStopOnInvalidRecord(op, inputColumn, outputColumn);
@@ -254,8 +162,7 @@ public class ColumnVisitorImpl
254
162
  final TimestampParser timestampParser = timestampParserMap.get(inputColumn.getName());
255
163
  PageBuildable op = new PageBuildable() {
256
164
  public void run() throws DataException {
257
- TypecastPageBuilder.setFromString(
258
- pageBuilder, outputColumn, pageReader.getString(inputColumn), timestampParser);
165
+ columnCaster.setFromString(outputColumn, pageReader.getString(inputColumn), timestampParser);
259
166
  }
260
167
  };
261
168
  withStopOnInvalidRecord(op, inputColumn, outputColumn);
@@ -268,8 +175,7 @@ public class ColumnVisitorImpl
268
175
  final TimestampFormatter timestampFormatter = timestampFormatterMap.get(inputColumn.getName());
269
176
  PageBuildable op = new PageBuildable() {
270
177
  public void run() throws DataException {
271
- TypecastPageBuilder.setFromTimestamp(
272
- pageBuilder, outputColumn, pageReader.getTimestamp(inputColumn), timestampFormatter);
178
+ columnCaster.setFromTimestamp(outputColumn, pageReader.getTimestamp(inputColumn), timestampFormatter);
273
179
  }
274
180
  };
275
181
  withStopOnInvalidRecord(op, inputColumn, outputColumn);
@@ -278,13 +184,10 @@ public class ColumnVisitorImpl
278
184
  @Override
279
185
  public void jsonColumn(final Column inputColumn)
280
186
  {
281
- String jsonPath = new StringBuilder("$.").append(inputColumn.getName()).toString();
282
- Value value = pageReader.getJson(inputColumn);
283
- final Value castedValue = castJsonRecursively(task, jsonPath, value);
284
187
  final Column outputColumn = outputColumnMap.get(inputColumn.getName());
285
188
  PageBuildable op = new PageBuildable() {
286
189
  public void run() throws DataException {
287
- TypecastPageBuilder.setFromJson(pageBuilder, outputColumn, castedValue);
190
+ columnCaster.setFromJson(outputColumn, pageReader.getJson(inputColumn));
288
191
  }
289
192
  };
290
193
  withStopOnInvalidRecord(op, inputColumn, outputColumn);
@@ -4,21 +4,28 @@ import org.embulk.filter.typecast.cast.*;
4
4
 
5
5
  import org.embulk.spi.DataException;
6
6
  import org.embulk.spi.type.*;
7
+ import org.msgpack.value.BooleanValue;
8
+ import org.msgpack.value.IntegerValue;
9
+ import org.msgpack.value.FloatValue;
10
+ import org.msgpack.value.StringValue;
7
11
  import org.msgpack.value.Value;
8
12
  import org.msgpack.value.ValueFactory;
9
13
 
10
- class TypecastJsonBuilder {
11
- static Value getFromBoolean(Type outputType, boolean value) {
14
+ class JsonCaster
15
+ {
16
+ public JsonCaster()
17
+ {
18
+ }
19
+
20
+ public Value fromBoolean(Type outputType, BooleanValue value) {
12
21
  if (outputType instanceof BooleanType) {
13
- return ValueFactory.newBoolean(value);
22
+ return value;
14
23
  } else if (outputType instanceof LongType) {
15
- return ValueFactory.newInteger(BooleanCast.asLong(value));
24
+ return ValueFactory.newInteger(BooleanCast.asLong(value.getBoolean()));
16
25
  } else if (outputType instanceof DoubleType) {
17
- return ValueFactory.newFloat(BooleanCast.asDouble(value));
26
+ return ValueFactory.newFloat(BooleanCast.asDouble(value.getBoolean()));
18
27
  } else if (outputType instanceof StringType) {
19
- return ValueFactory.newString(BooleanCast.asString(value));
20
- } else if (outputType instanceof TimestampType) {
21
- throw new DataException(String.format("no timestamp type in json: \"%s\"", value));
28
+ return ValueFactory.newString(BooleanCast.asString(value.getBoolean()));
22
29
  } else if (outputType instanceof JsonType) {
23
30
  throw new DataException(String.format("cannot cast boolean to json: \"%s\"", value));
24
31
  } else {
@@ -27,18 +34,16 @@ class TypecastJsonBuilder {
27
34
  }
28
35
  }
29
36
 
30
- static Value getFromLong(Type outputType, long value)
37
+ public Value fromLong(Type outputType, IntegerValue value)
31
38
  {
32
39
  if (outputType instanceof BooleanType) {
33
- return ValueFactory.newBoolean(LongCast.asBoolean(value));
40
+ return ValueFactory.newBoolean(LongCast.asBoolean(value.asLong()));
34
41
  } else if (outputType instanceof LongType) {
35
- return ValueFactory.newInteger(value);
42
+ return value;
36
43
  } else if (outputType instanceof DoubleType) {
37
- return ValueFactory.newFloat(LongCast.asDouble(value));
44
+ return ValueFactory.newFloat(LongCast.asDouble(value.asLong()));
38
45
  } else if (outputType instanceof StringType) {
39
- return ValueFactory.newString(LongCast.asString(value));
40
- } else if (outputType instanceof TimestampType) {
41
- throw new DataException(String.format("no timestamp type in json: \"%s\"", value));
46
+ return ValueFactory.newString(LongCast.asString(value.asLong()));
42
47
  } else if (outputType instanceof JsonType) {
43
48
  throw new DataException(String.format("cannot cast long to json:: \"%s\"", value));
44
49
  } else {
@@ -47,18 +52,16 @@ class TypecastJsonBuilder {
47
52
  }
48
53
  }
49
54
 
50
- static Value getFromDouble(Type outputType, double value)
55
+ public Value fromDouble(Type outputType, FloatValue value)
51
56
  {
52
57
  if (outputType instanceof BooleanType) {
53
- return ValueFactory.newBoolean(DoubleCast.asBoolean(value));
58
+ return ValueFactory.newBoolean(DoubleCast.asBoolean(value.toDouble()));
54
59
  } else if (outputType instanceof LongType) {
55
- return ValueFactory.newInteger(DoubleCast.asLong(value));
60
+ return ValueFactory.newInteger(DoubleCast.asLong(value.toDouble()));
56
61
  } else if (outputType instanceof DoubleType) {
57
- return ValueFactory.newFloat(DoubleCast.asDouble(value));
62
+ return value;
58
63
  } else if (outputType instanceof StringType) {
59
- return ValueFactory.newString(DoubleCast.asString(value));
60
- } else if (outputType instanceof TimestampType) {
61
- throw new DataException(String.format("no timestamp type in json: \"%s\"", value));
64
+ return ValueFactory.newString(DoubleCast.asString(value.toDouble()));
62
65
  } else if (outputType instanceof JsonType) {
63
66
  throw new DataException(String.format("cannot cast double to json:: \"%s\"", value));
64
67
  } else {
@@ -67,40 +70,18 @@ class TypecastJsonBuilder {
67
70
  }
68
71
  }
69
72
 
70
- static Value getFromString(Type outputType, String value)
71
- {
72
- if (outputType instanceof BooleanType) {
73
- return ValueFactory.newBoolean(StringCast.asBoolean(value));
74
- } else if (outputType instanceof LongType) {
75
- return ValueFactory.newInteger(StringCast.asLong(value));
76
- } else if (outputType instanceof DoubleType) {
77
- return ValueFactory.newFloat(StringCast.asDouble(value));
78
- } else if (outputType instanceof StringType) {
79
- return ValueFactory.newString(StringCast.asString(value));
80
- } else if (outputType instanceof TimestampType) {
81
- throw new DataException(String.format("no timestamp type in json: \"%s\"", value));
82
- } else if (outputType instanceof JsonType) {
83
- return StringCast.asJson(value);
84
- } else {
85
- assert(false);
86
- return null;
87
- }
88
- }
89
-
90
- static Value getFromJson(Type outputType, Value value)
73
+ public Value fromString(Type outputType, StringValue value)
91
74
  {
92
75
  if (outputType instanceof BooleanType) {
93
- return ValueFactory.newBoolean(JsonCast.asBoolean(value));
76
+ return ValueFactory.newBoolean(StringCast.asBoolean(value.asString()));
94
77
  } else if (outputType instanceof LongType) {
95
- return ValueFactory.newInteger(JsonCast.asLong(value));
78
+ return ValueFactory.newInteger(StringCast.asLong(value.asString()));
96
79
  } else if (outputType instanceof DoubleType) {
97
- return ValueFactory.newFloat(JsonCast.asDouble(value));
80
+ return ValueFactory.newFloat(StringCast.asDouble(value.asString()));
98
81
  } else if (outputType instanceof StringType) {
99
- return ValueFactory.newString(JsonCast.asString(value));
100
- } else if (outputType instanceof TimestampType) {
101
- throw new DataException(String.format("no timestamp type in json: \"%s\"", value));
102
- } else if (outputType instanceof JsonType) {
103
82
  return value;
83
+ } else if (outputType instanceof JsonType) {
84
+ return StringCast.asJson(value.asString());
104
85
  } else {
105
86
  assert(false);
106
87
  return null;
@@ -0,0 +1,136 @@
1
+ package org.embulk.filter.typecast;
2
+
3
+ import org.embulk.spi.*;
4
+ import org.embulk.spi.type.Type;
5
+ import org.msgpack.value.ArrayValue;
6
+ import org.msgpack.value.MapValue;
7
+ import org.msgpack.value.Value;
8
+ import org.msgpack.value.ValueFactory;
9
+
10
+ import org.embulk.filter.typecast.TypecastFilterPlugin.ColumnConfig;
11
+ import org.embulk.filter.typecast.TypecastFilterPlugin.PluginTask;
12
+
13
+ import org.slf4j.Logger;
14
+
15
+ import java.util.HashMap;
16
+ import java.util.HashSet;
17
+ import java.util.Map;
18
+
19
+ public class JsonVisitor
20
+ {
21
+ private static final Logger logger = Exec.getLogger(TypecastFilterPlugin.class);
22
+ private final PluginTask task;
23
+ private final Schema inputSchema;
24
+ private final Schema outputSchema;
25
+ private final HashSet<String> shouldVisitSet = new HashSet<>();
26
+ private final HashMap<String, Type> jsonPathTypeMap = new HashMap<>();
27
+ private final JsonCaster jsonCaster = new JsonCaster();
28
+
29
+ JsonVisitor(PluginTask task, Schema inputSchema, Schema outputSchema)
30
+ {
31
+ this.task = task;
32
+ this.inputSchema = inputSchema;
33
+ this.outputSchema = outputSchema;
34
+
35
+ buildShouldVisitSet();
36
+ buildJsonPathTypeMap();
37
+ }
38
+
39
+ private void buildJsonPathTypeMap()
40
+ {
41
+ // json path => Type
42
+ for (ColumnConfig columnConfig : task.getColumns()) {
43
+ String name = columnConfig.getName();
44
+ if (!name.startsWith("$.")) {
45
+ continue;
46
+ }
47
+ Type type = columnConfig.getType();
48
+ this.jsonPathTypeMap.put(name, type);
49
+ }
50
+ }
51
+
52
+ private void buildShouldVisitSet()
53
+ {
54
+ // json partial path => Boolean to avoid unnecessary type: json visit
55
+ for (ColumnConfig columnConfig : task.getColumns()) {
56
+ String name = columnConfig.getName();
57
+ if (!name.startsWith("$.")) {
58
+ continue;
59
+ }
60
+ String[] parts = name.split("\\.");
61
+ StringBuilder partialPath = new StringBuilder("$");
62
+ for (int i = 1; i < parts.length; i++) {
63
+ if (parts[i].contains("[")) {
64
+ String[] arrayParts = parts[i].split("\\[");
65
+ partialPath.append(".").append(arrayParts[0]);
66
+ this.shouldVisitSet.add(partialPath.toString());
67
+ for (int j = 1; j < arrayParts.length; j++) {
68
+ partialPath.append("[").append(arrayParts[j]);
69
+ this.shouldVisitSet.add(partialPath.toString());
70
+ }
71
+ }
72
+ else {
73
+ partialPath.append(".").append(parts[i]);
74
+ this.shouldVisitSet.add(partialPath.toString());
75
+ }
76
+ }
77
+ }
78
+ }
79
+
80
+ private boolean shouldVisit(String jsonPath)
81
+ {
82
+ return shouldVisitSet.contains(jsonPath);
83
+ }
84
+
85
+ public Value visit(String jsonPath, Value value)
86
+ {
87
+ if (!shouldVisit(jsonPath)) {
88
+ return value;
89
+ }
90
+ if (value.isArrayValue()) {
91
+ ArrayValue arrayValue = value.asArrayValue();
92
+ int size = arrayValue.size();
93
+ Value[] newValue = new Value[size];
94
+ for (int i = 0; i < size; i++) {
95
+ String k = new StringBuilder(jsonPath).append("[").append(Integer.toString(i)).append("]").toString();
96
+ Value v = arrayValue.get(i);
97
+ newValue[i] = visit(k, v);
98
+ }
99
+ return ValueFactory.newArray(newValue, true);
100
+ }
101
+ else if (value.isMapValue()) {
102
+ MapValue mapValue = value.asMapValue();
103
+ int size = mapValue.size() * 2;
104
+ Value[] newValue = new Value[size];
105
+ int i = 0;
106
+ for (Map.Entry<Value, Value> entry : mapValue.entrySet()) {
107
+ Value k = entry.getKey();
108
+ Value v = entry.getValue();
109
+ String newPath = new StringBuilder(jsonPath).append(".").append(k.asStringValue().asString()).toString();
110
+ Value r = visit(newPath, v);
111
+ newValue[i++] = k;
112
+ newValue[i++] = r;
113
+ }
114
+ return ValueFactory.newMap(newValue, true);
115
+ }
116
+ else if (value.isBooleanValue()) {
117
+ Type outputType = jsonPathTypeMap.get(jsonPath);
118
+ return jsonCaster.fromBoolean(outputType, value.asBooleanValue());
119
+ }
120
+ else if (value.isIntegerValue()) {
121
+ Type outputType = jsonPathTypeMap.get(jsonPath);
122
+ return jsonCaster.fromLong(outputType, value.asIntegerValue());
123
+ }
124
+ else if (value.isFloatValue()) {
125
+ Type outputType = jsonPathTypeMap.get(jsonPath);
126
+ return jsonCaster.fromDouble(outputType, value.asFloatValue());
127
+ }
128
+ else if (value.isStringValue()) {
129
+ Type outputType = jsonPathTypeMap.get(jsonPath);
130
+ return jsonCaster.fromString(outputType, value.asStringValue());
131
+ }
132
+ else {
133
+ return value;
134
+ }
135
+ }
136
+ }
@@ -4,15 +4,18 @@ import com.google.common.base.Optional;
4
4
  import com.google.common.collect.ImmutableList;
5
5
  import org.embulk.config.Config;
6
6
  import org.embulk.config.ConfigDefault;
7
+ import org.embulk.config.ConfigException;
7
8
  import org.embulk.config.ConfigInject;
8
9
  import org.embulk.config.ConfigSource;
9
10
  import org.embulk.config.Task;
10
11
  import org.embulk.config.TaskSource;
11
12
 
12
13
  import org.embulk.spi.*;
14
+ import org.embulk.spi.time.Timestamp;
13
15
  import org.embulk.spi.time.TimestampFormatter;
14
16
  import org.embulk.spi.time.TimestampParser;
15
17
 
18
+ import org.embulk.spi.type.TimestampType;
16
19
  import org.embulk.spi.type.Type;
17
20
  import org.joda.time.DateTimeZone;
18
21
  import org.jruby.embed.ScriptingContainer;
@@ -86,7 +89,7 @@ public class TypecastFilterPlugin implements FilterPlugin
86
89
  // throw if column does not exist
87
90
  for (ColumnConfig columnConfig : columnConfigs) {
88
91
  String name = columnConfig.getName();
89
- if (name.startsWith("$.")) {
92
+ if (name.startsWith("$.")) { // check only top level column name
90
93
  String firstName = name.split("\\.", 3)[1];
91
94
  inputSchema.lookupColumn(firstName);
92
95
  }
@@ -94,6 +97,13 @@ public class TypecastFilterPlugin implements FilterPlugin
94
97
  inputSchema.lookupColumn(name);
95
98
  }
96
99
  }
100
+ // throw if timestamp is specified in json path
101
+ for (ColumnConfig columnConfig : columnConfigs) {
102
+ String name = columnConfig.getName();
103
+ if (name.startsWith("$.") && columnConfig.getType() instanceof TimestampType) {
104
+ throw new ConfigException(String.format("embulk-filter-typecast: timestamp type is not supported in json column: \"%s\"", name));
105
+ }
106
+ }
97
107
  }
98
108
 
99
109
  private Schema buildOuputSchema(final PluginTask task, final Schema inputSchema)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-filter-typecast
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naotoshi Seo
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-04-27 00:00:00.000000000 Z
11
+ date: 2016-04-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -52,6 +52,7 @@ files:
52
52
  - README.md
53
53
  - build.gradle
54
54
  - config/checkstyle/checkstyle.xml
55
+ - example/example.csv
55
56
  - example/example.yml
56
57
  - example/example2.yml
57
58
  - gradle/wrapper/gradle-wrapper.jar
@@ -59,10 +60,11 @@ files:
59
60
  - gradlew
60
61
  - gradlew.bat
61
62
  - lib/embulk/filter/typecast.rb
63
+ - src/main/java/org/embulk/filter/typecast/ColumnCaster.java
62
64
  - src/main/java/org/embulk/filter/typecast/ColumnVisitorImpl.java
65
+ - src/main/java/org/embulk/filter/typecast/JsonCaster.java
66
+ - src/main/java/org/embulk/filter/typecast/JsonVisitor.java
63
67
  - src/main/java/org/embulk/filter/typecast/TypecastFilterPlugin.java
64
- - src/main/java/org/embulk/filter/typecast/TypecastJsonBuilder.java
65
- - src/main/java/org/embulk/filter/typecast/TypecastPageBuilder.java
66
68
  - src/main/java/org/embulk/filter/typecast/cast/BooleanCast.java
67
69
  - src/main/java/org/embulk/filter/typecast/cast/DoubleCast.java
68
70
  - src/main/java/org/embulk/filter/typecast/cast/JsonCast.java
@@ -70,7 +72,7 @@ files:
70
72
  - src/main/java/org/embulk/filter/typecast/cast/StringCast.java
71
73
  - src/main/java/org/embulk/filter/typecast/cast/TimestampCast.java
72
74
  - src/test/java/org/embulk/filter/TestTypecastFilterPlugin.java
73
- - classpath/embulk-filter-typecast-0.1.1.jar
75
+ - classpath/embulk-filter-typecast-0.1.2.jar
74
76
  homepage: https://github.com/sonots/embulk-filter-typecast
75
77
  licenses:
76
78
  - MIT