embulk-parser-avro 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/README.md +27 -1
- data/build.gradle +8 -8
- data/src/main/java/org/embulk/parser/avro/AvroParserPlugin.java +24 -1
- data/src/main/java/org/embulk/parser/avro/TimestampUnit.java +70 -0
- data/src/main/java/org/embulk/parser/avro/TimestampUnitDeserializer.java +55 -0
- data/src/main/java/org/embulk/parser/avro/getter/ColumnGetterFactory.java +8 -5
- data/src/main/java/org/embulk/parser/avro/getter/DoubleColumnGetter.java +7 -4
- data/src/main/java/org/embulk/parser/avro/getter/FloatColumnGetter.java +7 -4
- data/src/main/java/org/embulk/parser/avro/getter/IntegerColumnGetter.java +7 -3
- data/src/main/java/org/embulk/parser/avro/getter/LongColumnGetter.java +7 -3
- data/src/test/java/org/embulk/parser/avro/TestAvroParserPlugin.java +41 -2
- data/src/test/resources/org/embulk/parser/avro/.gitignore +1 -0
- data/src/test/resources/org/embulk/parser/avro/Gemfile +7 -0
- data/src/test/resources/org/embulk/parser/avro/Gemfile.lock +15 -0
- data/src/test/resources/org/embulk/parser/avro/data.json +6 -0
- data/src/test/resources/org/embulk/parser/avro/data2.json +1 -0
- data/src/test/resources/org/embulk/parser/avro/data_creator.rb +17 -0
- data/src/test/resources/org/embulk/parser/avro/item2.avsc +20 -0
- data/src/test/resources/org/embulk/parser/avro/items2.avro +0 -0
- metadata +27 -17
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2ee00ffbaf0cf78dfc2c7bd0861e68603b528565
|
4
|
+
data.tar.gz: 08786ae000735a7f8d2aad1514a02de9c5095ec4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2697fb6b9cd4fb2cf6a72194450031f11ade7780d2afb050d40d65de39a10a5b56da507125b5580de820ba78e399ff39348726bb0eb82edb0c7aae13de27a555
|
7
|
+
data.tar.gz: 839d69c7a19dfc6eeddff3b4fb9f2bee3172b1d4ab05793753bae1fb466ef1bfa91c5e86e099f3fe44bc5c0e0047ed3a34eb789ae26c7e0507b29c25a58ce4ca
|
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -12,11 +12,37 @@
|
|
12
12
|
- **type**: Specify this parser as avro
|
13
13
|
- **avsc**: Specify avro schema file.
|
14
14
|
- **columns**: Specify column name and type. See below (array, optional)
|
15
|
+
- timestamp_unit: Specify the unit of the epoch time value (string, default: `second`). This option takes effect only when the avro value is `int`, `long`, `float`, or `double`.
|
15
16
|
* **default_timezone**: Default timezone of the timestamp (string, default: UTC)
|
16
17
|
* **default_timestamp_format**: Default timestamp format of the timestamp (string, default: `%Y-%m-%d %H:%M:%S.%N %z`)
|
17
18
|
|
18
19
|
If columns is not set, this plugin detects the schema automatically from the avsc schema.
|
19
20
|
|
21
|
+
The supported `timestamp_unit` values are listed below.
|
22
|
+
|
23
|
+
- "Second"
|
24
|
+
- "second"
|
25
|
+
- "sec"
|
26
|
+
- "s"
|
27
|
+
- "MilliSecond"
|
28
|
+
- "millisecond"
|
29
|
+
- "milli_second"
|
30
|
+
- "milli"
|
31
|
+
- "msec"
|
32
|
+
- "ms"
|
33
|
+
- "MicroSecond"
|
34
|
+
- "microsecond"
|
35
|
+
- "micro_second"
|
36
|
+
- "micro"
|
37
|
+
- "usec"
|
38
|
+
- "us"
|
39
|
+
- "NanoSecond"
|
40
|
+
- "nanosecond"
|
41
|
+
- "nano_second"
|
42
|
+
- "nano"
|
43
|
+
- "nsec"
|
44
|
+
- "ns"
|
45
|
+
|
20
46
|
## Example
|
21
47
|
|
22
48
|
```yaml
|
@@ -38,7 +64,7 @@ in:
|
|
38
64
|
- {name: "options", type: "json"}
|
39
65
|
- {name: "spec", type: "json"}
|
40
66
|
- {name: "created_at", type: "timestamp", format: "%Y-%m-%dT%H:%M:%S%:z"}
|
41
|
-
- {name: "created_at_utc", type: "timestamp"}
|
67
|
+
- {name: "created_at_utc", type: "timestamp", timestamp_unit: "second"}
|
42
68
|
|
43
69
|
out:
|
44
70
|
type: stdout
|
data/build.gradle
CHANGED
@@ -13,19 +13,19 @@ configurations {
|
|
13
13
|
provided
|
14
14
|
}
|
15
15
|
|
16
|
-
version = "0.
|
16
|
+
version = "0.3.0"
|
17
17
|
|
18
|
-
sourceCompatibility = 1.
|
19
|
-
targetCompatibility = 1.
|
18
|
+
sourceCompatibility = 1.8
|
19
|
+
targetCompatibility = 1.8
|
20
20
|
|
21
21
|
dependencies {
|
22
|
-
compile "org.embulk:embulk-core:0.
|
23
|
-
provided "org.embulk:embulk-core:0.
|
24
|
-
compile "org.apache.avro:avro:1.8.
|
22
|
+
compile "org.embulk:embulk-core:0.9.4"
|
23
|
+
provided "org.embulk:embulk-core:0.9.4"
|
24
|
+
compile "org.apache.avro:avro:1.8.2"
|
25
25
|
testCompile "junit:junit:4.+"
|
26
26
|
|
27
|
-
testCompile "org.embulk:embulk-core:0.
|
28
|
-
testCompile "org.embulk:embulk-standards:0.
|
27
|
+
testCompile "org.embulk:embulk-core:0.9.4:tests"
|
28
|
+
testCompile "org.embulk:embulk-standards:0.9.4"
|
29
29
|
}
|
30
30
|
|
31
31
|
task classpath(type: Copy, dependsOn: ["jar"]) {
|
@@ -15,6 +15,7 @@ import org.embulk.config.TaskSource;
|
|
15
15
|
import org.embulk.parser.avro.getter.BaseColumnGetter;
|
16
16
|
import org.embulk.parser.avro.getter.ColumnGetterFactory;
|
17
17
|
import org.embulk.spi.Column;
|
18
|
+
import org.embulk.spi.ColumnConfig;
|
18
19
|
import org.embulk.spi.Exec;
|
19
20
|
import org.embulk.spi.FileInput;
|
20
21
|
import org.embulk.spi.PageBuilder;
|
@@ -23,6 +24,7 @@ import org.embulk.spi.ParserPlugin;
|
|
23
24
|
import org.embulk.spi.Schema;
|
24
25
|
import org.embulk.spi.SchemaConfig;
|
25
26
|
import org.embulk.spi.time.TimestampParser;
|
27
|
+
import org.embulk.spi.type.TimestampType;
|
26
28
|
import org.embulk.spi.type.Types;
|
27
29
|
import org.embulk.spi.unit.LocalFile;
|
28
30
|
import org.embulk.spi.util.FileInputInputStream;
|
@@ -46,6 +48,13 @@ public class AvroParserPlugin
|
|
46
48
|
LocalFile getAvsc();
|
47
49
|
}
|
48
50
|
|
51
|
+
public interface TimestampUnitConfig extends Task
|
52
|
+
{
|
53
|
+
@Config("timestamp_unit")
|
54
|
+
@ConfigDefault("\"second\"")
|
55
|
+
public TimestampUnit getTimestampUnit();
|
56
|
+
}
|
57
|
+
|
49
58
|
@Override
|
50
59
|
public void transaction(ConfigSource config, ParserPlugin.Control control)
|
51
60
|
{
|
@@ -128,6 +137,7 @@ public class AvroParserPlugin
|
|
128
137
|
PluginTask task = taskSource.loadTask(PluginTask.class);
|
129
138
|
List<Column> columns = schema.getColumns();
|
130
139
|
final TimestampParser[] timestampParsers = Timestamps.newTimestampColumnParsers(task, task.getColumns());
|
140
|
+
final TimestampUnit[] timestampUnits = newTimestampUnits(task.getColumns());
|
131
141
|
File avsc = task.getAvsc().getFile();
|
132
142
|
final org.apache.avro.Schema avroSchema;
|
133
143
|
try {
|
@@ -137,7 +147,7 @@ public class AvroParserPlugin
|
|
137
147
|
}
|
138
148
|
|
139
149
|
try (FileInputInputStream is = new FileInputInputStream(input); final PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), schema, output)) {
|
140
|
-
ColumnGetterFactory factory = new ColumnGetterFactory(avroSchema, pageBuilder, timestampParsers);
|
150
|
+
ColumnGetterFactory factory = new ColumnGetterFactory(avroSchema, pageBuilder, timestampParsers, timestampUnits);
|
141
151
|
ImmutableMap.Builder<String, BaseColumnGetter> columnGettersBuilder = ImmutableMap.builder();
|
142
152
|
for (Column column : columns) {
|
143
153
|
BaseColumnGetter columnGetter = factory.newColumnGetter(column);
|
@@ -165,4 +175,17 @@ public class AvroParserPlugin
|
|
165
175
|
throw new RuntimeException(e);
|
166
176
|
}
|
167
177
|
}
|
178
|
+
|
179
|
+
private TimestampUnit[] newTimestampUnits(SchemaConfig columns) {
|
180
|
+
TimestampUnit[] units = new TimestampUnit[columns.getColumnCount()];
|
181
|
+
int i = 0;
|
182
|
+
for (ColumnConfig column : columns.getColumns()) {
|
183
|
+
if (column.getType() instanceof TimestampType) {
|
184
|
+
TimestampUnitConfig option = column.getOption().loadConfig(TimestampUnitConfig.class);
|
185
|
+
units[i] = option.getTimestampUnit();
|
186
|
+
}
|
187
|
+
i++;
|
188
|
+
}
|
189
|
+
return units;
|
190
|
+
}
|
168
191
|
}
|
@@ -0,0 +1,70 @@
|
|
1
|
+
package org.embulk.parser.avro;
|
2
|
+
|
3
|
+
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
|
4
|
+
import org.embulk.spi.time.Timestamp;
|
5
|
+
|
6
|
+
|
7
|
+
@JsonDeserialize(using=TimestampUnitDeserializer.class)
|
8
|
+
public enum TimestampUnit {
|
9
|
+
Second {
|
10
|
+
@Override
|
11
|
+
public Timestamp toTimestamp(Long value)
|
12
|
+
{
|
13
|
+
return Timestamp.ofEpochSecond(value);
|
14
|
+
}
|
15
|
+
|
16
|
+
@Override
|
17
|
+
public Timestamp toTimestamp(Double value)
|
18
|
+
{
|
19
|
+
long sec = value.longValue();
|
20
|
+
double rest = value - sec;
|
21
|
+
return Timestamp.ofEpochSecond(0, sec * 1000000000L + (long) (rest * 1000000000L));
|
22
|
+
}
|
23
|
+
},
|
24
|
+
MilliSecond {
|
25
|
+
@Override
|
26
|
+
public Timestamp toTimestamp(Long value)
|
27
|
+
{
|
28
|
+
return Timestamp.ofEpochSecond(0, value * 1000000L);
|
29
|
+
}
|
30
|
+
|
31
|
+
@Override
|
32
|
+
public Timestamp toTimestamp(Double value)
|
33
|
+
{
|
34
|
+
long sec = value.longValue();
|
35
|
+
double rest = value - sec;
|
36
|
+
return Timestamp.ofEpochSecond(0, sec * 1000000L + (long) (rest * 1000000L));
|
37
|
+
}
|
38
|
+
},
|
39
|
+
MicroSecond {
|
40
|
+
@Override
|
41
|
+
public Timestamp toTimestamp(Long value)
|
42
|
+
{
|
43
|
+
return Timestamp.ofEpochSecond(0, value * 1000L);
|
44
|
+
}
|
45
|
+
|
46
|
+
@Override
|
47
|
+
public Timestamp toTimestamp(Double value)
|
48
|
+
{
|
49
|
+
long sec = value.longValue();
|
50
|
+
double rest = value - sec;
|
51
|
+
return Timestamp.ofEpochSecond(0, sec * 1000L + (long) (rest * 1000L));
|
52
|
+
}
|
53
|
+
},
|
54
|
+
NanoSecond {
|
55
|
+
@Override
|
56
|
+
public Timestamp toTimestamp(Long value)
|
57
|
+
{
|
58
|
+
return Timestamp.ofEpochSecond(0, value);
|
59
|
+
}
|
60
|
+
|
61
|
+
@Override
|
62
|
+
public Timestamp toTimestamp(Double value)
|
63
|
+
{
|
64
|
+
return Timestamp.ofEpochSecond(0, value.longValue());
|
65
|
+
}
|
66
|
+
};
|
67
|
+
|
68
|
+
abstract public Timestamp toTimestamp(Long value);
|
69
|
+
abstract public Timestamp toTimestamp(Double value);
|
70
|
+
}
|
@@ -0,0 +1,55 @@
|
|
1
|
+
package org.embulk.parser.avro;
|
2
|
+
|
3
|
+
import com.fasterxml.jackson.core.JsonProcessingException;
|
4
|
+
import com.fasterxml.jackson.databind.DeserializationContext;
|
5
|
+
import com.fasterxml.jackson.databind.JsonMappingException;
|
6
|
+
import com.fasterxml.jackson.databind.deser.std.FromStringDeserializer;
|
7
|
+
import com.google.common.base.Joiner;
|
8
|
+
import com.google.common.collect.ImmutableMap;
|
9
|
+
|
10
|
+
import java.io.IOException;
|
11
|
+
|
12
|
+
public class TimestampUnitDeserializer extends FromStringDeserializer<TimestampUnit>
|
13
|
+
{
|
14
|
+
|
15
|
+
public static ImmutableMap<String, TimestampUnit> mapping = ImmutableMap.<String, TimestampUnit>builder()
|
16
|
+
.put("Second", TimestampUnit.Second)
|
17
|
+
.put("second", TimestampUnit.Second)
|
18
|
+
.put("sec", TimestampUnit.Second)
|
19
|
+
.put("s", TimestampUnit.Second)
|
20
|
+
.put("MilliSecond", TimestampUnit.MilliSecond)
|
21
|
+
.put("millisecond", TimestampUnit.MilliSecond)
|
22
|
+
.put("milli_second", TimestampUnit.MilliSecond)
|
23
|
+
.put("milli", TimestampUnit.MilliSecond)
|
24
|
+
.put("msec", TimestampUnit.MilliSecond)
|
25
|
+
.put("ms", TimestampUnit.MilliSecond)
|
26
|
+
.put("MicroSecond", TimestampUnit.MicroSecond)
|
27
|
+
.put("microsecond", TimestampUnit.MicroSecond)
|
28
|
+
.put("micro_second", TimestampUnit.MicroSecond)
|
29
|
+
.put("micro", TimestampUnit.MicroSecond)
|
30
|
+
.put("usec", TimestampUnit.MicroSecond)
|
31
|
+
.put("us", TimestampUnit.MicroSecond)
|
32
|
+
.put("NanoSecond", TimestampUnit.NanoSecond)
|
33
|
+
.put("nanosecond", TimestampUnit.NanoSecond)
|
34
|
+
.put("nano_second", TimestampUnit.NanoSecond)
|
35
|
+
.put("nano", TimestampUnit.NanoSecond)
|
36
|
+
.put("nsec", TimestampUnit.NanoSecond)
|
37
|
+
.put("ns", TimestampUnit.NanoSecond)
|
38
|
+
.build();
|
39
|
+
|
40
|
+
public TimestampUnitDeserializer() {
|
41
|
+
super(TimestampUnit.class);
|
42
|
+
}
|
43
|
+
|
44
|
+
@Override
|
45
|
+
protected TimestampUnit _deserialize(String value, DeserializationContext ctxt) throws IOException, JsonProcessingException {
|
46
|
+
TimestampUnit unit = mapping.get(value);
|
47
|
+
if (unit == null) {
|
48
|
+
throw new JsonMappingException(
|
49
|
+
String.format("Unknown type name '%s'. Supported types are: %s",
|
50
|
+
value,
|
51
|
+
Joiner.on(", ").join(mapping.keySet())));
|
52
|
+
}
|
53
|
+
return unit;
|
54
|
+
}
|
55
|
+
}
|
@@ -1,6 +1,7 @@
|
|
1
1
|
package org.embulk.parser.avro.getter;
|
2
2
|
|
3
3
|
import org.apache.avro.Schema;
|
4
|
+
import org.embulk.parser.avro.TimestampUnit;
|
4
5
|
import org.embulk.spi.Column;
|
5
6
|
import org.embulk.spi.DataException;
|
6
7
|
import org.embulk.spi.PageBuilder;
|
@@ -10,12 +11,14 @@ public class ColumnGetterFactory {
|
|
10
11
|
private org.apache.avro.Schema avroSchema;
|
11
12
|
private PageBuilder pageBuilder;
|
12
13
|
private TimestampParser[] timestampParsers;
|
14
|
+
private TimestampUnit[] timestampUnits;
|
13
15
|
|
14
|
-
public ColumnGetterFactory(org.apache.avro.Schema avroSchema, PageBuilder pageBuilder, TimestampParser[] timestampParsers)
|
16
|
+
public ColumnGetterFactory(org.apache.avro.Schema avroSchema, PageBuilder pageBuilder, TimestampParser[] timestampParsers, TimestampUnit[] timestampUnits)
|
15
17
|
{
|
16
18
|
this.avroSchema = avroSchema;
|
17
19
|
this.pageBuilder = pageBuilder;
|
18
20
|
this.timestampParsers = timestampParsers;
|
21
|
+
this.timestampUnits = timestampUnits;
|
19
22
|
}
|
20
23
|
|
21
24
|
public BaseColumnGetter newColumnGetter(Column column)
|
@@ -43,13 +46,13 @@ public class ColumnGetterFactory {
|
|
43
46
|
case ENUM:
|
44
47
|
return new StringColumnGetter(pageBuilder, timestampParsers);
|
45
48
|
case INT:
|
46
|
-
return new IntegerColumnGetter(pageBuilder, timestampParsers);
|
49
|
+
return new IntegerColumnGetter(pageBuilder, timestampParsers, timestampUnits);
|
47
50
|
case LONG:
|
48
|
-
return new LongColumnGetter(pageBuilder, timestampParsers);
|
51
|
+
return new LongColumnGetter(pageBuilder, timestampParsers, timestampUnits);
|
49
52
|
case FLOAT:
|
50
|
-
return new FloatColumnGetter(pageBuilder, timestampParsers);
|
53
|
+
return new FloatColumnGetter(pageBuilder, timestampParsers, timestampUnits);
|
51
54
|
case DOUBLE:
|
52
|
-
return new DoubleColumnGetter(pageBuilder, timestampParsers);
|
55
|
+
return new DoubleColumnGetter(pageBuilder, timestampParsers, timestampUnits);
|
53
56
|
case BOOLEAN:
|
54
57
|
return new BooleanColumnGetter(pageBuilder, timestampParsers);
|
55
58
|
case ARRAY:
|
@@ -1,15 +1,18 @@
|
|
1
1
|
package org.embulk.parser.avro.getter;
|
2
2
|
|
3
|
+
import org.embulk.parser.avro.TimestampUnit;
|
3
4
|
import org.embulk.spi.Column;
|
4
5
|
import org.embulk.spi.PageBuilder;
|
5
|
-
import org.embulk.spi.time.Timestamp;
|
6
6
|
import org.embulk.spi.time.TimestampParser;
|
7
7
|
|
8
8
|
public class DoubleColumnGetter extends BaseColumnGetter {
|
9
9
|
protected Double value;
|
10
10
|
|
11
|
-
|
11
|
+
private final TimestampUnit[] timestampUnits;
|
12
|
+
|
13
|
+
public DoubleColumnGetter(PageBuilder pageBuilder, TimestampParser[] timestampParsers, TimestampUnit[] timestampUnits) {
|
12
14
|
super(pageBuilder, timestampParsers);
|
15
|
+
this.timestampUnits = timestampUnits;
|
13
16
|
}
|
14
17
|
|
15
18
|
@Override
|
@@ -51,8 +54,8 @@ public class DoubleColumnGetter extends BaseColumnGetter {
|
|
51
54
|
pageBuilder.setNull(column);
|
52
55
|
}
|
53
56
|
else {
|
54
|
-
|
55
|
-
pageBuilder.setTimestamp(column,
|
57
|
+
TimestampUnit unit = timestampUnits[column.getIndex()];
|
58
|
+
pageBuilder.setTimestamp(column, unit.toTimestamp(value));
|
56
59
|
}
|
57
60
|
}
|
58
61
|
}
|
@@ -1,15 +1,18 @@
|
|
1
1
|
package org.embulk.parser.avro.getter;
|
2
2
|
|
3
|
+
import org.embulk.parser.avro.TimestampUnit;
|
3
4
|
import org.embulk.spi.Column;
|
4
5
|
import org.embulk.spi.PageBuilder;
|
5
|
-
import org.embulk.spi.time.Timestamp;
|
6
6
|
import org.embulk.spi.time.TimestampParser;
|
7
7
|
|
8
8
|
public class FloatColumnGetter extends BaseColumnGetter {
|
9
9
|
protected Float value;
|
10
10
|
|
11
|
-
|
11
|
+
private final TimestampUnit[] timestampUnits;
|
12
|
+
|
13
|
+
public FloatColumnGetter(PageBuilder pageBuilder, TimestampParser[] timestampParsers, TimestampUnit[] timestampUnits) {
|
12
14
|
super(pageBuilder, timestampParsers);
|
15
|
+
this.timestampUnits = timestampUnits;
|
13
16
|
}
|
14
17
|
|
15
18
|
@Override
|
@@ -51,8 +54,8 @@ public class FloatColumnGetter extends BaseColumnGetter {
|
|
51
54
|
pageBuilder.setNull(column);
|
52
55
|
}
|
53
56
|
else {
|
54
|
-
|
55
|
-
pageBuilder.setTimestamp(column,
|
57
|
+
TimestampUnit unit = timestampUnits[column.getIndex()];
|
58
|
+
pageBuilder.setTimestamp(column, unit.toTimestamp(value.doubleValue()));
|
56
59
|
}
|
57
60
|
}
|
58
61
|
}
|
@@ -1,15 +1,18 @@
|
|
1
1
|
package org.embulk.parser.avro.getter;
|
2
2
|
|
3
|
+
import org.embulk.parser.avro.TimestampUnit;
|
3
4
|
import org.embulk.spi.Column;
|
4
5
|
import org.embulk.spi.PageBuilder;
|
5
|
-
import org.embulk.spi.time.Timestamp;
|
6
6
|
import org.embulk.spi.time.TimestampParser;
|
7
7
|
|
8
8
|
public class IntegerColumnGetter extends BaseColumnGetter {
|
9
9
|
protected Integer value;
|
10
10
|
|
11
|
-
|
11
|
+
private final TimestampUnit[] timestampUnits;
|
12
|
+
|
13
|
+
public IntegerColumnGetter(PageBuilder pageBuilder, TimestampParser[] timestampParsers, TimestampUnit[] timestampUnits) {
|
12
14
|
super(pageBuilder, timestampParsers);
|
15
|
+
this.timestampUnits = timestampUnits;
|
13
16
|
}
|
14
17
|
|
15
18
|
@Override
|
@@ -51,7 +54,8 @@ public class IntegerColumnGetter extends BaseColumnGetter {
|
|
51
54
|
pageBuilder.setNull(column);
|
52
55
|
}
|
53
56
|
else {
|
54
|
-
|
57
|
+
TimestampUnit unit = timestampUnits[column.getIndex()];
|
58
|
+
pageBuilder.setTimestamp(column, unit.toTimestamp(value.longValue()));
|
55
59
|
}
|
56
60
|
}
|
57
61
|
}
|
@@ -1,15 +1,18 @@
|
|
1
1
|
package org.embulk.parser.avro.getter;
|
2
2
|
|
3
|
+
import org.embulk.parser.avro.TimestampUnit;
|
3
4
|
import org.embulk.spi.Column;
|
4
5
|
import org.embulk.spi.PageBuilder;
|
5
|
-
import org.embulk.spi.time.Timestamp;
|
6
6
|
import org.embulk.spi.time.TimestampParser;
|
7
7
|
|
8
8
|
public class LongColumnGetter extends BaseColumnGetter {
|
9
9
|
protected Long value;
|
10
10
|
|
11
|
-
|
11
|
+
private final TimestampUnit[] timestampUnits;
|
12
|
+
|
13
|
+
public LongColumnGetter(PageBuilder pageBuilder, TimestampParser[] timestampParsers, TimestampUnit[] timestampUnits) {
|
12
14
|
super(pageBuilder, timestampParsers);
|
15
|
+
this.timestampUnits = timestampUnits;
|
13
16
|
}
|
14
17
|
|
15
18
|
@Override
|
@@ -52,7 +55,8 @@ public class LongColumnGetter extends BaseColumnGetter {
|
|
52
55
|
pageBuilder.setNull(column);
|
53
56
|
}
|
54
57
|
else {
|
55
|
-
|
58
|
+
TimestampUnit unit = timestampUnits[column.getIndex()];
|
59
|
+
pageBuilder.setTimestamp(column, unit.toTimestamp(value));
|
56
60
|
}
|
57
61
|
}
|
58
62
|
}
|
@@ -34,6 +34,7 @@ import static org.embulk.spi.type.Types.JSON;
|
|
34
34
|
import static org.embulk.spi.type.Types.TIMESTAMP;
|
35
35
|
import static org.junit.Assert.assertEquals;
|
36
36
|
import static org.junit.Assert.assertNull;
|
37
|
+
import static org.junit.Assert.assertTrue;
|
37
38
|
|
38
39
|
public class TestAvroParserPlugin
|
39
40
|
{
|
@@ -50,6 +51,7 @@ public class TestAvroParserPlugin
|
|
50
51
|
{
|
51
52
|
config = config().set("type", "avro");
|
52
53
|
plugin = new AvroParserPlugin();
|
54
|
+
|
53
55
|
recreatePageOutput();
|
54
56
|
}
|
55
57
|
|
@@ -69,7 +71,7 @@ public class TestAvroParserPlugin
|
|
69
71
|
column("options", JSON),
|
70
72
|
column("spec", JSON),
|
71
73
|
column("created_at", TIMESTAMP, config().set("format", "%Y-%m-%dT%H:%M:%S%:z")),
|
72
|
-
column("created_at_utc", TIMESTAMP)
|
74
|
+
column("created_at_utc", TIMESTAMP, config().set("timestamp_unit", "second"))
|
73
75
|
);
|
74
76
|
|
75
77
|
ConfigSource config = this.config.deepCopy().set("columns", schema).set("avsc", this.getClass().getResource("item.avsc").getPath());
|
@@ -89,7 +91,44 @@ public class TestAvroParserPlugin
|
|
89
91
|
assertEquals("bar", ((MapValue)record[8]).map().get(ValueFactory.newString("foo")).toString());
|
90
92
|
assertEquals("opt1", ((MapValue)record[9]).map().get(ValueFactory.newString("key")).toString());
|
91
93
|
assertEquals("2016-05-08 19:35:43 UTC", record[10].toString());
|
92
|
-
assertEquals("2016-05-08 19:35:
|
94
|
+
assertEquals("2016-05-08 19:35:28 UTC", record[11].toString());
|
95
|
+
}
|
96
|
+
|
97
|
+
@Test
|
98
|
+
public void useTimestampUnit()
|
99
|
+
throws Exception
|
100
|
+
{
|
101
|
+
SchemaConfig schema = schema(
|
102
|
+
column("timestamp", TIMESTAMP, config().set("timestamp_unit", "second")),
|
103
|
+
column("timestamp_long", TIMESTAMP, config().set("timestamp_unit", "second")),
|
104
|
+
column("timestamp_milli", TIMESTAMP, config().set("timestamp_unit", "milli")),
|
105
|
+
column("timestamp_micro", TIMESTAMP, config().set("timestamp_unit", "micro")),
|
106
|
+
column("timestamp_nano", TIMESTAMP, config().set("timestamp_unit", "nano")),
|
107
|
+
column("timestamp_float", TIMESTAMP, config().set("timestamp_unit", "second")),
|
108
|
+
column("timestamp_double", TIMESTAMP, config().set("timestamp_unit", "second")),
|
109
|
+
column("timestamp_double_milli", TIMESTAMP, config().set("timestamp_unit", "milli")),
|
110
|
+
column("timestamp_double_micro", TIMESTAMP, config().set("timestamp_unit", "micro")),
|
111
|
+
column("timestamp_double_nano", TIMESTAMP, config().set("timestamp_unit", "nano"))
|
112
|
+
);
|
113
|
+
|
114
|
+
ConfigSource config = this.config.deepCopy().set("columns", schema).set("avsc", this.getClass().getResource("item2.avsc").getPath());
|
115
|
+
|
116
|
+
transaction(config, fileInput(new File(this.getClass().getResource("items2.avro").getPath())));
|
117
|
+
|
118
|
+
List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
|
119
|
+
assertEquals(1, records.size());
|
120
|
+
|
121
|
+
Object[] record = records.get(0);
|
122
|
+
assertEquals("2018-02-23 12:13:52 UTC", record[0].toString());
|
123
|
+
assertEquals("2018-02-23 12:13:52 UTC", record[1].toString());
|
124
|
+
assertEquals("2018-02-23 12:13:52.717 UTC", record[2].toString());
|
125
|
+
assertEquals("2018-02-23 12:13:52.717249 UTC", record[3].toString());
|
126
|
+
assertEquals("2018-02-23 12:13:52.717249634 UTC", record[4].toString());
|
127
|
+
assertEquals("2018-02-23 12:13:52 UTC", record[5].toString());
|
128
|
+
assertTrue(record[6].toString().matches("2018-02-23 12:13:52.717249.* UTC"));
|
129
|
+
assertTrue(record[7].toString().matches("2018-02-23 12:13:52.717249.* UTC"));
|
130
|
+
assertTrue(record[8].toString().matches("2018-02-23 12:13:52.717249.* UTC"));
|
131
|
+
assertTrue(record[9].toString().matches("2018-02-23 12:13:52.717249.* UTC"));
|
93
132
|
}
|
94
133
|
|
95
134
|
@Test
|
@@ -0,0 +1 @@
|
|
1
|
+
.bundle
|
@@ -0,0 +1,6 @@
|
|
1
|
+
{"id":1,"code":123456789012345678,"name":"Desktop","description":"Office and Personal Usage","flag":true,"created_at":"2016-05-09T04:35:43+09:00","created_at_utc":1.46273613E9,"price":30000.0,"spec":{"key":"opt1","value":"optvalue1"},"tags":["tag1","tag2"],"options":{"hoge":null,"foo":"bar"},"item_type":"D","dummy":null}
|
2
|
+
{"id":2,"code":123456789012345679,"name":"Laptop","description":null,"flag":false,"created_at":"2016-05-09T04:35:43+09:00","created_at_utc":1.46273613E9,"price":50000.0,"spec":{"key":"opt1","value":null},"tags":null,"options":{},"item_type":"M","dummy":null}
|
3
|
+
{"id":3,"code":123456789012345680,"name":"Tablet","description":"Personal Usage","flag":true,"created_at":"2016-05-09T04:35:43+09:00","created_at_utc":1.46273613E9,"price":null,"spec":{"key":"opt1","value":"optvalue1"},"tags":["tag3"],"options":{},"item_type":"M","dummy":null}
|
4
|
+
{"id":4,"code":123456789012345681,"name":"Mobile","description":"Personal Usage","flag":true,"created_at":"2016-05-09T04:35:43+09:00","created_at_utc":1.46273613E9,"price":10000.0,"spec":{"key":"opt1","value":"optvalue1"},"tags":[],"options":{},"item_type":"M","dummy":null}
|
5
|
+
{"id":5,"code":123456789012345682,"name":"Notepad","description":null,"flag":true,"created_at":"2016-05-09T04:35:43+09:00","created_at_utc":1.46273613E9,"price":20000.0,"spec":{"key":"opt1","value":"optvalue1"},"tags":["tag1","tag2"],"options":{},"item_type":"M","dummy":null}
|
6
|
+
{"id":6,"code":123456789012345683,"name":"SmartPhone","description":"Multipurpose","flag":true,"created_at":"2016-05-09T04:35:43+09:00","created_at_utc":1.46273613E9,"price":40000.0,"spec":{"key":"opt1","value":"optvalue1"},"tags":["tag1","tag2"],"options":{},"item_type":"M","dummy":null}
|
@@ -0,0 +1 @@
|
|
1
|
+
{"timestamp": 1519388032, "timestamp_long": 1519388032, "timestamp_milli": 1519388032717, "timestamp_micro": 1519388032717249, "timestamp_nano": 1519388032717249634, "timestamp_float": 1519388032.7172496, "timestamp_float_milli": 1519388032717.2496, "timestamp_float_micro": 1519388032717249.6, "timestamp_float_nano": 1519388032717249634.0,"timestamp_double": 1519388032.717249634, "timestamp_double_milli": 1519388032717.249634, "timestamp_double_micro": 1519388032717249.634, "timestamp_double_nano": 1519388032717249634.0}
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require 'avro'
|
2
|
+
require 'json'
|
3
|
+
|
4
|
+
schema = Avro::Schema.parse(File.read(ARGV[0]))
|
5
|
+
file = File.open(ARGV[1], 'wb')
|
6
|
+
writer = Avro::IO::DatumWriter.new(schema)
|
7
|
+
dw = Avro::DataFile::Writer.new(file, writer, schema)
|
8
|
+
|
9
|
+
data = File.read(ARGV[2]).each_line.map do |l|
|
10
|
+
JSON.load(l)
|
11
|
+
end
|
12
|
+
|
13
|
+
data.each do |d|
|
14
|
+
dw << d
|
15
|
+
end
|
16
|
+
|
17
|
+
dw.close
|
@@ -0,0 +1,20 @@
|
|
1
|
+
{
|
2
|
+
"type" : "record",
|
3
|
+
"name" : "Item",
|
4
|
+
"namespace" : "example.avro",
|
5
|
+
"fields" : [
|
6
|
+
{"name": "timestamp", "type": "int"},
|
7
|
+
{"name": "timestamp_long", "type": "long"},
|
8
|
+
{"name": "timestamp_milli", "type": "long"},
|
9
|
+
{"name": "timestamp_micro", "type": "long"},
|
10
|
+
{"name": "timestamp_nano", "type": "long"},
|
11
|
+
{"name": "timestamp_float", "type": "float"},
|
12
|
+
{"name": "timestamp_float_milli", "type": "float"},
|
13
|
+
{"name": "timestamp_float_micro", "type": "float"},
|
14
|
+
{"name": "timestamp_float_nano", "type": "float"},
|
15
|
+
{"name": "timestamp_double", "type": "double"},
|
16
|
+
{"name": "timestamp_double_milli", "type": "double"},
|
17
|
+
{"name": "timestamp_double_micro", "type": "double"},
|
18
|
+
{"name": "timestamp_double_nano", "type": "double"}
|
19
|
+
]
|
20
|
+
}
|
Binary file
|
metadata
CHANGED
@@ -1,43 +1,43 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-parser-avro
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- joker1007
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-02-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name: bundler
|
15
|
-
version_requirements: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - ~>
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '1.0'
|
20
14
|
requirement: !ruby/object:Gem::Requirement
|
21
15
|
requirements:
|
22
16
|
- - ~>
|
23
17
|
- !ruby/object:Gem::Version
|
24
18
|
version: '1.0'
|
19
|
+
name: bundler
|
25
20
|
prerelease: false
|
26
21
|
type: :development
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: rake
|
29
22
|
version_requirements: !ruby/object:Gem::Requirement
|
30
23
|
requirements:
|
31
|
-
- -
|
24
|
+
- - ~>
|
32
25
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
26
|
+
version: '1.0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
34
28
|
requirement: !ruby/object:Gem::Requirement
|
35
29
|
requirements:
|
36
30
|
- - '>='
|
37
31
|
- !ruby/object:Gem::Version
|
38
32
|
version: '10.0'
|
33
|
+
name: rake
|
39
34
|
prerelease: false
|
40
35
|
type: :development
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
41
|
description: Parses Avro files read by other file input plugins.
|
42
42
|
email:
|
43
43
|
- kakyoin.hierophant@gmail.com
|
@@ -64,6 +64,8 @@ files:
|
|
64
64
|
- lib/embulk/guess/avro.rb
|
65
65
|
- lib/embulk/parser/avro.rb
|
66
66
|
- src/main/java/org/embulk/parser/avro/AvroParserPlugin.java
|
67
|
+
- src/main/java/org/embulk/parser/avro/TimestampUnit.java
|
68
|
+
- src/main/java/org/embulk/parser/avro/TimestampUnitDeserializer.java
|
67
69
|
- src/main/java/org/embulk/parser/avro/getter/AvroGenericDataConverter.java
|
68
70
|
- src/main/java/org/embulk/parser/avro/getter/BaseColumnGetter.java
|
69
71
|
- src/main/java/org/embulk/parser/avro/getter/BooleanColumnGetter.java
|
@@ -75,16 +77,24 @@ files:
|
|
75
77
|
- src/main/java/org/embulk/parser/avro/getter/LongColumnGetter.java
|
76
78
|
- src/main/java/org/embulk/parser/avro/getter/StringColumnGetter.java
|
77
79
|
- src/test/java/org/embulk/parser/avro/TestAvroParserPlugin.java
|
80
|
+
- src/test/resources/org/embulk/parser/avro/.gitignore
|
81
|
+
- src/test/resources/org/embulk/parser/avro/Gemfile
|
82
|
+
- src/test/resources/org/embulk/parser/avro/Gemfile.lock
|
83
|
+
- src/test/resources/org/embulk/parser/avro/data.json
|
84
|
+
- src/test/resources/org/embulk/parser/avro/data2.json
|
85
|
+
- src/test/resources/org/embulk/parser/avro/data_creator.rb
|
78
86
|
- src/test/resources/org/embulk/parser/avro/item.avsc
|
87
|
+
- src/test/resources/org/embulk/parser/avro/item2.avsc
|
79
88
|
- src/test/resources/org/embulk/parser/avro/items.avro
|
80
|
-
-
|
89
|
+
- src/test/resources/org/embulk/parser/avro/items2.avro
|
90
|
+
- classpath/avro-1.8.2.jar
|
91
|
+
- classpath/paranamer-2.7.jar
|
92
|
+
- classpath/xz-1.5.jar
|
81
93
|
- classpath/commons-compress-1.8.1.jar
|
82
|
-
- classpath/embulk-parser-avro-0.
|
94
|
+
- classpath/embulk-parser-avro-0.3.0.jar
|
83
95
|
- classpath/jackson-core-asl-1.9.13.jar
|
84
|
-
- classpath/jackson-mapper-asl-1.9.13.jar
|
85
|
-
- classpath/paranamer-2.7.jar
|
86
96
|
- classpath/snappy-java-1.1.1.3.jar
|
87
|
-
- classpath/
|
97
|
+
- classpath/jackson-mapper-asl-1.9.13.jar
|
88
98
|
homepage: https://github.com/joker1007/embulk-parser-avro
|
89
99
|
licenses:
|
90
100
|
- MIT
|