embulk-parser-avro 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/README.md +27 -1
- data/build.gradle +8 -8
- data/src/main/java/org/embulk/parser/avro/AvroParserPlugin.java +24 -1
- data/src/main/java/org/embulk/parser/avro/TimestampUnit.java +70 -0
- data/src/main/java/org/embulk/parser/avro/TimestampUnitDeserializer.java +55 -0
- data/src/main/java/org/embulk/parser/avro/getter/ColumnGetterFactory.java +8 -5
- data/src/main/java/org/embulk/parser/avro/getter/DoubleColumnGetter.java +7 -4
- data/src/main/java/org/embulk/parser/avro/getter/FloatColumnGetter.java +7 -4
- data/src/main/java/org/embulk/parser/avro/getter/IntegerColumnGetter.java +7 -3
- data/src/main/java/org/embulk/parser/avro/getter/LongColumnGetter.java +7 -3
- data/src/test/java/org/embulk/parser/avro/TestAvroParserPlugin.java +41 -2
- data/src/test/resources/org/embulk/parser/avro/.gitignore +1 -0
- data/src/test/resources/org/embulk/parser/avro/Gemfile +7 -0
- data/src/test/resources/org/embulk/parser/avro/Gemfile.lock +15 -0
- data/src/test/resources/org/embulk/parser/avro/data.json +6 -0
- data/src/test/resources/org/embulk/parser/avro/data2.json +1 -0
- data/src/test/resources/org/embulk/parser/avro/data_creator.rb +17 -0
- data/src/test/resources/org/embulk/parser/avro/item2.avsc +20 -0
- data/src/test/resources/org/embulk/parser/avro/items2.avro +0 -0
- metadata +27 -17
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 2ee00ffbaf0cf78dfc2c7bd0861e68603b528565
+  data.tar.gz: 08786ae000735a7f8d2aad1514a02de9c5095ec4
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 2697fb6b9cd4fb2cf6a72194450031f11ade7780d2afb050d40d65de39a10a5b56da507125b5580de820ba78e399ff39348726bb0eb82edb0c7aae13de27a555
+  data.tar.gz: 839d69c7a19dfc6eeddff3b4fb9f2bee3172b1d4ab05793753bae1fb466ef1bfa91c5e86e099f3fe44bc5c0e0047ed3a34eb789ae26c7e0507b29c25a58ce4ca
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -12,11 +12,37 @@
 - **type**: Specify this parser as avro
 - **avsc**: Specify avro schema file.
 - **columns**: Specify column name and type. See below (array, optional)
+- timestamp_unit: Specify unit of time. (This config is effective only if avro value is `long`, `int`, `float`, `double`)
 * **default_timezone**: Default timezone of the timestamp (string, default: UTC)
 * **default_timestamp_format**: Default timestamp format of the timestamp (string, default: `%Y-%m-%d %H:%M:%S.%N %z`)
 
 If columns is not set, this plugin detect schema automatically by using avsc schema.
 
+support `timestamp_unit` type is below.
+
+- "Second"
+- "second"
+- "sec"
+- "s"
+- "MilliSecond"
+- "millisecond"
+- "milli_second"
+- "milli"
+- "msec"
+- "ms"
+- "MicroSecond"
+- "microsecond"
+- "micro_second"
+- "micro"
+- "usec"
+- "us"
+- "NanoSecond"
+- "nanosecond"
+- "nano_second"
+- "nano"
+- "nsec"
+- "ns"
+
 ## Example
 
 ```yaml
@@ -38,7 +64,7 @@ in:
 - {name: "options", type: "json"}
 - {name: "spec", type: "json"}
 - {name: "created_at", type: "timestamp", format: "%Y-%m-%dT%H:%M:%S%:z"}
-- {name: "created_at_utc", type: "timestamp"}
+- {name: "created_at_utc", type: "timestamp", timestamp_unit: "second"}
 
 out:
   type: stdout
data/build.gradle
CHANGED
@@ -13,19 +13,19 @@ configurations {
     provided
 }
 
-version = "0.
+version = "0.3.0"
 
-sourceCompatibility = 1.
-targetCompatibility = 1.
+sourceCompatibility = 1.8
+targetCompatibility = 1.8
 
 dependencies {
-    compile "org.embulk:embulk-core:0.
-    provided "org.embulk:embulk-core:0.
-    compile "org.apache.avro:avro:1.8.
+    compile "org.embulk:embulk-core:0.9.4"
+    provided "org.embulk:embulk-core:0.9.4"
+    compile "org.apache.avro:avro:1.8.2"
     testCompile "junit:junit:4.+"
 
-    testCompile "org.embulk:embulk-core:0.
-    testCompile "org.embulk:embulk-standards:0.
+    testCompile "org.embulk:embulk-core:0.9.4:tests"
+    testCompile "org.embulk:embulk-standards:0.9.4"
 }
 
 task classpath(type: Copy, dependsOn: ["jar"]) {
data/src/main/java/org/embulk/parser/avro/AvroParserPlugin.java
CHANGED
@@ -15,6 +15,7 @@ import org.embulk.config.TaskSource;
 import org.embulk.parser.avro.getter.BaseColumnGetter;
 import org.embulk.parser.avro.getter.ColumnGetterFactory;
 import org.embulk.spi.Column;
+import org.embulk.spi.ColumnConfig;
 import org.embulk.spi.Exec;
 import org.embulk.spi.FileInput;
 import org.embulk.spi.PageBuilder;
@@ -23,6 +24,7 @@ import org.embulk.spi.ParserPlugin;
 import org.embulk.spi.Schema;
 import org.embulk.spi.SchemaConfig;
 import org.embulk.spi.time.TimestampParser;
+import org.embulk.spi.type.TimestampType;
 import org.embulk.spi.type.Types;
 import org.embulk.spi.unit.LocalFile;
 import org.embulk.spi.util.FileInputInputStream;
@@ -46,6 +48,13 @@ public class AvroParserPlugin
         LocalFile getAvsc();
     }
 
+    public interface TimestampUnitConfig extends Task
+    {
+        @Config("timestamp_unit")
+        @ConfigDefault("\"second\"")
+        public TimestampUnit getTimestampUnit();
+    }
+
     @Override
     public void transaction(ConfigSource config, ParserPlugin.Control control)
     {
@@ -128,6 +137,7 @@ public class AvroParserPlugin
         PluginTask task = taskSource.loadTask(PluginTask.class);
         List<Column> columns = schema.getColumns();
         final TimestampParser[] timestampParsers = Timestamps.newTimestampColumnParsers(task, task.getColumns());
+        final TimestampUnit[] timestampUnits = newTimestampUnits(task.getColumns());
         File avsc = task.getAvsc().getFile();
         final org.apache.avro.Schema avroSchema;
         try {
@@ -137,7 +147,7 @@ public class AvroParserPlugin
         }
 
         try (FileInputInputStream is = new FileInputInputStream(input); final PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), schema, output)) {
-            ColumnGetterFactory factory = new ColumnGetterFactory(avroSchema, pageBuilder, timestampParsers);
+            ColumnGetterFactory factory = new ColumnGetterFactory(avroSchema, pageBuilder, timestampParsers, timestampUnits);
             ImmutableMap.Builder<String, BaseColumnGetter> columnGettersBuilder = ImmutableMap.builder();
             for (Column column : columns) {
                 BaseColumnGetter columnGetter = factory.newColumnGetter(column);
@@ -165,4 +175,17 @@ public class AvroParserPlugin
             throw new RuntimeException(e);
         }
     }
+
+    private TimestampUnit[] newTimestampUnits(SchemaConfig columns) {
+        TimestampUnit[] units = new TimestampUnit[columns.getColumnCount()];
+        int i = 0;
+        for (ColumnConfig column : columns.getColumns()) {
+            if (column.getType() instanceof TimestampType) {
+                TimestampUnitConfig option = column.getOption().loadConfig(TimestampUnitConfig.class);
+                units[i] = option.getTimestampUnit();
+            }
+            i++;
+        }
+        return units;
+    }
 }
data/src/main/java/org/embulk/parser/avro/TimestampUnit.java
ADDED
@@ -0,0 +1,70 @@
+package org.embulk.parser.avro;
+
+import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
+import org.embulk.spi.time.Timestamp;
+
+
+@JsonDeserialize(using=TimestampUnitDeserializer.class)
+public enum TimestampUnit {
+    Second {
+        @Override
+        public Timestamp toTimestamp(Long value)
+        {
+            return Timestamp.ofEpochSecond(value);
+        }
+
+        @Override
+        public Timestamp toTimestamp(Double value)
+        {
+            long sec = value.longValue();
+            double rest = value - sec;
+            return Timestamp.ofEpochSecond(0, sec * 1000000000L + (long) (rest * 1000000000L));
+        }
+    },
+    MilliSecond {
+        @Override
+        public Timestamp toTimestamp(Long value)
+        {
+            return Timestamp.ofEpochSecond(0, value * 1000000L);
+        }
+
+        @Override
+        public Timestamp toTimestamp(Double value)
+        {
+            long sec = value.longValue();
+            double rest = value - sec;
+            return Timestamp.ofEpochSecond(0, sec * 1000000L + (long) (rest * 1000000L));
+        }
+    },
+    MicroSecond {
+        @Override
+        public Timestamp toTimestamp(Long value)
+        {
+            return Timestamp.ofEpochSecond(0, value * 1000L);
+        }
+
+        @Override
+        public Timestamp toTimestamp(Double value)
+        {
+            long sec = value.longValue();
+            double rest = value - sec;
+            return Timestamp.ofEpochSecond(0, sec * 1000L + (long) (rest * 1000L));
+        }
+    },
+    NanoSecond {
+        @Override
+        public Timestamp toTimestamp(Long value)
+        {
+            return Timestamp.ofEpochSecond(0, value);
+        }
+
+        @Override
+        public Timestamp toTimestamp(Double value)
+        {
+            return Timestamp.ofEpochSecond(0, value.longValue());
+        }
+    };
+
+    abstract public Timestamp toTimestamp(Long value);
+    abstract public Timestamp toTimestamp(Double value);
+}
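For orientation only (this example is not part of the released gem), the conversion arithmetic introduced by TimestampUnit can be exercised directly on the epoch values used in the new test fixtures. The class name below is hypothetical; it assumes embulk-core's org.embulk.spi.time.Timestamp (with its getEpochSecond()/getNano() accessors) is on the classpath:

    import org.embulk.parser.avro.TimestampUnit;
    import org.embulk.spi.time.Timestamp;

    public class TimestampUnitExample
    {
        public static void main(String[] args)
        {
            // 2018-02-23 12:13:52(.717) UTC expressed in different units
            Timestamp fromSeconds = TimestampUnit.Second.toTimestamp(1519388032L);
            Timestamp fromMillis = TimestampUnit.MilliSecond.toTimestamp(1519388032717L);
            Timestamp fromDouble = TimestampUnit.Second.toTimestamp(1519388032.717d);

            System.out.println(fromSeconds.getEpochSecond()); // 1519388032
            System.out.println(fromMillis.getNano());         // 717000000 (milliseconds carried into the nano field)
            System.out.println(fromDouble.getNano());         // ~717000000 (fractional seconds; double rounding applies)
        }
    }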
data/src/main/java/org/embulk/parser/avro/TimestampUnitDeserializer.java
ADDED
@@ -0,0 +1,55 @@
+package org.embulk.parser.avro;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.DeserializationContext;
+import com.fasterxml.jackson.databind.JsonMappingException;
+import com.fasterxml.jackson.databind.deser.std.FromStringDeserializer;
+import com.google.common.base.Joiner;
+import com.google.common.collect.ImmutableMap;
+
+import java.io.IOException;
+
+public class TimestampUnitDeserializer extends FromStringDeserializer<TimestampUnit>
+{
+
+    public static ImmutableMap<String, TimestampUnit> mapping = ImmutableMap.<String, TimestampUnit>builder()
+            .put("Second", TimestampUnit.Second)
+            .put("second", TimestampUnit.Second)
+            .put("sec", TimestampUnit.Second)
+            .put("s", TimestampUnit.Second)
+            .put("MilliSecond", TimestampUnit.MilliSecond)
+            .put("millisecond", TimestampUnit.MilliSecond)
+            .put("milli_second", TimestampUnit.MilliSecond)
+            .put("milli", TimestampUnit.MilliSecond)
+            .put("msec", TimestampUnit.MilliSecond)
+            .put("ms", TimestampUnit.MilliSecond)
+            .put("MicroSecond", TimestampUnit.MicroSecond)
+            .put("microsecond", TimestampUnit.MicroSecond)
+            .put("micro_second", TimestampUnit.MicroSecond)
+            .put("micro", TimestampUnit.MicroSecond)
+            .put("usec", TimestampUnit.MicroSecond)
+            .put("us", TimestampUnit.MicroSecond)
+            .put("NanoSecond", TimestampUnit.NanoSecond)
+            .put("nanosecond", TimestampUnit.NanoSecond)
+            .put("nano_second", TimestampUnit.NanoSecond)
+            .put("nano", TimestampUnit.NanoSecond)
+            .put("nsec", TimestampUnit.NanoSecond)
+            .put("ns", TimestampUnit.NanoSecond)
+            .build();
+
+    public TimestampUnitDeserializer() {
+        super(TimestampUnit.class);
+    }
+
+    @Override
+    protected TimestampUnit _deserialize(String value, DeserializationContext ctxt) throws IOException, JsonProcessingException {
+        TimestampUnit unit = mapping.get(value);
+        if (unit == null) {
+            throw new JsonMappingException(
+                    String.format("Unknown type name '%s'. Supported types are: %s",
+                            value,
+                            Joiner.on(", ").join(mapping.keySet())));
+        }
+        return unit;
+    }
+}
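Likewise illustrative only (class name hypothetical): the public mapping table above is what resolves the timestamp_unit aliases listed in the README, and _deserialize raises a JsonMappingException for anything not in the table:

    import org.embulk.parser.avro.TimestampUnit;
    import org.embulk.parser.avro.TimestampUnitDeserializer;

    public class TimestampUnitAliasExample
    {
        public static void main(String[] args)
        {
            System.out.println(TimestampUnitDeserializer.mapping.get("ms"));           // MilliSecond
            System.out.println(TimestampUnitDeserializer.mapping.get("milli_second")); // MilliSecond
            System.out.println(TimestampUnitDeserializer.mapping.get("minutes"));      // null -> rejected by _deserialize
        }
    }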
data/src/main/java/org/embulk/parser/avro/getter/ColumnGetterFactory.java
CHANGED
@@ -1,6 +1,7 @@
 package org.embulk.parser.avro.getter;
 
 import org.apache.avro.Schema;
+import org.embulk.parser.avro.TimestampUnit;
 import org.embulk.spi.Column;
 import org.embulk.spi.DataException;
 import org.embulk.spi.PageBuilder;
@@ -10,12 +11,14 @@ public class ColumnGetterFactory {
     private org.apache.avro.Schema avroSchema;
     private PageBuilder pageBuilder;
     private TimestampParser[] timestampParsers;
+    private TimestampUnit[] timestampUnits;
 
-    public ColumnGetterFactory(org.apache.avro.Schema avroSchema, PageBuilder pageBuilder, TimestampParser[] timestampParsers)
+    public ColumnGetterFactory(org.apache.avro.Schema avroSchema, PageBuilder pageBuilder, TimestampParser[] timestampParsers, TimestampUnit[] timestampUnits)
     {
         this.avroSchema = avroSchema;
         this.pageBuilder = pageBuilder;
         this.timestampParsers = timestampParsers;
+        this.timestampUnits = timestampUnits;
     }
 
     public BaseColumnGetter newColumnGetter(Column column)
@@ -43,13 +46,13 @@ public class ColumnGetterFactory {
             case ENUM:
                 return new StringColumnGetter(pageBuilder, timestampParsers);
             case INT:
-                return new IntegerColumnGetter(pageBuilder, timestampParsers);
+                return new IntegerColumnGetter(pageBuilder, timestampParsers, timestampUnits);
             case LONG:
-                return new LongColumnGetter(pageBuilder, timestampParsers);
+                return new LongColumnGetter(pageBuilder, timestampParsers, timestampUnits);
             case FLOAT:
-                return new FloatColumnGetter(pageBuilder, timestampParsers);
+                return new FloatColumnGetter(pageBuilder, timestampParsers, timestampUnits);
             case DOUBLE:
-                return new DoubleColumnGetter(pageBuilder, timestampParsers);
+                return new DoubleColumnGetter(pageBuilder, timestampParsers, timestampUnits);
             case BOOLEAN:
                 return new BooleanColumnGetter(pageBuilder, timestampParsers);
             case ARRAY:
data/src/main/java/org/embulk/parser/avro/getter/DoubleColumnGetter.java
CHANGED
@@ -1,15 +1,18 @@
 package org.embulk.parser.avro.getter;
 
+import org.embulk.parser.avro.TimestampUnit;
 import org.embulk.spi.Column;
 import org.embulk.spi.PageBuilder;
-import org.embulk.spi.time.Timestamp;
 import org.embulk.spi.time.TimestampParser;
 
 public class DoubleColumnGetter extends BaseColumnGetter {
     protected Double value;
 
-    public DoubleColumnGetter(PageBuilder pageBuilder, TimestampParser[] timestampParsers) {
+    private final TimestampUnit[] timestampUnits;
+
+    public DoubleColumnGetter(PageBuilder pageBuilder, TimestampParser[] timestampParsers, TimestampUnit[] timestampUnits) {
         super(pageBuilder, timestampParsers);
+        this.timestampUnits = timestampUnits;
     }
 
     @Override
@@ -51,8 +54,8 @@ public class DoubleColumnGetter extends BaseColumnGetter {
             pageBuilder.setNull(column);
         }
         else {
-
-            pageBuilder.setTimestamp(column,
+            TimestampUnit unit = timestampUnits[column.getIndex()];
+            pageBuilder.setTimestamp(column, unit.toTimestamp(value));
         }
     }
 }
data/src/main/java/org/embulk/parser/avro/getter/FloatColumnGetter.java
CHANGED
@@ -1,15 +1,18 @@
 package org.embulk.parser.avro.getter;
 
+import org.embulk.parser.avro.TimestampUnit;
 import org.embulk.spi.Column;
 import org.embulk.spi.PageBuilder;
-import org.embulk.spi.time.Timestamp;
 import org.embulk.spi.time.TimestampParser;
 
 public class FloatColumnGetter extends BaseColumnGetter {
     protected Float value;
 
-    public FloatColumnGetter(PageBuilder pageBuilder, TimestampParser[] timestampParsers) {
+    private final TimestampUnit[] timestampUnits;
+
+    public FloatColumnGetter(PageBuilder pageBuilder, TimestampParser[] timestampParsers, TimestampUnit[] timestampUnits) {
         super(pageBuilder, timestampParsers);
+        this.timestampUnits = timestampUnits;
     }
 
     @Override
@@ -51,8 +54,8 @@ public class FloatColumnGetter extends BaseColumnGetter {
             pageBuilder.setNull(column);
         }
         else {
-
-            pageBuilder.setTimestamp(column,
+            TimestampUnit unit = timestampUnits[column.getIndex()];
+            pageBuilder.setTimestamp(column, unit.toTimestamp(value.doubleValue()));
         }
     }
 }
data/src/main/java/org/embulk/parser/avro/getter/IntegerColumnGetter.java
CHANGED
@@ -1,15 +1,18 @@
 package org.embulk.parser.avro.getter;
 
+import org.embulk.parser.avro.TimestampUnit;
 import org.embulk.spi.Column;
 import org.embulk.spi.PageBuilder;
-import org.embulk.spi.time.Timestamp;
 import org.embulk.spi.time.TimestampParser;
 
 public class IntegerColumnGetter extends BaseColumnGetter {
     protected Integer value;
 
-    public IntegerColumnGetter(PageBuilder pageBuilder, TimestampParser[] timestampParsers) {
+    private final TimestampUnit[] timestampUnits;
+
+    public IntegerColumnGetter(PageBuilder pageBuilder, TimestampParser[] timestampParsers, TimestampUnit[] timestampUnits) {
         super(pageBuilder, timestampParsers);
+        this.timestampUnits = timestampUnits;
     }
 
     @Override
@@ -51,7 +54,8 @@ public class IntegerColumnGetter extends BaseColumnGetter {
             pageBuilder.setNull(column);
         }
         else {
-
+            TimestampUnit unit = timestampUnits[column.getIndex()];
+            pageBuilder.setTimestamp(column, unit.toTimestamp(value.longValue()));
         }
     }
 }
data/src/main/java/org/embulk/parser/avro/getter/LongColumnGetter.java
CHANGED
@@ -1,15 +1,18 @@
 package org.embulk.parser.avro.getter;
 
+import org.embulk.parser.avro.TimestampUnit;
 import org.embulk.spi.Column;
 import org.embulk.spi.PageBuilder;
-import org.embulk.spi.time.Timestamp;
 import org.embulk.spi.time.TimestampParser;
 
 public class LongColumnGetter extends BaseColumnGetter {
     protected Long value;
 
-    public LongColumnGetter(PageBuilder pageBuilder, TimestampParser[] timestampParsers) {
+    private final TimestampUnit[] timestampUnits;
+
+    public LongColumnGetter(PageBuilder pageBuilder, TimestampParser[] timestampParsers, TimestampUnit[] timestampUnits) {
         super(pageBuilder, timestampParsers);
+        this.timestampUnits = timestampUnits;
     }
 
     @Override
@@ -52,7 +55,8 @@ public class LongColumnGetter extends BaseColumnGetter {
             pageBuilder.setNull(column);
         }
         else {
-
+            TimestampUnit unit = timestampUnits[column.getIndex()];
+            pageBuilder.setTimestamp(column, unit.toTimestamp(value));
         }
     }
 }
data/src/test/java/org/embulk/parser/avro/TestAvroParserPlugin.java
CHANGED
@@ -34,6 +34,7 @@ import static org.embulk.spi.type.Types.JSON;
 import static org.embulk.spi.type.Types.TIMESTAMP;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
 
 public class TestAvroParserPlugin
 {
@@ -50,6 +51,7 @@ public class TestAvroParserPlugin
     {
         config = config().set("type", "avro");
         plugin = new AvroParserPlugin();
+
         recreatePageOutput();
     }
 
@@ -69,7 +71,7 @@
                 column("options", JSON),
                 column("spec", JSON),
                 column("created_at", TIMESTAMP, config().set("format", "%Y-%m-%dT%H:%M:%S%:z")),
-                column("created_at_utc", TIMESTAMP)
+                column("created_at_utc", TIMESTAMP, config().set("timestamp_unit", "second"))
         );
 
         ConfigSource config = this.config.deepCopy().set("columns", schema).set("avsc", this.getClass().getResource("item.avsc").getPath());
@@ -89,7 +91,44 @@
         assertEquals("bar", ((MapValue)record[8]).map().get(ValueFactory.newString("foo")).toString());
         assertEquals("opt1", ((MapValue)record[9]).map().get(ValueFactory.newString("key")).toString());
         assertEquals("2016-05-08 19:35:43 UTC", record[10].toString());
-        assertEquals("2016-05-08 19:35:
+        assertEquals("2016-05-08 19:35:28 UTC", record[11].toString());
+    }
+
+    @Test
+    public void useTimestampUnit()
+            throws Exception
+    {
+        SchemaConfig schema = schema(
+                column("timestamp", TIMESTAMP, config().set("timestamp_unit", "second")),
+                column("timestamp_long", TIMESTAMP, config().set("timestamp_unit", "second")),
+                column("timestamp_milli", TIMESTAMP, config().set("timestamp_unit", "milli")),
+                column("timestamp_micro", TIMESTAMP, config().set("timestamp_unit", "micro")),
+                column("timestamp_nano", TIMESTAMP, config().set("timestamp_unit", "nano")),
+                column("timestamp_float", TIMESTAMP, config().set("timestamp_unit", "second")),
+                column("timestamp_double", TIMESTAMP, config().set("timestamp_unit", "second")),
+                column("timestamp_double_milli", TIMESTAMP, config().set("timestamp_unit", "milli")),
+                column("timestamp_double_micro", TIMESTAMP, config().set("timestamp_unit", "micro")),
+                column("timestamp_double_nano", TIMESTAMP, config().set("timestamp_unit", "nano"))
+        );
+
+        ConfigSource config = this.config.deepCopy().set("columns", schema).set("avsc", this.getClass().getResource("item2.avsc").getPath());
+
+        transaction(config, fileInput(new File(this.getClass().getResource("items2.avro").getPath())));
+
+        List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
+        assertEquals(1, records.size());
+
+        Object[] record = records.get(0);
+        assertEquals("2018-02-23 12:13:52 UTC", record[0].toString());
+        assertEquals("2018-02-23 12:13:52 UTC", record[1].toString());
+        assertEquals("2018-02-23 12:13:52.717 UTC", record[2].toString());
+        assertEquals("2018-02-23 12:13:52.717249 UTC", record[3].toString());
+        assertEquals("2018-02-23 12:13:52.717249634 UTC", record[4].toString());
+        assertEquals("2018-02-23 12:13:52 UTC", record[5].toString());
+        assertTrue(record[6].toString().matches("2018-02-23 12:13:52.717249.* UTC"));
+        assertTrue(record[7].toString().matches("2018-02-23 12:13:52.717249.* UTC"));
+        assertTrue(record[8].toString().matches("2018-02-23 12:13:52.717249.* UTC"));
+        assertTrue(record[9].toString().matches("2018-02-23 12:13:52.717249.* UTC"));
     }
 
     @Test
data/src/test/resources/org/embulk/parser/avro/.gitignore
ADDED
@@ -0,0 +1 @@
+.bundle
data/src/test/resources/org/embulk/parser/avro/data.json
ADDED
@@ -0,0 +1,6 @@
+{"id":1,"code":123456789012345678,"name":"Desktop","description":"Office and Personal Usage","flag":true,"created_at":"2016-05-09T04:35:43+09:00","created_at_utc":1.46273613E9,"price":30000.0,"spec":{"key":"opt1","value":"optvalue1"},"tags":["tag1","tag2"],"options":{"hoge":null,"foo":"bar"},"item_type":"D","dummy":null}
+{"id":2,"code":123456789012345679,"name":"Laptop","description":null,"flag":false,"created_at":"2016-05-09T04:35:43+09:00","created_at_utc":1.46273613E9,"price":50000.0,"spec":{"key":"opt1","value":null},"tags":null,"options":{},"item_type":"M","dummy":null}
+{"id":3,"code":123456789012345680,"name":"Tablet","description":"Personal Usage","flag":true,"created_at":"2016-05-09T04:35:43+09:00","created_at_utc":1.46273613E9,"price":null,"spec":{"key":"opt1","value":"optvalue1"},"tags":["tag3"],"options":{},"item_type":"M","dummy":null}
+{"id":4,"code":123456789012345681,"name":"Mobile","description":"Personal Usage","flag":true,"created_at":"2016-05-09T04:35:43+09:00","created_at_utc":1.46273613E9,"price":10000.0,"spec":{"key":"opt1","value":"optvalue1"},"tags":[],"options":{},"item_type":"M","dummy":null}
+{"id":5,"code":123456789012345682,"name":"Notepad","description":null,"flag":true,"created_at":"2016-05-09T04:35:43+09:00","created_at_utc":1.46273613E9,"price":20000.0,"spec":{"key":"opt1","value":"optvalue1"},"tags":["tag1","tag2"],"options":{},"item_type":"M","dummy":null}
+{"id":6,"code":123456789012345683,"name":"SmartPhone","description":"Multipurpose","flag":true,"created_at":"2016-05-09T04:35:43+09:00","created_at_utc":1.46273613E9,"price":40000.0,"spec":{"key":"opt1","value":"optvalue1"},"tags":["tag1","tag2"],"options":{},"item_type":"M","dummy":null}
data/src/test/resources/org/embulk/parser/avro/data2.json
ADDED
@@ -0,0 +1 @@
+{"timestamp": 1519388032, "timestamp_long": 1519388032, "timestamp_milli": 1519388032717, "timestamp_micro": 1519388032717249, "timestamp_nano": 1519388032717249634, "timestamp_float": 1519388032.7172496, "timestamp_float_milli": 1519388032717.2496, "timestamp_float_micro": 1519388032717249.6, "timestamp_float_nano": 1519388032717249634.0,"timestamp_double": 1519388032.717249634, "timestamp_double_milli": 1519388032717.249634, "timestamp_double_micro": 1519388032717249.634, "timestamp_double_nano": 1519388032717249634.0}
data/src/test/resources/org/embulk/parser/avro/data_creator.rb
ADDED
@@ -0,0 +1,17 @@
+require 'avro'
+require 'json'
+
+schema = Avro::Schema.parse(File.read(ARGV[0]))
+file = File.open(ARGV[1], 'wb')
+writer = Avro::IO::DatumWriter.new(schema)
+dw = Avro::DataFile::Writer.new(file, writer, schema)
+
+data = File.read(ARGV[2]).each_line.map do |l|
+  JSON.load(l)
+end
+
+data.each do |d|
+  dw << d
+end
+
+dw.close
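The script reads an Avro schema from its first argument, writes an Avro data file to its second, and loads newline-delimited JSON records from its third; presumably it is the helper used to generate the new items2.avro fixture from item2.avsc and data2.json.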
data/src/test/resources/org/embulk/parser/avro/item2.avsc
ADDED
@@ -0,0 +1,20 @@
+{
+  "type" : "record",
+  "name" : "Item",
+  "namespace" : "example.avro",
+  "fields" : [
+    {"name": "timestamp", "type": "int"},
+    {"name": "timestamp_long", "type": "long"},
+    {"name": "timestamp_milli", "type": "long"},
+    {"name": "timestamp_micro", "type": "long"},
+    {"name": "timestamp_nano", "type": "long"},
+    {"name": "timestamp_float", "type": "float"},
+    {"name": "timestamp_float_milli", "type": "float"},
+    {"name": "timestamp_float_micro", "type": "float"},
+    {"name": "timestamp_float_nano", "type": "float"},
+    {"name": "timestamp_double", "type": "double"},
+    {"name": "timestamp_double_milli", "type": "double"},
+    {"name": "timestamp_double_micro", "type": "double"},
+    {"name": "timestamp_double_nano", "type": "double"}
+  ]
+}
data/src/test/resources/org/embulk/parser/avro/items2.avro
ADDED
Binary file
metadata
CHANGED
@@ -1,43 +1,43 @@
 --- !ruby/object:Gem::Specification
 name: embulk-parser-avro
 version: !ruby/object:Gem::Version
-  version: 0.
+  version: 0.3.0
 platform: ruby
 authors:
 - joker1007
 autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2018-02-23 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
-  name: bundler
-  version_requirements: !ruby/object:Gem::Requirement
-    requirements:
-    - - ~>
-      - !ruby/object:Gem::Version
-        version: '1.0'
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - ~>
       - !ruby/object:Gem::Version
         version: '1.0'
+  name: bundler
   prerelease: false
   type: :development
-- !ruby/object:Gem::Dependency
-  name: rake
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - -
+    - - ~>
       - !ruby/object:Gem::Version
-        version: '
+        version: '1.0'
+- !ruby/object:Gem::Dependency
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - '>='
       - !ruby/object:Gem::Version
         version: '10.0'
+  name: rake
   prerelease: false
   type: :development
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '10.0'
 description: Parses Avro files read by other file input plugins.
 email:
 - kakyoin.hierophant@gmail.com
@@ -64,6 +64,8 @@ files:
 - lib/embulk/guess/avro.rb
 - lib/embulk/parser/avro.rb
 - src/main/java/org/embulk/parser/avro/AvroParserPlugin.java
+- src/main/java/org/embulk/parser/avro/TimestampUnit.java
+- src/main/java/org/embulk/parser/avro/TimestampUnitDeserializer.java
 - src/main/java/org/embulk/parser/avro/getter/AvroGenericDataConverter.java
 - src/main/java/org/embulk/parser/avro/getter/BaseColumnGetter.java
 - src/main/java/org/embulk/parser/avro/getter/BooleanColumnGetter.java
@@ -75,16 +77,24 @@ files:
 - src/main/java/org/embulk/parser/avro/getter/LongColumnGetter.java
 - src/main/java/org/embulk/parser/avro/getter/StringColumnGetter.java
 - src/test/java/org/embulk/parser/avro/TestAvroParserPlugin.java
+- src/test/resources/org/embulk/parser/avro/.gitignore
+- src/test/resources/org/embulk/parser/avro/Gemfile
+- src/test/resources/org/embulk/parser/avro/Gemfile.lock
+- src/test/resources/org/embulk/parser/avro/data.json
+- src/test/resources/org/embulk/parser/avro/data2.json
+- src/test/resources/org/embulk/parser/avro/data_creator.rb
 - src/test/resources/org/embulk/parser/avro/item.avsc
+- src/test/resources/org/embulk/parser/avro/item2.avsc
 - src/test/resources/org/embulk/parser/avro/items.avro
--
+- src/test/resources/org/embulk/parser/avro/items2.avro
+- classpath/avro-1.8.2.jar
+- classpath/paranamer-2.7.jar
+- classpath/xz-1.5.jar
 - classpath/commons-compress-1.8.1.jar
-- classpath/embulk-parser-avro-0.
+- classpath/embulk-parser-avro-0.3.0.jar
 - classpath/jackson-core-asl-1.9.13.jar
-- classpath/jackson-mapper-asl-1.9.13.jar
-- classpath/paranamer-2.7.jar
 - classpath/snappy-java-1.1.1.3.jar
-- classpath/
+- classpath/jackson-mapper-asl-1.9.13.jar
 homepage: https://github.com/joker1007/embulk-parser-avro
 licenses:
 - MIT