embulk-parser-avro 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 62671c1f3feefa2bb7feecccf4362b04f0ad7d3c
4
- data.tar.gz: 68e76d9d06bcc2f30c3d6e376e1fe867027b6e09
3
+ metadata.gz: 2ee00ffbaf0cf78dfc2c7bd0861e68603b528565
4
+ data.tar.gz: 08786ae000735a7f8d2aad1514a02de9c5095ec4
5
5
  SHA512:
6
- metadata.gz: 79e628c578df06d5aa54e17a0038eeb65fa667318be469cdceab9161a4b40716634d1c9cbdd88d17ef47a8d85cf9d4d63939c6340b3efde74995070e3ad37758
7
- data.tar.gz: d1120457b865feed3eab7560f3dff20ab0b767fe17653d6970626836460426135334f0f681f901405ce8107d4a0f3c3fb5aa14e302023132fbad48a22c538168
6
+ metadata.gz: 2697fb6b9cd4fb2cf6a72194450031f11ade7780d2afb050d40d65de39a10a5b56da507125b5580de820ba78e399ff39348726bb0eb82edb0c7aae13de27a555
7
+ data.tar.gz: 839d69c7a19dfc6eeddff3b4fb9f2bee3172b1d4ab05793753bae1fb466ef1bfa91c5e86e099f3fe44bc5c0e0047ed3a34eb789ae26c7e0507b29c25a58ce4ca
data/.gitignore CHANGED
@@ -5,6 +5,7 @@
5
5
  .gradle/
6
6
  /classpath/
7
7
  build/
8
+ out/
8
9
  .idea
9
10
  /.settings/
10
11
  /.metadata/
data/README.md CHANGED
@@ -12,11 +12,37 @@
12
12
  - **type**: Specify this parser as avro
13
13
  - **avsc**: Specify avro schema file.
14
14
  - **columns**: Specify column name and type. See below (array, optional)
15
+ - **timestamp_unit**: Specify the unit of time for numeric timestamp values (string, default: `second`). This option takes effect only when the avro value is `long`, `int`, `float`, or `double`.
15
16
  * **default_timezone**: Default timezone of the timestamp (string, default: UTC)
16
17
  * **default_timestamp_format**: Default timestamp format of the timestamp (string, default: `%Y-%m-%d %H:%M:%S.%N %z`)
17
18
 
18
19
  If columns is not set, this plugin detects the schema automatically from the avsc schema.
19
20
 
21
+ The supported `timestamp_unit` values are listed below.
22
+
23
+ - "Second"
24
+ - "second"
25
+ - "sec"
26
+ - "s"
27
+ - "MilliSecond"
28
+ - "millisecond"
29
+ - "milli_second"
30
+ - "milli"
31
+ - "msec"
32
+ - "ms"
33
+ - "MicroSecond"
34
+ - "microsecond"
35
+ - "micro_second"
36
+ - "micro"
37
+ - "usec"
38
+ - "us"
39
+ - "NanoSecond"
40
+ - "nanosecond"
41
+ - "nano_second"
42
+ - "nano"
43
+ - "nsec"
44
+ - "ns"
45
+
20
46
  ## Example
21
47
 
22
48
  ```yaml
@@ -38,7 +64,7 @@ in:
38
64
  - {name: "options", type: "json"}
39
65
  - {name: "spec", type: "json"}
40
66
  - {name: "created_at", type: "timestamp", format: "%Y-%m-%dT%H:%M:%S%:z"}
41
- - {name: "created_at_utc", type: "timestamp"}
67
+ - {name: "created_at_utc", type: "timestamp", timestamp_unit: "second"}
42
68
 
43
69
  out:
44
70
  type: stdout
@@ -13,19 +13,19 @@ configurations {
13
13
  provided
14
14
  }
15
15
 
16
- version = "0.2.0"
16
+ version = "0.3.0"
17
17
 
18
- sourceCompatibility = 1.7
19
- targetCompatibility = 1.7
18
+ sourceCompatibility = 1.8
19
+ targetCompatibility = 1.8
20
20
 
21
21
  dependencies {
22
- compile "org.embulk:embulk-core:0.8.14"
23
- provided "org.embulk:embulk-core:0.8.14"
24
- compile "org.apache.avro:avro:1.8.0"
22
+ compile "org.embulk:embulk-core:0.9.4"
23
+ provided "org.embulk:embulk-core:0.9.4"
24
+ compile "org.apache.avro:avro:1.8.2"
25
25
  testCompile "junit:junit:4.+"
26
26
 
27
- testCompile "org.embulk:embulk-core:0.8.14:tests"
28
- testCompile "org.embulk:embulk-standards:0.8.14"
27
+ testCompile "org.embulk:embulk-core:0.9.4:tests"
28
+ testCompile "org.embulk:embulk-standards:0.9.4"
29
29
  }
30
30
 
31
31
  task classpath(type: Copy, dependsOn: ["jar"]) {
@@ -15,6 +15,7 @@ import org.embulk.config.TaskSource;
15
15
  import org.embulk.parser.avro.getter.BaseColumnGetter;
16
16
  import org.embulk.parser.avro.getter.ColumnGetterFactory;
17
17
  import org.embulk.spi.Column;
18
+ import org.embulk.spi.ColumnConfig;
18
19
  import org.embulk.spi.Exec;
19
20
  import org.embulk.spi.FileInput;
20
21
  import org.embulk.spi.PageBuilder;
@@ -23,6 +24,7 @@ import org.embulk.spi.ParserPlugin;
23
24
  import org.embulk.spi.Schema;
24
25
  import org.embulk.spi.SchemaConfig;
25
26
  import org.embulk.spi.time.TimestampParser;
27
+ import org.embulk.spi.type.TimestampType;
26
28
  import org.embulk.spi.type.Types;
27
29
  import org.embulk.spi.unit.LocalFile;
28
30
  import org.embulk.spi.util.FileInputInputStream;
@@ -46,6 +48,13 @@ public class AvroParserPlugin
46
48
  LocalFile getAvsc();
47
49
  }
48
50
 
51
+ public interface TimestampUnitConfig extends Task
52
+ {
53
+ @Config("timestamp_unit")
54
+ @ConfigDefault("\"second\"")
55
+ public TimestampUnit getTimestampUnit();
56
+ }
57
+
49
58
  @Override
50
59
  public void transaction(ConfigSource config, ParserPlugin.Control control)
51
60
  {
@@ -128,6 +137,7 @@ public class AvroParserPlugin
128
137
  PluginTask task = taskSource.loadTask(PluginTask.class);
129
138
  List<Column> columns = schema.getColumns();
130
139
  final TimestampParser[] timestampParsers = Timestamps.newTimestampColumnParsers(task, task.getColumns());
140
+ final TimestampUnit[] timestampUnits = newTimestampUnits(task.getColumns());
131
141
  File avsc = task.getAvsc().getFile();
132
142
  final org.apache.avro.Schema avroSchema;
133
143
  try {
@@ -137,7 +147,7 @@ public class AvroParserPlugin
137
147
  }
138
148
 
139
149
  try (FileInputInputStream is = new FileInputInputStream(input); final PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), schema, output)) {
140
- ColumnGetterFactory factory = new ColumnGetterFactory(avroSchema, pageBuilder, timestampParsers);
150
+ ColumnGetterFactory factory = new ColumnGetterFactory(avroSchema, pageBuilder, timestampParsers, timestampUnits);
141
151
  ImmutableMap.Builder<String, BaseColumnGetter> columnGettersBuilder = ImmutableMap.builder();
142
152
  for (Column column : columns) {
143
153
  BaseColumnGetter columnGetter = factory.newColumnGetter(column);
@@ -165,4 +175,17 @@ public class AvroParserPlugin
165
175
  throw new RuntimeException(e);
166
176
  }
167
177
  }
178
+
179
+ private TimestampUnit[] newTimestampUnits(SchemaConfig columns) {
180
+ TimestampUnit[] units = new TimestampUnit[columns.getColumnCount()];
181
+ int i = 0;
182
+ for (ColumnConfig column : columns.getColumns()) {
183
+ if (column.getType() instanceof TimestampType) {
184
+ TimestampUnitConfig option = column.getOption().loadConfig(TimestampUnitConfig.class);
185
+ units[i] = option.getTimestampUnit();
186
+ }
187
+ i++;
188
+ }
189
+ return units;
190
+ }
168
191
  }
@@ -0,0 +1,70 @@
1
+ package org.embulk.parser.avro;
2
+
3
+ import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
4
+ import org.embulk.spi.time.Timestamp;
5
+
6
+
7
+ @JsonDeserialize(using=TimestampUnitDeserializer.class)
8
+ public enum TimestampUnit {
9
+ Second {
10
+ @Override
11
+ public Timestamp toTimestamp(Long value)
12
+ {
13
+ return Timestamp.ofEpochSecond(value);
14
+ }
15
+
16
+ @Override
17
+ public Timestamp toTimestamp(Double value)
18
+ {
19
+ long sec = value.longValue();
20
+ double rest = value - sec;
21
+ return Timestamp.ofEpochSecond(0, sec * 1000000000L + (long) (rest * 1000000000L));
22
+ }
23
+ },
24
+ MilliSecond {
25
+ @Override
26
+ public Timestamp toTimestamp(Long value)
27
+ {
28
+ return Timestamp.ofEpochSecond(0, value * 1000000L);
29
+ }
30
+
31
+ @Override
32
+ public Timestamp toTimestamp(Double value)
33
+ {
34
+ long sec = value.longValue();
35
+ double rest = value - sec;
36
+ return Timestamp.ofEpochSecond(0, sec * 1000000L + (long) (rest * 1000000L));
37
+ }
38
+ },
39
+ MicroSecond {
40
+ @Override
41
+ public Timestamp toTimestamp(Long value)
42
+ {
43
+ return Timestamp.ofEpochSecond(0, value * 1000L);
44
+ }
45
+
46
+ @Override
47
+ public Timestamp toTimestamp(Double value)
48
+ {
49
+ long sec = value.longValue();
50
+ double rest = value - sec;
51
+ return Timestamp.ofEpochSecond(0, sec * 1000L + (long) (rest * 1000L));
52
+ }
53
+ },
54
+ NanoSecond {
55
+ @Override
56
+ public Timestamp toTimestamp(Long value)
57
+ {
58
+ return Timestamp.ofEpochSecond(0, value);
59
+ }
60
+
61
+ @Override
62
+ public Timestamp toTimestamp(Double value)
63
+ {
64
+ return Timestamp.ofEpochSecond(0, value.longValue());
65
+ }
66
+ };
67
+
68
+ abstract public Timestamp toTimestamp(Long value);
69
+ abstract public Timestamp toTimestamp(Double value);
70
+ }
@@ -0,0 +1,55 @@
1
+ package org.embulk.parser.avro;
2
+
3
+ import com.fasterxml.jackson.core.JsonProcessingException;
4
+ import com.fasterxml.jackson.databind.DeserializationContext;
5
+ import com.fasterxml.jackson.databind.JsonMappingException;
6
+ import com.fasterxml.jackson.databind.deser.std.FromStringDeserializer;
7
+ import com.google.common.base.Joiner;
8
+ import com.google.common.collect.ImmutableMap;
9
+
10
+ import java.io.IOException;
11
+
12
+ public class TimestampUnitDeserializer extends FromStringDeserializer<TimestampUnit>
13
+ {
14
+
15
+ public static ImmutableMap<String, TimestampUnit> mapping = ImmutableMap.<String, TimestampUnit>builder()
16
+ .put("Second", TimestampUnit.Second)
17
+ .put("second", TimestampUnit.Second)
18
+ .put("sec", TimestampUnit.Second)
19
+ .put("s", TimestampUnit.Second)
20
+ .put("MilliSecond", TimestampUnit.MilliSecond)
21
+ .put("millisecond", TimestampUnit.MilliSecond)
22
+ .put("milli_second", TimestampUnit.MilliSecond)
23
+ .put("milli", TimestampUnit.MilliSecond)
24
+ .put("msec", TimestampUnit.MilliSecond)
25
+ .put("ms", TimestampUnit.MilliSecond)
26
+ .put("MicroSecond", TimestampUnit.MicroSecond)
27
+ .put("microsecond", TimestampUnit.MicroSecond)
28
+ .put("micro_second", TimestampUnit.MicroSecond)
29
+ .put("micro", TimestampUnit.MicroSecond)
30
+ .put("usec", TimestampUnit.MicroSecond)
31
+ .put("us", TimestampUnit.MicroSecond)
32
+ .put("NanoSecond", TimestampUnit.NanoSecond)
33
+ .put("nanosecond", TimestampUnit.NanoSecond)
34
+ .put("nano_second", TimestampUnit.NanoSecond)
35
+ .put("nano", TimestampUnit.NanoSecond)
36
+ .put("nsec", TimestampUnit.NanoSecond)
37
+ .put("ns", TimestampUnit.NanoSecond)
38
+ .build();
39
+
40
+ public TimestampUnitDeserializer() {
41
+ super(TimestampUnit.class);
42
+ }
43
+
44
+ @Override
45
+ protected TimestampUnit _deserialize(String value, DeserializationContext ctxt) throws IOException, JsonProcessingException {
46
+ TimestampUnit unit = mapping.get(value);
47
+ if (unit == null) {
48
+ throw new JsonMappingException(
49
+ String.format("Unknown type name '%s'. Supported types are: %s",
50
+ value,
51
+ Joiner.on(", ").join(mapping.keySet())));
52
+ }
53
+ return unit;
54
+ }
55
+ }
@@ -1,6 +1,7 @@
1
1
  package org.embulk.parser.avro.getter;
2
2
 
3
3
  import org.apache.avro.Schema;
4
+ import org.embulk.parser.avro.TimestampUnit;
4
5
  import org.embulk.spi.Column;
5
6
  import org.embulk.spi.DataException;
6
7
  import org.embulk.spi.PageBuilder;
@@ -10,12 +11,14 @@ public class ColumnGetterFactory {
10
11
  private org.apache.avro.Schema avroSchema;
11
12
  private PageBuilder pageBuilder;
12
13
  private TimestampParser[] timestampParsers;
14
+ private TimestampUnit[] timestampUnits;
13
15
 
14
- public ColumnGetterFactory(org.apache.avro.Schema avroSchema, PageBuilder pageBuilder, TimestampParser[] timestampParsers)
16
+ public ColumnGetterFactory(org.apache.avro.Schema avroSchema, PageBuilder pageBuilder, TimestampParser[] timestampParsers, TimestampUnit[] timestampUnits)
15
17
  {
16
18
  this.avroSchema = avroSchema;
17
19
  this.pageBuilder = pageBuilder;
18
20
  this.timestampParsers = timestampParsers;
21
+ this.timestampUnits = timestampUnits;
19
22
  }
20
23
 
21
24
  public BaseColumnGetter newColumnGetter(Column column)
@@ -43,13 +46,13 @@ public class ColumnGetterFactory {
43
46
  case ENUM:
44
47
  return new StringColumnGetter(pageBuilder, timestampParsers);
45
48
  case INT:
46
- return new IntegerColumnGetter(pageBuilder, timestampParsers);
49
+ return new IntegerColumnGetter(pageBuilder, timestampParsers, timestampUnits);
47
50
  case LONG:
48
- return new LongColumnGetter(pageBuilder, timestampParsers);
51
+ return new LongColumnGetter(pageBuilder, timestampParsers, timestampUnits);
49
52
  case FLOAT:
50
- return new FloatColumnGetter(pageBuilder, timestampParsers);
53
+ return new FloatColumnGetter(pageBuilder, timestampParsers, timestampUnits);
51
54
  case DOUBLE:
52
- return new DoubleColumnGetter(pageBuilder, timestampParsers);
55
+ return new DoubleColumnGetter(pageBuilder, timestampParsers, timestampUnits);
53
56
  case BOOLEAN:
54
57
  return new BooleanColumnGetter(pageBuilder, timestampParsers);
55
58
  case ARRAY:
@@ -1,15 +1,18 @@
1
1
  package org.embulk.parser.avro.getter;
2
2
 
3
+ import org.embulk.parser.avro.TimestampUnit;
3
4
  import org.embulk.spi.Column;
4
5
  import org.embulk.spi.PageBuilder;
5
- import org.embulk.spi.time.Timestamp;
6
6
  import org.embulk.spi.time.TimestampParser;
7
7
 
8
8
  public class DoubleColumnGetter extends BaseColumnGetter {
9
9
  protected Double value;
10
10
 
11
- public DoubleColumnGetter(PageBuilder pageBuilder, TimestampParser[] timestampParsers) {
11
+ private final TimestampUnit[] timestampUnits;
12
+
13
+ public DoubleColumnGetter(PageBuilder pageBuilder, TimestampParser[] timestampParsers, TimestampUnit[] timestampUnits) {
12
14
  super(pageBuilder, timestampParsers);
15
+ this.timestampUnits = timestampUnits;
13
16
  }
14
17
 
15
18
  @Override
@@ -51,8 +54,8 @@ public class DoubleColumnGetter extends BaseColumnGetter {
51
54
  pageBuilder.setNull(column);
52
55
  }
53
56
  else {
54
- long milliSec = (long) (value * 1000);
55
- pageBuilder.setTimestamp(column, Timestamp.ofEpochMilli(milliSec));
57
+ TimestampUnit unit = timestampUnits[column.getIndex()];
58
+ pageBuilder.setTimestamp(column, unit.toTimestamp(value));
56
59
  }
57
60
  }
58
61
  }
@@ -1,15 +1,18 @@
1
1
  package org.embulk.parser.avro.getter;
2
2
 
3
+ import org.embulk.parser.avro.TimestampUnit;
3
4
  import org.embulk.spi.Column;
4
5
  import org.embulk.spi.PageBuilder;
5
- import org.embulk.spi.time.Timestamp;
6
6
  import org.embulk.spi.time.TimestampParser;
7
7
 
8
8
  public class FloatColumnGetter extends BaseColumnGetter {
9
9
  protected Float value;
10
10
 
11
- public FloatColumnGetter(PageBuilder pageBuilder, TimestampParser[] timestampParsers) {
11
+ private final TimestampUnit[] timestampUnits;
12
+
13
+ public FloatColumnGetter(PageBuilder pageBuilder, TimestampParser[] timestampParsers, TimestampUnit[] timestampUnits) {
12
14
  super(pageBuilder, timestampParsers);
15
+ this.timestampUnits = timestampUnits;
13
16
  }
14
17
 
15
18
  @Override
@@ -51,8 +54,8 @@ public class FloatColumnGetter extends BaseColumnGetter {
51
54
  pageBuilder.setNull(column);
52
55
  }
53
56
  else {
54
- long milliSec = (long) (value * 1000);
55
- pageBuilder.setTimestamp(column, Timestamp.ofEpochMilli(milliSec));
57
+ TimestampUnit unit = timestampUnits[column.getIndex()];
58
+ pageBuilder.setTimestamp(column, unit.toTimestamp(value.doubleValue()));
56
59
  }
57
60
  }
58
61
  }
@@ -1,15 +1,18 @@
1
1
  package org.embulk.parser.avro.getter;
2
2
 
3
+ import org.embulk.parser.avro.TimestampUnit;
3
4
  import org.embulk.spi.Column;
4
5
  import org.embulk.spi.PageBuilder;
5
- import org.embulk.spi.time.Timestamp;
6
6
  import org.embulk.spi.time.TimestampParser;
7
7
 
8
8
  public class IntegerColumnGetter extends BaseColumnGetter {
9
9
  protected Integer value;
10
10
 
11
- public IntegerColumnGetter(PageBuilder pageBuilder, TimestampParser[] timestampParsers) {
11
+ private final TimestampUnit[] timestampUnits;
12
+
13
+ public IntegerColumnGetter(PageBuilder pageBuilder, TimestampParser[] timestampParsers, TimestampUnit[] timestampUnits) {
12
14
  super(pageBuilder, timestampParsers);
15
+ this.timestampUnits = timestampUnits;
13
16
  }
14
17
 
15
18
  @Override
@@ -51,7 +54,8 @@ public class IntegerColumnGetter extends BaseColumnGetter {
51
54
  pageBuilder.setNull(column);
52
55
  }
53
56
  else {
54
- pageBuilder.setTimestamp(column, Timestamp.ofEpochSecond(value.longValue()));
57
+ TimestampUnit unit = timestampUnits[column.getIndex()];
58
+ pageBuilder.setTimestamp(column, unit.toTimestamp(value.longValue()));
55
59
  }
56
60
  }
57
61
  }
@@ -1,15 +1,18 @@
1
1
  package org.embulk.parser.avro.getter;
2
2
 
3
+ import org.embulk.parser.avro.TimestampUnit;
3
4
  import org.embulk.spi.Column;
4
5
  import org.embulk.spi.PageBuilder;
5
- import org.embulk.spi.time.Timestamp;
6
6
  import org.embulk.spi.time.TimestampParser;
7
7
 
8
8
  public class LongColumnGetter extends BaseColumnGetter {
9
9
  protected Long value;
10
10
 
11
- public LongColumnGetter(PageBuilder pageBuilder, TimestampParser[] timestampParsers) {
11
+ private final TimestampUnit[] timestampUnits;
12
+
13
+ public LongColumnGetter(PageBuilder pageBuilder, TimestampParser[] timestampParsers, TimestampUnit[] timestampUnits) {
12
14
  super(pageBuilder, timestampParsers);
15
+ this.timestampUnits = timestampUnits;
13
16
  }
14
17
 
15
18
  @Override
@@ -52,7 +55,8 @@ public class LongColumnGetter extends BaseColumnGetter {
52
55
  pageBuilder.setNull(column);
53
56
  }
54
57
  else {
55
- pageBuilder.setTimestamp(column, Timestamp.ofEpochSecond(value));
58
+ TimestampUnit unit = timestampUnits[column.getIndex()];
59
+ pageBuilder.setTimestamp(column, unit.toTimestamp(value));
56
60
  }
57
61
  }
58
62
  }
@@ -34,6 +34,7 @@ import static org.embulk.spi.type.Types.JSON;
34
34
  import static org.embulk.spi.type.Types.TIMESTAMP;
35
35
  import static org.junit.Assert.assertEquals;
36
36
  import static org.junit.Assert.assertNull;
37
+ import static org.junit.Assert.assertTrue;
37
38
 
38
39
  public class TestAvroParserPlugin
39
40
  {
@@ -50,6 +51,7 @@ public class TestAvroParserPlugin
50
51
  {
51
52
  config = config().set("type", "avro");
52
53
  plugin = new AvroParserPlugin();
54
+
53
55
  recreatePageOutput();
54
56
  }
55
57
 
@@ -69,7 +71,7 @@ public class TestAvroParserPlugin
69
71
  column("options", JSON),
70
72
  column("spec", JSON),
71
73
  column("created_at", TIMESTAMP, config().set("format", "%Y-%m-%dT%H:%M:%S%:z")),
72
- column("created_at_utc", TIMESTAMP)
74
+ column("created_at_utc", TIMESTAMP, config().set("timestamp_unit", "second"))
73
75
  );
74
76
 
75
77
  ConfigSource config = this.config.deepCopy().set("columns", schema).set("avsc", this.getClass().getResource("item.avsc").getPath());
@@ -89,7 +91,44 @@ public class TestAvroParserPlugin
89
91
  assertEquals("bar", ((MapValue)record[8]).map().get(ValueFactory.newString("foo")).toString());
90
92
  assertEquals("opt1", ((MapValue)record[9]).map().get(ValueFactory.newString("key")).toString());
91
93
  assertEquals("2016-05-08 19:35:43 UTC", record[10].toString());
92
- assertEquals("2016-05-08 19:35:25.952 UTC", record[11].toString());
94
+ assertEquals("2016-05-08 19:35:28 UTC", record[11].toString());
95
+ }
96
+
97
+ @Test
98
+ public void useTimestampUnit()
99
+ throws Exception
100
+ {
101
+ SchemaConfig schema = schema(
102
+ column("timestamp", TIMESTAMP, config().set("timestamp_unit", "second")),
103
+ column("timestamp_long", TIMESTAMP, config().set("timestamp_unit", "second")),
104
+ column("timestamp_milli", TIMESTAMP, config().set("timestamp_unit", "milli")),
105
+ column("timestamp_micro", TIMESTAMP, config().set("timestamp_unit", "micro")),
106
+ column("timestamp_nano", TIMESTAMP, config().set("timestamp_unit", "nano")),
107
+ column("timestamp_float", TIMESTAMP, config().set("timestamp_unit", "second")),
108
+ column("timestamp_double", TIMESTAMP, config().set("timestamp_unit", "second")),
109
+ column("timestamp_double_milli", TIMESTAMP, config().set("timestamp_unit", "milli")),
110
+ column("timestamp_double_micro", TIMESTAMP, config().set("timestamp_unit", "micro")),
111
+ column("timestamp_double_nano", TIMESTAMP, config().set("timestamp_unit", "nano"))
112
+ );
113
+
114
+ ConfigSource config = this.config.deepCopy().set("columns", schema).set("avsc", this.getClass().getResource("item2.avsc").getPath());
115
+
116
+ transaction(config, fileInput(new File(this.getClass().getResource("items2.avro").getPath())));
117
+
118
+ List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
119
+ assertEquals(1, records.size());
120
+
121
+ Object[] record = records.get(0);
122
+ assertEquals("2018-02-23 12:13:52 UTC", record[0].toString());
123
+ assertEquals("2018-02-23 12:13:52 UTC", record[1].toString());
124
+ assertEquals("2018-02-23 12:13:52.717 UTC", record[2].toString());
125
+ assertEquals("2018-02-23 12:13:52.717249 UTC", record[3].toString());
126
+ assertEquals("2018-02-23 12:13:52.717249634 UTC", record[4].toString());
127
+ assertEquals("2018-02-23 12:13:52 UTC", record[5].toString());
128
+ assertTrue(record[6].toString().matches("2018-02-23 12:13:52.717249.* UTC"));
129
+ assertTrue(record[7].toString().matches("2018-02-23 12:13:52.717249.* UTC"));
130
+ assertTrue(record[8].toString().matches("2018-02-23 12:13:52.717249.* UTC"));
131
+ assertTrue(record[9].toString().matches("2018-02-23 12:13:52.717249.* UTC"));
93
132
  }
94
133
 
95
134
  @Test
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ source "https://rubygems.org"
4
+
5
+ git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
6
+
7
+ gem "avro"
@@ -0,0 +1,15 @@
1
+ GEM
2
+ remote: https://rubygems.org/
3
+ specs:
4
+ avro (1.8.2)
5
+ multi_json
6
+ multi_json (1.13.1)
7
+
8
+ PLATFORMS
9
+ ruby
10
+
11
+ DEPENDENCIES
12
+ avro
13
+
14
+ BUNDLED WITH
15
+ 1.16.1
@@ -0,0 +1,6 @@
1
+ {"id":1,"code":123456789012345678,"name":"Desktop","description":"Office and Personal Usage","flag":true,"created_at":"2016-05-09T04:35:43+09:00","created_at_utc":1.46273613E9,"price":30000.0,"spec":{"key":"opt1","value":"optvalue1"},"tags":["tag1","tag2"],"options":{"hoge":null,"foo":"bar"},"item_type":"D","dummy":null}
2
+ {"id":2,"code":123456789012345679,"name":"Laptop","description":null,"flag":false,"created_at":"2016-05-09T04:35:43+09:00","created_at_utc":1.46273613E9,"price":50000.0,"spec":{"key":"opt1","value":null},"tags":null,"options":{},"item_type":"M","dummy":null}
3
+ {"id":3,"code":123456789012345680,"name":"Tablet","description":"Personal Usage","flag":true,"created_at":"2016-05-09T04:35:43+09:00","created_at_utc":1.46273613E9,"price":null,"spec":{"key":"opt1","value":"optvalue1"},"tags":["tag3"],"options":{},"item_type":"M","dummy":null}
4
+ {"id":4,"code":123456789012345681,"name":"Mobile","description":"Personal Usage","flag":true,"created_at":"2016-05-09T04:35:43+09:00","created_at_utc":1.46273613E9,"price":10000.0,"spec":{"key":"opt1","value":"optvalue1"},"tags":[],"options":{},"item_type":"M","dummy":null}
5
+ {"id":5,"code":123456789012345682,"name":"Notepad","description":null,"flag":true,"created_at":"2016-05-09T04:35:43+09:00","created_at_utc":1.46273613E9,"price":20000.0,"spec":{"key":"opt1","value":"optvalue1"},"tags":["tag1","tag2"],"options":{},"item_type":"M","dummy":null}
6
+ {"id":6,"code":123456789012345683,"name":"SmartPhone","description":"Multipurpose","flag":true,"created_at":"2016-05-09T04:35:43+09:00","created_at_utc":1.46273613E9,"price":40000.0,"spec":{"key":"opt1","value":"optvalue1"},"tags":["tag1","tag2"],"options":{},"item_type":"M","dummy":null}
@@ -0,0 +1 @@
1
+ {"timestamp": 1519388032, "timestamp_long": 1519388032, "timestamp_milli": 1519388032717, "timestamp_micro": 1519388032717249, "timestamp_nano": 1519388032717249634, "timestamp_float": 1519388032.7172496, "timestamp_float_milli": 1519388032717.2496, "timestamp_float_micro": 1519388032717249.6, "timestamp_float_nano": 1519388032717249634.0,"timestamp_double": 1519388032.717249634, "timestamp_double_milli": 1519388032717.249634, "timestamp_double_micro": 1519388032717249.634, "timestamp_double_nano": 1519388032717249634.0}
@@ -0,0 +1,17 @@
1
+ require 'avro'
2
+ require 'json'
3
+
4
+ schema = Avro::Schema.parse(File.read(ARGV[0]))
5
+ file = File.open(ARGV[1], 'wb')
6
+ writer = Avro::IO::DatumWriter.new(schema)
7
+ dw = Avro::DataFile::Writer.new(file, writer, schema)
8
+
9
+ data = File.read(ARGV[2]).each_line.map do |l|
10
+ JSON.load(l)
11
+ end
12
+
13
+ data.each do |d|
14
+ dw << d
15
+ end
16
+
17
+ dw.close
@@ -0,0 +1,20 @@
1
+ {
2
+ "type" : "record",
3
+ "name" : "Item",
4
+ "namespace" : "example.avro",
5
+ "fields" : [
6
+ {"name": "timestamp", "type": "int"},
7
+ {"name": "timestamp_long", "type": "long"},
8
+ {"name": "timestamp_milli", "type": "long"},
9
+ {"name": "timestamp_micro", "type": "long"},
10
+ {"name": "timestamp_nano", "type": "long"},
11
+ {"name": "timestamp_float", "type": "float"},
12
+ {"name": "timestamp_float_milli", "type": "float"},
13
+ {"name": "timestamp_float_micro", "type": "float"},
14
+ {"name": "timestamp_float_nano", "type": "float"},
15
+ {"name": "timestamp_double", "type": "double"},
16
+ {"name": "timestamp_double_milli", "type": "double"},
17
+ {"name": "timestamp_double_micro", "type": "double"},
18
+ {"name": "timestamp_double_nano", "type": "double"}
19
+ ]
20
+ }
metadata CHANGED
@@ -1,43 +1,43 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-parser-avro
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - joker1007
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-11-04 00:00:00.000000000 Z
11
+ date: 2018-02-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: bundler
15
- version_requirements: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - ~>
18
- - !ruby/object:Gem::Version
19
- version: '1.0'
20
14
  requirement: !ruby/object:Gem::Requirement
21
15
  requirements:
22
16
  - - ~>
23
17
  - !ruby/object:Gem::Version
24
18
  version: '1.0'
19
+ name: bundler
25
20
  prerelease: false
26
21
  type: :development
27
- - !ruby/object:Gem::Dependency
28
- name: rake
29
22
  version_requirements: !ruby/object:Gem::Requirement
30
23
  requirements:
31
- - - '>='
24
+ - - ~>
32
25
  - !ruby/object:Gem::Version
33
- version: '10.0'
26
+ version: '1.0'
27
+ - !ruby/object:Gem::Dependency
34
28
  requirement: !ruby/object:Gem::Requirement
35
29
  requirements:
36
30
  - - '>='
37
31
  - !ruby/object:Gem::Version
38
32
  version: '10.0'
33
+ name: rake
39
34
  prerelease: false
40
35
  type: :development
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
41
  description: Parses Avro files read by other file input plugins.
42
42
  email:
43
43
  - kakyoin.hierophant@gmail.com
@@ -64,6 +64,8 @@ files:
64
64
  - lib/embulk/guess/avro.rb
65
65
  - lib/embulk/parser/avro.rb
66
66
  - src/main/java/org/embulk/parser/avro/AvroParserPlugin.java
67
+ - src/main/java/org/embulk/parser/avro/TimestampUnit.java
68
+ - src/main/java/org/embulk/parser/avro/TimestampUnitDeserializer.java
67
69
  - src/main/java/org/embulk/parser/avro/getter/AvroGenericDataConverter.java
68
70
  - src/main/java/org/embulk/parser/avro/getter/BaseColumnGetter.java
69
71
  - src/main/java/org/embulk/parser/avro/getter/BooleanColumnGetter.java
@@ -75,16 +77,24 @@ files:
75
77
  - src/main/java/org/embulk/parser/avro/getter/LongColumnGetter.java
76
78
  - src/main/java/org/embulk/parser/avro/getter/StringColumnGetter.java
77
79
  - src/test/java/org/embulk/parser/avro/TestAvroParserPlugin.java
80
+ - src/test/resources/org/embulk/parser/avro/.gitignore
81
+ - src/test/resources/org/embulk/parser/avro/Gemfile
82
+ - src/test/resources/org/embulk/parser/avro/Gemfile.lock
83
+ - src/test/resources/org/embulk/parser/avro/data.json
84
+ - src/test/resources/org/embulk/parser/avro/data2.json
85
+ - src/test/resources/org/embulk/parser/avro/data_creator.rb
78
86
  - src/test/resources/org/embulk/parser/avro/item.avsc
87
+ - src/test/resources/org/embulk/parser/avro/item2.avsc
79
88
  - src/test/resources/org/embulk/parser/avro/items.avro
80
- - classpath/avro-1.8.0.jar
89
+ - src/test/resources/org/embulk/parser/avro/items2.avro
90
+ - classpath/avro-1.8.2.jar
91
+ - classpath/paranamer-2.7.jar
92
+ - classpath/xz-1.5.jar
81
93
  - classpath/commons-compress-1.8.1.jar
82
- - classpath/embulk-parser-avro-0.2.0.jar
94
+ - classpath/embulk-parser-avro-0.3.0.jar
83
95
  - classpath/jackson-core-asl-1.9.13.jar
84
- - classpath/jackson-mapper-asl-1.9.13.jar
85
- - classpath/paranamer-2.7.jar
86
96
  - classpath/snappy-java-1.1.1.3.jar
87
- - classpath/xz-1.5.jar
97
+ - classpath/jackson-mapper-asl-1.9.13.jar
88
98
  homepage: https://github.com/joker1007/embulk-parser-avro
89
99
  licenses:
90
100
  - MIT