embulk-parser-avro 0.2.0 → 0.3.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: 62671c1f3feefa2bb7feecccf4362b04f0ad7d3c
- data.tar.gz: 68e76d9d06bcc2f30c3d6e376e1fe867027b6e09
+ metadata.gz: 2ee00ffbaf0cf78dfc2c7bd0861e68603b528565
+ data.tar.gz: 08786ae000735a7f8d2aad1514a02de9c5095ec4
  SHA512:
- metadata.gz: 79e628c578df06d5aa54e17a0038eeb65fa667318be469cdceab9161a4b40716634d1c9cbdd88d17ef47a8d85cf9d4d63939c6340b3efde74995070e3ad37758
- data.tar.gz: d1120457b865feed3eab7560f3dff20ab0b767fe17653d6970626836460426135334f0f681f901405ce8107d4a0f3c3fb5aa14e302023132fbad48a22c538168
+ metadata.gz: 2697fb6b9cd4fb2cf6a72194450031f11ade7780d2afb050d40d65de39a10a5b56da507125b5580de820ba78e399ff39348726bb0eb82edb0c7aae13de27a555
+ data.tar.gz: 839d69c7a19dfc6eeddff3b4fb9f2bee3172b1d4ab05793753bae1fb466ef1bfa91c5e86e099f3fe44bc5c0e0047ed3a34eb789ae26c7e0507b29c25a58ce4ca
data/.gitignore CHANGED
@@ -5,6 +5,7 @@
  .gradle/
  /classpath/
  build/
+ out/
  .idea
  /.settings/
  /.metadata/
data/README.md CHANGED
@@ -12,11 +12,37 @@
  - **type**: Specify this parser as avro
  - **avsc**: Specify avro schema file.
  - **columns**: Specify column name and type. See below (array, optional)
+ - **timestamp_unit**: Specify the unit of the time value (string, default: `second`; effective only when the Avro value is `long`, `int`, `float`, or `double`)
  * **default_timezone**: Default timezone of the timestamp (string, default: UTC)
  * **default_timestamp_format**: Default timestamp format of the timestamp (string, default: `%Y-%m-%d %H:%M:%S.%N %z`)

  If columns is not set, this plugin detect schema automatically by using avsc schema.

+ The supported `timestamp_unit` values are listed below.
+
+ - "Second"
+ - "second"
+ - "sec"
+ - "s"
+ - "MilliSecond"
+ - "millisecond"
+ - "milli_second"
+ - "milli"
+ - "msec"
+ - "ms"
+ - "MicroSecond"
+ - "microsecond"
+ - "micro_second"
+ - "micro"
+ - "usec"
+ - "us"
+ - "NanoSecond"
+ - "nanosecond"
+ - "nano_second"
+ - "nano"
+ - "nsec"
+ - "ns"
+
  ## Example

  ```yaml
@@ -38,7 +64,7 @@ in:
  - {name: "options", type: "json"}
  - {name: "spec", type: "json"}
  - {name: "created_at", type: "timestamp", format: "%Y-%m-%dT%H:%M:%S%:z"}
- - {name: "created_at_utc", type: "timestamp"}
+ - {name: "created_at_utc", type: "timestamp", timestamp_unit: "second"}

  out:
  type: stdout
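
The new `timestamp_unit` option documented above maps a raw numeric Avro value onto Embulk's `Timestamp` via the `TimestampUnit` enum introduced in this release (its source appears further down in this diff). The following sketch is illustration only, not code shipped in the gem; it assumes Embulk's `Timestamp#getEpochSecond()` and `getNano()` accessors and shows why choosing the right unit matters for a value like the `timestamp_milli` field in the bundled `data2.json`.

```java
import org.embulk.parser.avro.TimestampUnit;
import org.embulk.spi.time.Timestamp;

// Hypothetical demo class, not part of this gem.
public class TimestampUnitDemo {
    public static void main(String[] args) {
        long raw = 1519388032717L;  // timestamp_milli value from data2.json

        // timestamp_unit: "milli" -> 2018-02-23 12:13:52.717 UTC
        Timestamp asMilli = TimestampUnit.MilliSecond.toTimestamp(raw);
        System.out.println(asMilli.getEpochSecond());  // 1519388032
        System.out.println(asMilli.getNano());         // 717000000

        // timestamp_unit: "second" would read the same number as whole seconds,
        // i.e. a date tens of thousands of years in the future.
        Timestamp asSecond = TimestampUnit.Second.toTimestamp(raw);
        System.out.println(asSecond.getEpochSecond()); // 1519388032717
    }
}
```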
@@ -13,19 +13,19 @@ configurations {
  provided
  }

- version = "0.2.0"
+ version = "0.3.0"

- sourceCompatibility = 1.7
- targetCompatibility = 1.7
+ sourceCompatibility = 1.8
+ targetCompatibility = 1.8

  dependencies {
- compile "org.embulk:embulk-core:0.8.14"
- provided "org.embulk:embulk-core:0.8.14"
- compile "org.apache.avro:avro:1.8.0"
+ compile "org.embulk:embulk-core:0.9.4"
+ provided "org.embulk:embulk-core:0.9.4"
+ compile "org.apache.avro:avro:1.8.2"
  testCompile "junit:junit:4.+"

- testCompile "org.embulk:embulk-core:0.8.14:tests"
- testCompile "org.embulk:embulk-standards:0.8.14"
+ testCompile "org.embulk:embulk-core:0.9.4:tests"
+ testCompile "org.embulk:embulk-standards:0.9.4"
  }

  task classpath(type: Copy, dependsOn: ["jar"]) {
@@ -15,6 +15,7 @@ import org.embulk.config.TaskSource;
  import org.embulk.parser.avro.getter.BaseColumnGetter;
  import org.embulk.parser.avro.getter.ColumnGetterFactory;
  import org.embulk.spi.Column;
+ import org.embulk.spi.ColumnConfig;
  import org.embulk.spi.Exec;
  import org.embulk.spi.FileInput;
  import org.embulk.spi.PageBuilder;
@@ -23,6 +24,7 @@ import org.embulk.spi.ParserPlugin;
  import org.embulk.spi.Schema;
  import org.embulk.spi.SchemaConfig;
  import org.embulk.spi.time.TimestampParser;
+ import org.embulk.spi.type.TimestampType;
  import org.embulk.spi.type.Types;
  import org.embulk.spi.unit.LocalFile;
  import org.embulk.spi.util.FileInputInputStream;
@@ -46,6 +48,13 @@ public class AvroParserPlugin
  LocalFile getAvsc();
  }

+ public interface TimestampUnitConfig extends Task
+ {
+ @Config("timestamp_unit")
+ @ConfigDefault("\"second\"")
+ public TimestampUnit getTimestampUnit();
+ }
+
  @Override
  public void transaction(ConfigSource config, ParserPlugin.Control control)
  {
@@ -128,6 +137,7 @@ public class AvroParserPlugin
  PluginTask task = taskSource.loadTask(PluginTask.class);
  List<Column> columns = schema.getColumns();
  final TimestampParser[] timestampParsers = Timestamps.newTimestampColumnParsers(task, task.getColumns());
+ final TimestampUnit[] timestampUnits = newTimestampUnits(task.getColumns());
  File avsc = task.getAvsc().getFile();
  final org.apache.avro.Schema avroSchema;
  try {
@@ -137,7 +147,7 @@ public class AvroParserPlugin
  }

  try (FileInputInputStream is = new FileInputInputStream(input); final PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), schema, output)) {
- ColumnGetterFactory factory = new ColumnGetterFactory(avroSchema, pageBuilder, timestampParsers);
+ ColumnGetterFactory factory = new ColumnGetterFactory(avroSchema, pageBuilder, timestampParsers, timestampUnits);
  ImmutableMap.Builder<String, BaseColumnGetter> columnGettersBuilder = ImmutableMap.builder();
  for (Column column : columns) {
  BaseColumnGetter columnGetter = factory.newColumnGetter(column);
@@ -165,4 +175,17 @@ public class AvroParserPlugin
  throw new RuntimeException(e);
  }
  }
+
+ private TimestampUnit[] newTimestampUnits(SchemaConfig columns) {
+ TimestampUnit[] units = new TimestampUnit[columns.getColumnCount()];
+ int i = 0;
+ for (ColumnConfig column : columns.getColumns()) {
+ if (column.getType() instanceof TimestampType) {
+ TimestampUnitConfig option = column.getOption().loadConfig(TimestampUnitConfig.class);
+ units[i] = option.getTimestampUnit();
+ }
+ i++;
+ }
+ return units;
+ }
  }
@@ -0,0 +1,70 @@
+ package org.embulk.parser.avro;
+
+ import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
+ import org.embulk.spi.time.Timestamp;
+
+
+ @JsonDeserialize(using=TimestampUnitDeserializer.class)
+ public enum TimestampUnit {
+     Second {
+         @Override
+         public Timestamp toTimestamp(Long value)
+         {
+             return Timestamp.ofEpochSecond(value);
+         }
+
+         @Override
+         public Timestamp toTimestamp(Double value)
+         {
+             long sec = value.longValue();
+             double rest = value - sec;
+             return Timestamp.ofEpochSecond(0, sec * 1000000000L + (long) (rest * 1000000000L));
+         }
+     },
+     MilliSecond {
+         @Override
+         public Timestamp toTimestamp(Long value)
+         {
+             return Timestamp.ofEpochSecond(0, value * 1000000L);
+         }
+
+         @Override
+         public Timestamp toTimestamp(Double value)
+         {
+             long sec = value.longValue();
+             double rest = value - sec;
+             return Timestamp.ofEpochSecond(0, sec * 1000000L + (long) (rest * 1000000L));
+         }
+     },
+     MicroSecond {
+         @Override
+         public Timestamp toTimestamp(Long value)
+         {
+             return Timestamp.ofEpochSecond(0, value * 1000L);
+         }
+
+         @Override
+         public Timestamp toTimestamp(Double value)
+         {
+             long sec = value.longValue();
+             double rest = value - sec;
+             return Timestamp.ofEpochSecond(0, sec * 1000L + (long) (rest * 1000L));
+         }
+     },
+     NanoSecond {
+         @Override
+         public Timestamp toTimestamp(Long value)
+         {
+             return Timestamp.ofEpochSecond(0, value);
+         }
+
+         @Override
+         public Timestamp toTimestamp(Double value)
+         {
+             return Timestamp.ofEpochSecond(0, value.longValue());
+         }
+     };
+
+     abstract public Timestamp toTimestamp(Long value);
+     abstract public Timestamp toTimestamp(Double value);
+ }
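
One detail worth noting about the enum above: the `Double` overloads split the input into its whole and fractional parts before scaling to nanoseconds, so sub-second precision is bounded by what an IEEE 754 double can represent. That is why the new `useTimestampUnit` test further down matches the float/double columns with a regex instead of an exact string. A minimal sketch, again hypothetical and not shipped with the gem:

```java
import org.embulk.parser.avro.TimestampUnit;
import org.embulk.spi.time.Timestamp;

// Hypothetical demo, not part of this gem.
public class DoublePrecisionDemo {
    public static void main(String[] args) {
        // timestamp_double value from data2.json (declared "double" in item2.avsc)
        double raw = 1519388032.717249634;

        Timestamp ts = TimestampUnit.Second.toTimestamp(raw);
        System.out.println(ts.getEpochSecond()); // 1519388032 (exact)
        System.out.println(ts.getNano());        // roughly 717249634; the last digits are inexact
    }
}
```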
@@ -0,0 +1,55 @@
+ package org.embulk.parser.avro;
+
+ import com.fasterxml.jackson.core.JsonProcessingException;
+ import com.fasterxml.jackson.databind.DeserializationContext;
+ import com.fasterxml.jackson.databind.JsonMappingException;
+ import com.fasterxml.jackson.databind.deser.std.FromStringDeserializer;
+ import com.google.common.base.Joiner;
+ import com.google.common.collect.ImmutableMap;
+
+ import java.io.IOException;
+
+ public class TimestampUnitDeserializer extends FromStringDeserializer<TimestampUnit>
+ {
+
+     public static ImmutableMap<String, TimestampUnit> mapping = ImmutableMap.<String, TimestampUnit>builder()
+             .put("Second", TimestampUnit.Second)
+             .put("second", TimestampUnit.Second)
+             .put("sec", TimestampUnit.Second)
+             .put("s", TimestampUnit.Second)
+             .put("MilliSecond", TimestampUnit.MilliSecond)
+             .put("millisecond", TimestampUnit.MilliSecond)
+             .put("milli_second", TimestampUnit.MilliSecond)
+             .put("milli", TimestampUnit.MilliSecond)
+             .put("msec", TimestampUnit.MilliSecond)
+             .put("ms", TimestampUnit.MilliSecond)
+             .put("MicroSecond", TimestampUnit.MicroSecond)
+             .put("microsecond", TimestampUnit.MicroSecond)
+             .put("micro_second", TimestampUnit.MicroSecond)
+             .put("micro", TimestampUnit.MicroSecond)
+             .put("usec", TimestampUnit.MicroSecond)
+             .put("us", TimestampUnit.MicroSecond)
+             .put("NanoSecond", TimestampUnit.NanoSecond)
+             .put("nanosecond", TimestampUnit.NanoSecond)
+             .put("nano_second", TimestampUnit.NanoSecond)
+             .put("nano", TimestampUnit.NanoSecond)
+             .put("nsec", TimestampUnit.NanoSecond)
+             .put("ns", TimestampUnit.NanoSecond)
+             .build();
+
+     public TimestampUnitDeserializer() {
+         super(TimestampUnit.class);
+     }
+
+     @Override
+     protected TimestampUnit _deserialize(String value, DeserializationContext ctxt) throws IOException, JsonProcessingException {
+         TimestampUnit unit = mapping.get(value);
+         if (unit == null) {
+             throw new JsonMappingException(
+                     String.format("Unknown type name '%s'. Supported types are: %s",
+                             value,
+                             Joiner.on(", ").join(mapping.keySet())));
+         }
+         return unit;
+     }
+ }
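
This deserializer is what resolves the alias strings listed in the README (`"ms"`, `"usec"`, `"nano_second"`, and so on) into `TimestampUnit` constants. Inside Embulk the conversion happens through its own config loader, but because the enum is annotated with `@JsonDeserialize`, a plain Jackson `ObjectMapper` exercises the same code path. The standalone sketch below is hypothetical, assuming jackson-databind on the classpath, and shows the behaviour including the error raised for an unknown name.

```java
import com.fasterxml.jackson.databind.ObjectMapper;
import org.embulk.parser.avro.TimestampUnit;

// Hypothetical demo, not part of this gem.
public class TimestampUnitDeserializerDemo {
    public static void main(String[] args) throws Exception {
        ObjectMapper mapper = new ObjectMapper();

        // Any alias in the mapping table resolves to its enum constant.
        TimestampUnit ms = mapper.readValue("\"ms\"", TimestampUnit.class);
        System.out.println(ms); // MilliSecond

        // An unknown name throws JsonMappingException listing the supported values.
        mapper.readValue("\"minutes\"", TimestampUnit.class);
    }
}
```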
@@ -1,6 +1,7 @@
  package org.embulk.parser.avro.getter;

  import org.apache.avro.Schema;
+ import org.embulk.parser.avro.TimestampUnit;
  import org.embulk.spi.Column;
  import org.embulk.spi.DataException;
  import org.embulk.spi.PageBuilder;
@@ -10,12 +11,14 @@ public class ColumnGetterFactory {
  private org.apache.avro.Schema avroSchema;
  private PageBuilder pageBuilder;
  private TimestampParser[] timestampParsers;
+ private TimestampUnit[] timestampUnits;

- public ColumnGetterFactory(org.apache.avro.Schema avroSchema, PageBuilder pageBuilder, TimestampParser[] timestampParsers)
+ public ColumnGetterFactory(org.apache.avro.Schema avroSchema, PageBuilder pageBuilder, TimestampParser[] timestampParsers, TimestampUnit[] timestampUnits)
  {
  this.avroSchema = avroSchema;
  this.pageBuilder = pageBuilder;
  this.timestampParsers = timestampParsers;
+ this.timestampUnits = timestampUnits;
  }

  public BaseColumnGetter newColumnGetter(Column column)
@@ -43,13 +46,13 @@ public class ColumnGetterFactory {
  case ENUM:
  return new StringColumnGetter(pageBuilder, timestampParsers);
  case INT:
- return new IntegerColumnGetter(pageBuilder, timestampParsers);
+ return new IntegerColumnGetter(pageBuilder, timestampParsers, timestampUnits);
  case LONG:
- return new LongColumnGetter(pageBuilder, timestampParsers);
+ return new LongColumnGetter(pageBuilder, timestampParsers, timestampUnits);
  case FLOAT:
- return new FloatColumnGetter(pageBuilder, timestampParsers);
+ return new FloatColumnGetter(pageBuilder, timestampParsers, timestampUnits);
  case DOUBLE:
- return new DoubleColumnGetter(pageBuilder, timestampParsers);
+ return new DoubleColumnGetter(pageBuilder, timestampParsers, timestampUnits);
  case BOOLEAN:
  return new BooleanColumnGetter(pageBuilder, timestampParsers);
  case ARRAY:
@@ -1,15 +1,18 @@
  package org.embulk.parser.avro.getter;

+ import org.embulk.parser.avro.TimestampUnit;
  import org.embulk.spi.Column;
  import org.embulk.spi.PageBuilder;
- import org.embulk.spi.time.Timestamp;
  import org.embulk.spi.time.TimestampParser;

  public class DoubleColumnGetter extends BaseColumnGetter {
  protected Double value;

- public DoubleColumnGetter(PageBuilder pageBuilder, TimestampParser[] timestampParsers) {
+ private final TimestampUnit[] timestampUnits;
+
+ public DoubleColumnGetter(PageBuilder pageBuilder, TimestampParser[] timestampParsers, TimestampUnit[] timestampUnits) {
  super(pageBuilder, timestampParsers);
+ this.timestampUnits = timestampUnits;
  }

  @Override
@@ -51,8 +54,8 @@ public class DoubleColumnGetter extends BaseColumnGetter {
  pageBuilder.setNull(column);
  }
  else {
- long milliSec = (long) (value * 1000);
- pageBuilder.setTimestamp(column, Timestamp.ofEpochMilli(milliSec));
+ TimestampUnit unit = timestampUnits[column.getIndex()];
+ pageBuilder.setTimestamp(column, unit.toTimestamp(value));
  }
  }
  }
@@ -1,15 +1,18 @@
  package org.embulk.parser.avro.getter;

+ import org.embulk.parser.avro.TimestampUnit;
  import org.embulk.spi.Column;
  import org.embulk.spi.PageBuilder;
- import org.embulk.spi.time.Timestamp;
  import org.embulk.spi.time.TimestampParser;

  public class FloatColumnGetter extends BaseColumnGetter {
  protected Float value;

- public FloatColumnGetter(PageBuilder pageBuilder, TimestampParser[] timestampParsers) {
+ private final TimestampUnit[] timestampUnits;
+
+ public FloatColumnGetter(PageBuilder pageBuilder, TimestampParser[] timestampParsers, TimestampUnit[] timestampUnits) {
  super(pageBuilder, timestampParsers);
+ this.timestampUnits = timestampUnits;
  }

  @Override
@@ -51,8 +54,8 @@ public class FloatColumnGetter extends BaseColumnGetter {
  pageBuilder.setNull(column);
  }
  else {
- long milliSec = (long) (value * 1000);
- pageBuilder.setTimestamp(column, Timestamp.ofEpochMilli(milliSec));
+ TimestampUnit unit = timestampUnits[column.getIndex()];
+ pageBuilder.setTimestamp(column, unit.toTimestamp(value.doubleValue()));
  }
  }
  }
@@ -1,15 +1,18 @@
  package org.embulk.parser.avro.getter;

+ import org.embulk.parser.avro.TimestampUnit;
  import org.embulk.spi.Column;
  import org.embulk.spi.PageBuilder;
- import org.embulk.spi.time.Timestamp;
  import org.embulk.spi.time.TimestampParser;

  public class IntegerColumnGetter extends BaseColumnGetter {
  protected Integer value;

- public IntegerColumnGetter(PageBuilder pageBuilder, TimestampParser[] timestampParsers) {
+ private final TimestampUnit[] timestampUnits;
+
+ public IntegerColumnGetter(PageBuilder pageBuilder, TimestampParser[] timestampParsers, TimestampUnit[] timestampUnits) {
  super(pageBuilder, timestampParsers);
+ this.timestampUnits = timestampUnits;
  }

  @Override
@@ -51,7 +54,8 @@ public class IntegerColumnGetter extends BaseColumnGetter {
  pageBuilder.setNull(column);
  }
  else {
- pageBuilder.setTimestamp(column, Timestamp.ofEpochSecond(value.longValue()));
+ TimestampUnit unit = timestampUnits[column.getIndex()];
+ pageBuilder.setTimestamp(column, unit.toTimestamp(value.longValue()));
  }
  }
  }
@@ -1,15 +1,18 @@
  package org.embulk.parser.avro.getter;

+ import org.embulk.parser.avro.TimestampUnit;
  import org.embulk.spi.Column;
  import org.embulk.spi.PageBuilder;
- import org.embulk.spi.time.Timestamp;
  import org.embulk.spi.time.TimestampParser;

  public class LongColumnGetter extends BaseColumnGetter {
  protected Long value;

- public LongColumnGetter(PageBuilder pageBuilder, TimestampParser[] timestampParsers) {
+ private final TimestampUnit[] timestampUnits;
+
+ public LongColumnGetter(PageBuilder pageBuilder, TimestampParser[] timestampParsers, TimestampUnit[] timestampUnits) {
  super(pageBuilder, timestampParsers);
+ this.timestampUnits = timestampUnits;
  }

  @Override
@@ -52,7 +55,8 @@ public class LongColumnGetter extends BaseColumnGetter {
  pageBuilder.setNull(column);
  }
  else {
- pageBuilder.setTimestamp(column, Timestamp.ofEpochSecond(value));
+ TimestampUnit unit = timestampUnits[column.getIndex()];
+ pageBuilder.setTimestamp(column, unit.toTimestamp(value));
  }
  }
  }
@@ -34,6 +34,7 @@ import static org.embulk.spi.type.Types.JSON;
  import static org.embulk.spi.type.Types.TIMESTAMP;
  import static org.junit.Assert.assertEquals;
  import static org.junit.Assert.assertNull;
+ import static org.junit.Assert.assertTrue;

  public class TestAvroParserPlugin
  {
@@ -50,6 +51,7 @@ public class TestAvroParserPlugin
  {
  config = config().set("type", "avro");
  plugin = new AvroParserPlugin();
+
  recreatePageOutput();
  }

@@ -69,7 +71,7 @@ public class TestAvroParserPlugin
  column("options", JSON),
  column("spec", JSON),
  column("created_at", TIMESTAMP, config().set("format", "%Y-%m-%dT%H:%M:%S%:z")),
- column("created_at_utc", TIMESTAMP)
+ column("created_at_utc", TIMESTAMP, config().set("timestamp_unit", "second"))
  );

  ConfigSource config = this.config.deepCopy().set("columns", schema).set("avsc", this.getClass().getResource("item.avsc").getPath());
@@ -89,7 +91,44 @@ public class TestAvroParserPlugin
  assertEquals("bar", ((MapValue)record[8]).map().get(ValueFactory.newString("foo")).toString());
  assertEquals("opt1", ((MapValue)record[9]).map().get(ValueFactory.newString("key")).toString());
  assertEquals("2016-05-08 19:35:43 UTC", record[10].toString());
- assertEquals("2016-05-08 19:35:25.952 UTC", record[11].toString());
+ assertEquals("2016-05-08 19:35:28 UTC", record[11].toString());
+ }
+
+ @Test
+ public void useTimestampUnit()
+ throws Exception
+ {
+ SchemaConfig schema = schema(
+ column("timestamp", TIMESTAMP, config().set("timestamp_unit", "second")),
+ column("timestamp_long", TIMESTAMP, config().set("timestamp_unit", "second")),
+ column("timestamp_milli", TIMESTAMP, config().set("timestamp_unit", "milli")),
+ column("timestamp_micro", TIMESTAMP, config().set("timestamp_unit", "micro")),
+ column("timestamp_nano", TIMESTAMP, config().set("timestamp_unit", "nano")),
+ column("timestamp_float", TIMESTAMP, config().set("timestamp_unit", "second")),
+ column("timestamp_double", TIMESTAMP, config().set("timestamp_unit", "second")),
+ column("timestamp_double_milli", TIMESTAMP, config().set("timestamp_unit", "milli")),
+ column("timestamp_double_micro", TIMESTAMP, config().set("timestamp_unit", "micro")),
+ column("timestamp_double_nano", TIMESTAMP, config().set("timestamp_unit", "nano"))
+ );
+
+ ConfigSource config = this.config.deepCopy().set("columns", schema).set("avsc", this.getClass().getResource("item2.avsc").getPath());
+
+ transaction(config, fileInput(new File(this.getClass().getResource("items2.avro").getPath())));
+
+ List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
+ assertEquals(1, records.size());
+
+ Object[] record = records.get(0);
+ assertEquals("2018-02-23 12:13:52 UTC", record[0].toString());
+ assertEquals("2018-02-23 12:13:52 UTC", record[1].toString());
+ assertEquals("2018-02-23 12:13:52.717 UTC", record[2].toString());
+ assertEquals("2018-02-23 12:13:52.717249 UTC", record[3].toString());
+ assertEquals("2018-02-23 12:13:52.717249634 UTC", record[4].toString());
+ assertEquals("2018-02-23 12:13:52 UTC", record[5].toString());
+ assertTrue(record[6].toString().matches("2018-02-23 12:13:52.717249.* UTC"));
+ assertTrue(record[7].toString().matches("2018-02-23 12:13:52.717249.* UTC"));
+ assertTrue(record[8].toString().matches("2018-02-23 12:13:52.717249.* UTC"));
+ assertTrue(record[9].toString().matches("2018-02-23 12:13:52.717249.* UTC"));
  }

  @Test
@@ -0,0 +1,7 @@
+ # frozen_string_literal: true
+
+ source "https://rubygems.org"
+
+ git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
+
+ gem "avro"
@@ -0,0 +1,15 @@
+ GEM
+   remote: https://rubygems.org/
+   specs:
+     avro (1.8.2)
+       multi_json
+     multi_json (1.13.1)
+
+ PLATFORMS
+   ruby
+
+ DEPENDENCIES
+   avro
+
+ BUNDLED WITH
+    1.16.1
@@ -0,0 +1,6 @@
+ {"id":1,"code":123456789012345678,"name":"Desktop","description":"Office and Personal Usage","flag":true,"created_at":"2016-05-09T04:35:43+09:00","created_at_utc":1.46273613E9,"price":30000.0,"spec":{"key":"opt1","value":"optvalue1"},"tags":["tag1","tag2"],"options":{"hoge":null,"foo":"bar"},"item_type":"D","dummy":null}
+ {"id":2,"code":123456789012345679,"name":"Laptop","description":null,"flag":false,"created_at":"2016-05-09T04:35:43+09:00","created_at_utc":1.46273613E9,"price":50000.0,"spec":{"key":"opt1","value":null},"tags":null,"options":{},"item_type":"M","dummy":null}
+ {"id":3,"code":123456789012345680,"name":"Tablet","description":"Personal Usage","flag":true,"created_at":"2016-05-09T04:35:43+09:00","created_at_utc":1.46273613E9,"price":null,"spec":{"key":"opt1","value":"optvalue1"},"tags":["tag3"],"options":{},"item_type":"M","dummy":null}
+ {"id":4,"code":123456789012345681,"name":"Mobile","description":"Personal Usage","flag":true,"created_at":"2016-05-09T04:35:43+09:00","created_at_utc":1.46273613E9,"price":10000.0,"spec":{"key":"opt1","value":"optvalue1"},"tags":[],"options":{},"item_type":"M","dummy":null}
+ {"id":5,"code":123456789012345682,"name":"Notepad","description":null,"flag":true,"created_at":"2016-05-09T04:35:43+09:00","created_at_utc":1.46273613E9,"price":20000.0,"spec":{"key":"opt1","value":"optvalue1"},"tags":["tag1","tag2"],"options":{},"item_type":"M","dummy":null}
+ {"id":6,"code":123456789012345683,"name":"SmartPhone","description":"Multipurpose","flag":true,"created_at":"2016-05-09T04:35:43+09:00","created_at_utc":1.46273613E9,"price":40000.0,"spec":{"key":"opt1","value":"optvalue1"},"tags":["tag1","tag2"],"options":{},"item_type":"M","dummy":null}
@@ -0,0 +1 @@
+ {"timestamp": 1519388032, "timestamp_long": 1519388032, "timestamp_milli": 1519388032717, "timestamp_micro": 1519388032717249, "timestamp_nano": 1519388032717249634, "timestamp_float": 1519388032.7172496, "timestamp_float_milli": 1519388032717.2496, "timestamp_float_micro": 1519388032717249.6, "timestamp_float_nano": 1519388032717249634.0,"timestamp_double": 1519388032.717249634, "timestamp_double_milli": 1519388032717.249634, "timestamp_double_micro": 1519388032717249.634, "timestamp_double_nano": 1519388032717249634.0}
@@ -0,0 +1,17 @@
+ require 'avro'
+ require 'json'
+
+ schema = Avro::Schema.parse(File.read(ARGV[0]))
+ file = File.open(ARGV[1], 'wb')
+ writer = Avro::IO::DatumWriter.new(schema)
+ dw = Avro::DataFile::Writer.new(file, writer, schema)
+
+ data = File.read(ARGV[2]).each_line.map do |l|
+   JSON.load(l)
+ end
+
+ data.each do |d|
+   dw << d
+ end
+
+ dw.close
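
For reference, the script above takes the Avro schema, the output file, and a newline-delimited JSON source as its three arguments, so the `items2.avro` fixture was presumably generated with something like `ruby data_creator.rb item2.avsc items2.avro data2.json`.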
@@ -0,0 +1,20 @@
+ {
+   "type" : "record",
+   "name" : "Item",
+   "namespace" : "example.avro",
+   "fields" : [
+     {"name": "timestamp", "type": "int"},
+     {"name": "timestamp_long", "type": "long"},
+     {"name": "timestamp_milli", "type": "long"},
+     {"name": "timestamp_micro", "type": "long"},
+     {"name": "timestamp_nano", "type": "long"},
+     {"name": "timestamp_float", "type": "float"},
+     {"name": "timestamp_float_milli", "type": "float"},
+     {"name": "timestamp_float_micro", "type": "float"},
+     {"name": "timestamp_float_nano", "type": "float"},
+     {"name": "timestamp_double", "type": "double"},
+     {"name": "timestamp_double_milli", "type": "double"},
+     {"name": "timestamp_double_micro", "type": "double"},
+     {"name": "timestamp_double_nano", "type": "double"}
+   ]
+ }
metadata CHANGED
@@ -1,43 +1,43 @@
  --- !ruby/object:Gem::Specification
  name: embulk-parser-avro
  version: !ruby/object:Gem::Version
- version: 0.2.0
+ version: 0.3.0
  platform: ruby
  authors:
  - joker1007
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2016-11-04 00:00:00.000000000 Z
+ date: 2018-02-23 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
- name: bundler
- version_requirements: !ruby/object:Gem::Requirement
- requirements:
- - - ~>
- - !ruby/object:Gem::Version
- version: '1.0'
  requirement: !ruby/object:Gem::Requirement
  requirements:
  - - ~>
  - !ruby/object:Gem::Version
  version: '1.0'
+ name: bundler
  prerelease: false
  type: :development
- - !ruby/object:Gem::Dependency
- name: rake
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
- - - '>='
+ - - ~>
  - !ruby/object:Gem::Version
- version: '10.0'
+ version: '1.0'
+ - !ruby/object:Gem::Dependency
  requirement: !ruby/object:Gem::Requirement
  requirements:
  - - '>='
  - !ruby/object:Gem::Version
  version: '10.0'
+ name: rake
  prerelease: false
  type: :development
+ version_requirements: !ruby/object:Gem::Requirement
+ requirements:
+ - - '>='
+ - !ruby/object:Gem::Version
+ version: '10.0'
  description: Parses Avro files read by other file input plugins.
  email:
  - kakyoin.hierophant@gmail.com
@@ -64,6 +64,8 @@ files:
  - lib/embulk/guess/avro.rb
  - lib/embulk/parser/avro.rb
  - src/main/java/org/embulk/parser/avro/AvroParserPlugin.java
+ - src/main/java/org/embulk/parser/avro/TimestampUnit.java
+ - src/main/java/org/embulk/parser/avro/TimestampUnitDeserializer.java
  - src/main/java/org/embulk/parser/avro/getter/AvroGenericDataConverter.java
  - src/main/java/org/embulk/parser/avro/getter/BaseColumnGetter.java
  - src/main/java/org/embulk/parser/avro/getter/BooleanColumnGetter.java
@@ -75,16 +77,24 @@ files:
  - src/main/java/org/embulk/parser/avro/getter/LongColumnGetter.java
  - src/main/java/org/embulk/parser/avro/getter/StringColumnGetter.java
  - src/test/java/org/embulk/parser/avro/TestAvroParserPlugin.java
+ - src/test/resources/org/embulk/parser/avro/.gitignore
+ - src/test/resources/org/embulk/parser/avro/Gemfile
+ - src/test/resources/org/embulk/parser/avro/Gemfile.lock
+ - src/test/resources/org/embulk/parser/avro/data.json
+ - src/test/resources/org/embulk/parser/avro/data2.json
+ - src/test/resources/org/embulk/parser/avro/data_creator.rb
  - src/test/resources/org/embulk/parser/avro/item.avsc
+ - src/test/resources/org/embulk/parser/avro/item2.avsc
  - src/test/resources/org/embulk/parser/avro/items.avro
- - classpath/avro-1.8.0.jar
+ - src/test/resources/org/embulk/parser/avro/items2.avro
+ - classpath/avro-1.8.2.jar
+ - classpath/paranamer-2.7.jar
+ - classpath/xz-1.5.jar
  - classpath/commons-compress-1.8.1.jar
- - classpath/embulk-parser-avro-0.2.0.jar
+ - classpath/embulk-parser-avro-0.3.0.jar
  - classpath/jackson-core-asl-1.9.13.jar
- - classpath/jackson-mapper-asl-1.9.13.jar
- - classpath/paranamer-2.7.jar
  - classpath/snappy-java-1.1.1.3.jar
- - classpath/xz-1.5.jar
+ - classpath/jackson-mapper-asl-1.9.13.jar
  homepage: https://github.com/joker1007/embulk-parser-avro
  licenses:
  - MIT