embulk-parser-avro 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +14 -0
  3. data/LICENSE.txt +21 -0
  4. data/README.md +88 -0
  5. data/build.gradle +96 -0
  6. data/config/checkstyle/checkstyle.xml +128 -0
  7. data/config/checkstyle/default.xml +108 -0
  8. data/example/.gitignore +3 -0
  9. data/example/example.yml +22 -0
  10. data/example/generate.rb +94 -0
  11. data/example/item.avsc +27 -0
  12. data/example/items.avro +0 -0
  13. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  14. data/gradle/wrapper/gradle-wrapper.properties +6 -0
  15. data/gradlew +160 -0
  16. data/gradlew.bat +90 -0
  17. data/lib/embulk/guess/avro.rb +61 -0
  18. data/lib/embulk/parser/avro.rb +3 -0
  19. data/src/main/java/org/embulk/parser/avro/AvroColumnOption.java +16 -0
  20. data/src/main/java/org/embulk/parser/avro/AvroParserPlugin.java +93 -0
  21. data/src/main/java/org/embulk/parser/avro/getter/AvroGenericDataConverter.java +72 -0
  22. data/src/main/java/org/embulk/parser/avro/getter/BaseColumnGetter.java +83 -0
  23. data/src/main/java/org/embulk/parser/avro/getter/BooleanColumnGetter.java +37 -0
  24. data/src/main/java/org/embulk/parser/avro/getter/ColumnGetterFactory.java +63 -0
  25. data/src/main/java/org/embulk/parser/avro/getter/DoubleColumnGetter.java +58 -0
  26. data/src/main/java/org/embulk/parser/avro/getter/FloatColumnGetter.java +58 -0
  27. data/src/main/java/org/embulk/parser/avro/getter/GenericDataColumnGetter.java +34 -0
  28. data/src/main/java/org/embulk/parser/avro/getter/IntegerColumnGetter.java +57 -0
  29. data/src/main/java/org/embulk/parser/avro/getter/LongColumnGetter.java +58 -0
  30. data/src/main/java/org/embulk/parser/avro/getter/StringColumnGetter.java +85 -0
  31. data/src/test/java/org/embulk/parser/avro/TestAvroParserPlugin.java +145 -0
  32. data/src/test/resources/org/embulk/parser/avro/item.avsc +27 -0
  33. data/src/test/resources/org/embulk/parser/avro/items.avro +0 -0
  34. metadata +112 -0
@@ -0,0 +1,34 @@
1
+ package org.embulk.parser.avro.getter;
2
+
3
+ import org.embulk.spi.Column;
4
+ import org.embulk.spi.PageBuilder;
5
+ import org.embulk.spi.time.TimestampParser;
6
+ import org.msgpack.value.Value;
7
+
8
+ public class GenericDataColumnGetter extends BaseColumnGetter {
9
+ public GenericDataColumnGetter(PageBuilder pageBuilder, TimestampParser[] timestampParsers) {
10
+ super(pageBuilder, timestampParsers);
11
+ }
12
+
13
+ @Override
14
+ public void stringColumn(Column column) {
15
+ if (this.value == null) {
16
+ pageBuilder.setNull(column);
17
+ }
18
+ else {
19
+ Value converted = AvroGenericDataConverter.convert(value);
20
+ pageBuilder.setString(column, converted.toString());
21
+ }
22
+ }
23
+
24
+ @Override
25
+ public void jsonColumn(Column column) {
26
+ if (this.value == null) {
27
+ pageBuilder.setNull(column);
28
+ }
29
+ else {
30
+ Value converted = AvroGenericDataConverter.convert(value);
31
+ pageBuilder.setJson(column, converted);
32
+ }
33
+ }
34
+ }
@@ -0,0 +1,57 @@
1
+ package org.embulk.parser.avro.getter;
2
+
3
+ import org.embulk.spi.Column;
4
+ import org.embulk.spi.PageBuilder;
5
+ import org.embulk.spi.time.Timestamp;
6
+ import org.embulk.spi.time.TimestampParser;
7
+
8
+ public class IntegerColumnGetter extends BaseColumnGetter {
9
+ protected Integer value;
10
+
11
+ public IntegerColumnGetter(PageBuilder pageBuilder, TimestampParser[] timestampParsers) {
12
+ super(pageBuilder, timestampParsers);
13
+ }
14
+
15
+ @Override
16
+ public void setValue(Object value)
17
+ {
18
+ this.value = (Integer) value;
19
+ }
20
+
21
+ @Override
22
+ public void longColumn(Column column) {
23
+ if (value == null) {
24
+ pageBuilder.setNull(column);
25
+ } else {
26
+ pageBuilder.setLong(column, value.longValue());
27
+ }
28
+ }
29
+
30
+ @Override
31
+ public void doubleColumn(Column column) {
32
+ if (value == null) {
33
+ pageBuilder.setNull(column);
34
+ } else {
35
+ pageBuilder.setDouble(column, value.doubleValue());
36
+ }
37
+ }
38
+
39
+ @Override
40
+ public void stringColumn(Column column) {
41
+ if (value == null) {
42
+ pageBuilder.setNull(column);
43
+ } else {
44
+ pageBuilder.setString(column, value.toString());
45
+ }
46
+ }
47
+
48
+ @Override
49
+ public void timestampColumn(Column column) {
50
+ if (this.value == null) {
51
+ pageBuilder.setNull(column);
52
+ }
53
+ else {
54
+ pageBuilder.setTimestamp(column, Timestamp.ofEpochSecond(value.longValue()));
55
+ }
56
+ }
57
+ }
@@ -0,0 +1,58 @@
1
+ package org.embulk.parser.avro.getter;
2
+
3
+ import org.embulk.spi.Column;
4
+ import org.embulk.spi.PageBuilder;
5
+ import org.embulk.spi.time.Timestamp;
6
+ import org.embulk.spi.time.TimestampParser;
7
+
8
+ public class LongColumnGetter extends BaseColumnGetter {
9
+ protected Long value;
10
+
11
+ public LongColumnGetter(PageBuilder pageBuilder, TimestampParser[] timestampParsers) {
12
+ super(pageBuilder, timestampParsers);
13
+ }
14
+
15
+ @Override
16
+ public void setValue(Object value)
17
+ {
18
+ this.value = (Long) value;
19
+ }
20
+
21
+ @Override
22
+ public void longColumn(Column column) {
23
+ if (value == null) {
24
+ pageBuilder.setNull(column);
25
+ } else {
26
+ pageBuilder.setLong(column, value);
27
+ }
28
+ }
29
+
30
+ @Override
31
+ public void doubleColumn(Column column) {
32
+ if (value == null) {
33
+ pageBuilder.setNull(column);
34
+ } else {
35
+ pageBuilder.setDouble(column, value.doubleValue());
36
+ }
37
+ }
38
+
39
+ @Override
40
+ public void stringColumn(Column column) {
41
+ if (value == null) {
42
+ pageBuilder.setNull(column);
43
+ } else {
44
+ Long casted = (Long) value;
45
+ pageBuilder.setString(column, value.toString());
46
+ }
47
+ }
48
+
49
+ @Override
50
+ public void timestampColumn(Column column) {
51
+ if (this.value == null) {
52
+ pageBuilder.setNull(column);
53
+ }
54
+ else {
55
+ pageBuilder.setTimestamp(column, Timestamp.ofEpochSecond(value));
56
+ }
57
+ }
58
+ }
@@ -0,0 +1,85 @@
1
+ package org.embulk.parser.avro.getter;
2
+
3
+ import org.embulk.spi.Column;
4
+ import org.embulk.spi.PageBuilder;
5
+ import org.embulk.spi.json.JsonParser;
6
+ import org.embulk.spi.time.TimestampParser;
7
+
8
+ public class StringColumnGetter extends BaseColumnGetter {
9
+ protected String value;
10
+ private final JsonParser jsonParser = new JsonParser();
11
+
12
+ public StringColumnGetter(PageBuilder pageBuilder, TimestampParser[] timestampParsers) {
13
+ super(pageBuilder, timestampParsers);
14
+ }
15
+
16
+ @Override
17
+ public void setValue(Object value)
18
+ {
19
+ if (value == null)
20
+ this.value = null;
21
+ else
22
+ this.value = value.toString();
23
+ }
24
+
25
+ @Override
26
+ public void booleanColumn(Column column) {
27
+ if (this.value == null) {
28
+ pageBuilder.setNull(column);
29
+ }
30
+ else {
31
+ pageBuilder.setBoolean(column, Boolean.parseBoolean(value));
32
+ }
33
+ }
34
+
35
+ @Override
36
+ public void longColumn(Column column) {
37
+ if (this.value == null) {
38
+ pageBuilder.setNull(column);
39
+ }
40
+ else {
41
+ pageBuilder.setLong(column, Long.parseLong(value));
42
+ }
43
+ }
44
+
45
+ @Override
46
+ public void doubleColumn(Column column) {
47
+ if (this.value == null) {
48
+ pageBuilder.setNull(column);
49
+ }
50
+ else {
51
+ pageBuilder.setDouble(column, Double.parseDouble(value));
52
+ }
53
+ }
54
+
55
+ @Override
56
+ public void stringColumn(Column column) {
57
+ if (this.value == null) {
58
+ pageBuilder.setNull(column);
59
+ }
60
+ else {
61
+ pageBuilder.setString(column, value);
62
+ }
63
+ }
64
+
65
+ @Override
66
+ public void timestampColumn(Column column) {
67
+ if (this.value == null) {
68
+ pageBuilder.setNull(column);
69
+ }
70
+ else {
71
+ TimestampParser parser = timestampParsers[column.getIndex()];
72
+ pageBuilder.setTimestamp(column, parser.parse(value));
73
+ }
74
+ }
75
+
76
+ @Override
77
+ public void jsonColumn(Column column) {
78
+ if (this.value == null) {
79
+ pageBuilder.setNull(column);
80
+ }
81
+ else {
82
+ pageBuilder.setJson(column, jsonParser.parse(value));
83
+ }
84
+ }
85
+ }
@@ -0,0 +1,145 @@
1
+ package org.embulk.parser.avro;
2
+
3
+ import com.google.common.collect.ImmutableList;
4
+ import com.google.common.collect.Lists;
5
+ import org.embulk.EmbulkTestRuntime;
6
+ import org.embulk.config.ConfigSource;
7
+ import org.embulk.config.TaskSource;
8
+ import org.embulk.spi.ColumnConfig;
9
+ import org.embulk.spi.FileInput;
10
+ import org.embulk.spi.ParserPlugin;
11
+ import org.embulk.spi.Schema;
12
+ import org.embulk.spi.SchemaConfig;
13
+ import org.embulk.spi.TestPageBuilderReader.MockPageOutput;
14
+ import org.embulk.spi.type.Type;
15
+ import org.embulk.spi.util.InputStreamFileInput;
16
+ import org.embulk.spi.util.Pages;
17
+ import org.junit.Before;
18
+ import org.junit.Rule;
19
+ import org.junit.Test;
20
+ import org.msgpack.value.MapValue;
21
+ import org.msgpack.value.ValueFactory;
22
+
23
+ import java.io.File;
24
+ import java.io.FileInputStream;
25
+ import java.io.IOException;
26
+ import java.io.InputStream;
27
+ import java.util.List;
28
+
29
+ import static org.embulk.spi.type.Types.BOOLEAN;
30
+ import static org.embulk.spi.type.Types.DOUBLE;
31
+ import static org.embulk.spi.type.Types.LONG;
32
+ import static org.embulk.spi.type.Types.STRING;
33
+ import static org.embulk.spi.type.Types.JSON;
34
+ import static org.embulk.spi.type.Types.TIMESTAMP;
35
+ import static org.junit.Assert.assertEquals;
36
+ import static org.junit.Assert.assertNull;
37
+
38
+ public class TestAvroParserPlugin
39
+ {
40
+
41
+ @Rule
42
+ public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
43
+
44
+ private ConfigSource config;
45
+ private AvroParserPlugin plugin;
46
+ private MockPageOutput output;
47
+
48
+ @Before
49
+ public void createResource()
50
+ {
51
+ config = config().set("type", "avro");
52
+ plugin = new AvroParserPlugin();
53
+ recreatePageOutput();
54
+ }
55
+
56
+ @Test
57
+ public void useNormal()
58
+ throws Exception
59
+ {
60
+ SchemaConfig schema = schema(
61
+ column("id", LONG),
62
+ column("code", STRING),
63
+ column("name", STRING),
64
+ column("description", STRING),
65
+ column("flag", BOOLEAN),
66
+ column("price", DOUBLE),
67
+ column("item_type", STRING),
68
+ column("tags", JSON),
69
+ column("options", JSON),
70
+ column("spec", JSON),
71
+ column("created_at", TIMESTAMP, config().set("format", "%Y-%m-%dT%H:%M:%S%:z")),
72
+ column("created_at_utc", TIMESTAMP)
73
+ );
74
+
75
+ ConfigSource config = this.config.deepCopy().set("columns", schema).set("avsc", this.getClass().getResource("item.avsc").getPath());
76
+
77
+ transaction(config, fileInput(new File(this.getClass().getResource("items.avro").getPath())));
78
+
79
+ List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
80
+ assertEquals(6, records.size());
81
+
82
+ Object[] record = records.get(0);
83
+ assertEquals(1L, record[0]);
84
+ assertEquals("123456789012345678", record[1]);
85
+ assertEquals("Desktop", record[2]);
86
+ assertEquals(true, record[4]);
87
+ assertEquals("D", record[6]);
88
+ assertEquals("[\"tag1\",\"tag2\"]", record[7].toString());
89
+ assertEquals("bar", ((MapValue)record[8]).map().get(ValueFactory.newString("foo")).toString());
90
+ assertEquals("opt1", ((MapValue)record[9]).map().get(ValueFactory.newString("key")).toString());
91
+ assertEquals("2016-05-08 19:35:43 UTC", record[10].toString());
92
+ assertEquals("2016-05-08 19:35:25.952 UTC", record[11].toString());
93
+ }
94
+
95
+ private void recreatePageOutput()
96
+ {
97
+ output = new MockPageOutput();
98
+ }
99
+
100
+ private ConfigSource config()
101
+ {
102
+ return runtime.getExec().newConfigSource();
103
+ }
104
+
105
+ private void transaction(ConfigSource config, final FileInput input)
106
+ {
107
+ plugin.transaction(config, new ParserPlugin.Control()
108
+ {
109
+ @Override
110
+ public void run(TaskSource taskSource, Schema schema)
111
+ {
112
+ plugin.run(taskSource, schema, input, output);
113
+ }
114
+ });
115
+ }
116
+
117
+ private FileInput fileInput(File file)
118
+ throws Exception
119
+ {
120
+ FileInputStream in = new FileInputStream(file);
121
+ return new InputStreamFileInput(runtime.getBufferAllocator(), provider(in));
122
+ }
123
+
124
+ private InputStreamFileInput.IteratorProvider provider(InputStream... inputStreams)
125
+ throws IOException
126
+ {
127
+ return new InputStreamFileInput.IteratorProvider(
128
+ ImmutableList.copyOf(inputStreams));
129
+ }
130
+
131
+ private SchemaConfig schema(ColumnConfig... columns)
132
+ {
133
+ return new SchemaConfig(Lists.newArrayList(columns));
134
+ }
135
+
136
+ private ColumnConfig column(String name, Type type)
137
+ {
138
+ return column(name, type, config());
139
+ }
140
+
141
+ private ColumnConfig column(String name, Type type, ConfigSource option)
142
+ {
143
+ return new ColumnConfig(name, type, option);
144
+ }
145
+ }
@@ -0,0 +1,27 @@
1
+ {
2
+ "type" : "record",
3
+ "name" : "Item",
4
+ "namespace" : "example.avro",
5
+ "fields" : [
6
+ {"name": "id", "type": "int"},
7
+ {"name": "code", "type": "long"},
8
+ {"name": "name", "type": "string"},
9
+ {"name": "description", "type": ["string", "null"]},
10
+ {"name": "flag", "type": "boolean"},
11
+ {"name": "created_at", "type": "string"},
12
+ {"name": "created_at_utc", "type": "float"},
13
+ {"name": "price", "type": ["double", "null"]},
14
+ {"name": "spec", "type": {
15
+ "type": "record",
16
+ "name": "item_spec",
17
+ "fields" : [
18
+ {"name" : "key", "type" : "string"},
19
+ {"name" : "value", "type" : ["string", "null"]}
20
+ ]}
21
+ },
22
+ {"name": "tags", "type": [{"type": "array", "items": "string"}, "null"]},
23
+ {"name": "options", "type": {"type": "map", "values": ["string", "null"]}},
24
+ {"name": "item_type", "type": {"name": "item_type_enum", "type": "enum", "symbols": ["D", "M"]}},
25
+ {"name": "dummy", "type": "null"}
26
+ ]
27
+ }
metadata ADDED
@@ -0,0 +1,112 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embulk-parser-avro
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - joker1007
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-05-08 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ version_requirements: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.0'
20
+ requirement: !ruby/object:Gem::Requirement
21
+ requirements:
22
+ - - ~>
23
+ - !ruby/object:Gem::Version
24
+ version: '1.0'
25
+ prerelease: false
26
+ type: :development
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ requirement: !ruby/object:Gem::Requirement
35
+ requirements:
36
+ - - '>='
37
+ - !ruby/object:Gem::Version
38
+ version: '10.0'
39
+ prerelease: false
40
+ type: :development
41
+ description: Parses Avro files read by other file input plugins.
42
+ email:
43
+ - kakyoin.hierophant@gmail.com
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - .gitignore
49
+ - LICENSE.txt
50
+ - README.md
51
+ - build.gradle
52
+ - config/checkstyle/checkstyle.xml
53
+ - config/checkstyle/default.xml
54
+ - example/.gitignore
55
+ - example/example.yml
56
+ - example/generate.rb
57
+ - example/item.avsc
58
+ - example/items.avro
59
+ - gradle/wrapper/gradle-wrapper.jar
60
+ - gradle/wrapper/gradle-wrapper.properties
61
+ - gradlew
62
+ - gradlew.bat
63
+ - lib/embulk/guess/avro.rb
64
+ - lib/embulk/parser/avro.rb
65
+ - src/main/java/org/embulk/parser/avro/AvroColumnOption.java
66
+ - src/main/java/org/embulk/parser/avro/AvroParserPlugin.java
67
+ - src/main/java/org/embulk/parser/avro/getter/AvroGenericDataConverter.java
68
+ - src/main/java/org/embulk/parser/avro/getter/BaseColumnGetter.java
69
+ - src/main/java/org/embulk/parser/avro/getter/BooleanColumnGetter.java
70
+ - src/main/java/org/embulk/parser/avro/getter/ColumnGetterFactory.java
71
+ - src/main/java/org/embulk/parser/avro/getter/DoubleColumnGetter.java
72
+ - src/main/java/org/embulk/parser/avro/getter/FloatColumnGetter.java
73
+ - src/main/java/org/embulk/parser/avro/getter/GenericDataColumnGetter.java
74
+ - src/main/java/org/embulk/parser/avro/getter/IntegerColumnGetter.java
75
+ - src/main/java/org/embulk/parser/avro/getter/LongColumnGetter.java
76
+ - src/main/java/org/embulk/parser/avro/getter/StringColumnGetter.java
77
+ - src/test/java/org/embulk/parser/avro/TestAvroParserPlugin.java
78
+ - src/test/resources/org/embulk/parser/avro/item.avsc
79
+ - src/test/resources/org/embulk/parser/avro/items.avro
80
+ - classpath/avro-1.8.0.jar
81
+ - classpath/commons-compress-1.8.1.jar
82
+ - classpath/embulk-parser-avro-0.1.0.jar
83
+ - classpath/jackson-core-asl-1.9.13.jar
84
+ - classpath/jackson-mapper-asl-1.9.13.jar
85
+ - classpath/paranamer-2.7.jar
86
+ - classpath/snappy-java-1.1.1.3.jar
87
+ - classpath/xz-1.5.jar
88
+ homepage: https://github.com/joker1007/embulk-parser-avro
89
+ licenses:
90
+ - MIT
91
+ metadata: {}
92
+ post_install_message:
93
+ rdoc_options: []
94
+ require_paths:
95
+ - lib
96
+ required_ruby_version: !ruby/object:Gem::Requirement
97
+ requirements:
98
+ - - '>='
99
+ - !ruby/object:Gem::Version
100
+ version: '0'
101
+ required_rubygems_version: !ruby/object:Gem::Requirement
102
+ requirements:
103
+ - - '>='
104
+ - !ruby/object:Gem::Version
105
+ version: '0'
106
+ requirements: []
107
+ rubyforge_project:
108
+ rubygems_version: 2.1.9
109
+ signing_key:
110
+ specification_version: 4
111
+ summary: Avro parser plugin for Embulk
112
+ test_files: []