embulk-parser-avro 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (34) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +14 -0
  3. data/LICENSE.txt +21 -0
  4. data/README.md +88 -0
  5. data/build.gradle +96 -0
  6. data/config/checkstyle/checkstyle.xml +128 -0
  7. data/config/checkstyle/default.xml +108 -0
  8. data/example/.gitignore +3 -0
  9. data/example/example.yml +22 -0
  10. data/example/generate.rb +94 -0
  11. data/example/item.avsc +27 -0
  12. data/example/items.avro +0 -0
  13. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  14. data/gradle/wrapper/gradle-wrapper.properties +6 -0
  15. data/gradlew +160 -0
  16. data/gradlew.bat +90 -0
  17. data/lib/embulk/guess/avro.rb +61 -0
  18. data/lib/embulk/parser/avro.rb +3 -0
  19. data/src/main/java/org/embulk/parser/avro/AvroColumnOption.java +16 -0
  20. data/src/main/java/org/embulk/parser/avro/AvroParserPlugin.java +93 -0
  21. data/src/main/java/org/embulk/parser/avro/getter/AvroGenericDataConverter.java +72 -0
  22. data/src/main/java/org/embulk/parser/avro/getter/BaseColumnGetter.java +83 -0
  23. data/src/main/java/org/embulk/parser/avro/getter/BooleanColumnGetter.java +37 -0
  24. data/src/main/java/org/embulk/parser/avro/getter/ColumnGetterFactory.java +63 -0
  25. data/src/main/java/org/embulk/parser/avro/getter/DoubleColumnGetter.java +58 -0
  26. data/src/main/java/org/embulk/parser/avro/getter/FloatColumnGetter.java +58 -0
  27. data/src/main/java/org/embulk/parser/avro/getter/GenericDataColumnGetter.java +34 -0
  28. data/src/main/java/org/embulk/parser/avro/getter/IntegerColumnGetter.java +57 -0
  29. data/src/main/java/org/embulk/parser/avro/getter/LongColumnGetter.java +58 -0
  30. data/src/main/java/org/embulk/parser/avro/getter/StringColumnGetter.java +85 -0
  31. data/src/test/java/org/embulk/parser/avro/TestAvroParserPlugin.java +145 -0
  32. data/src/test/resources/org/embulk/parser/avro/item.avsc +27 -0
  33. data/src/test/resources/org/embulk/parser/avro/items.avro +0 -0
  34. metadata +112 -0
@@ -0,0 +1,34 @@
1
+ package org.embulk.parser.avro.getter;
2
+
3
+ import org.embulk.spi.Column;
4
+ import org.embulk.spi.PageBuilder;
5
+ import org.embulk.spi.time.TimestampParser;
6
+ import org.msgpack.value.Value;
7
+
8
+ public class GenericDataColumnGetter extends BaseColumnGetter {
9
+ public GenericDataColumnGetter(PageBuilder pageBuilder, TimestampParser[] timestampParsers) {
10
+ super(pageBuilder, timestampParsers);
11
+ }
12
+
13
+ @Override
14
+ public void stringColumn(Column column) {
15
+ if (this.value == null) {
16
+ pageBuilder.setNull(column);
17
+ }
18
+ else {
19
+ Value converted = AvroGenericDataConverter.convert(value);
20
+ pageBuilder.setString(column, converted.toString());
21
+ }
22
+ }
23
+
24
+ @Override
25
+ public void jsonColumn(Column column) {
26
+ if (this.value == null) {
27
+ pageBuilder.setNull(column);
28
+ }
29
+ else {
30
+ Value converted = AvroGenericDataConverter.convert(value);
31
+ pageBuilder.setJson(column, converted);
32
+ }
33
+ }
34
+ }
@@ -0,0 +1,57 @@
1
+ package org.embulk.parser.avro.getter;
2
+
3
+ import org.embulk.spi.Column;
4
+ import org.embulk.spi.PageBuilder;
5
+ import org.embulk.spi.time.Timestamp;
6
+ import org.embulk.spi.time.TimestampParser;
7
+
8
+ public class IntegerColumnGetter extends BaseColumnGetter {
9
+ protected Integer value;
10
+
11
+ public IntegerColumnGetter(PageBuilder pageBuilder, TimestampParser[] timestampParsers) {
12
+ super(pageBuilder, timestampParsers);
13
+ }
14
+
15
+ @Override
16
+ public void setValue(Object value)
17
+ {
18
+ this.value = (Integer) value;
19
+ }
20
+
21
+ @Override
22
+ public void longColumn(Column column) {
23
+ if (value == null) {
24
+ pageBuilder.setNull(column);
25
+ } else {
26
+ pageBuilder.setLong(column, value.longValue());
27
+ }
28
+ }
29
+
30
+ @Override
31
+ public void doubleColumn(Column column) {
32
+ if (value == null) {
33
+ pageBuilder.setNull(column);
34
+ } else {
35
+ pageBuilder.setDouble(column, value.doubleValue());
36
+ }
37
+ }
38
+
39
+ @Override
40
+ public void stringColumn(Column column) {
41
+ if (value == null) {
42
+ pageBuilder.setNull(column);
43
+ } else {
44
+ pageBuilder.setString(column, value.toString());
45
+ }
46
+ }
47
+
48
+ @Override
49
+ public void timestampColumn(Column column) {
50
+ if (this.value == null) {
51
+ pageBuilder.setNull(column);
52
+ }
53
+ else {
54
+ pageBuilder.setTimestamp(column, Timestamp.ofEpochSecond(value.longValue()));
55
+ }
56
+ }
57
+ }
@@ -0,0 +1,58 @@
1
+ package org.embulk.parser.avro.getter;
2
+
3
+ import org.embulk.spi.Column;
4
+ import org.embulk.spi.PageBuilder;
5
+ import org.embulk.spi.time.Timestamp;
6
+ import org.embulk.spi.time.TimestampParser;
7
+
8
+ public class LongColumnGetter extends BaseColumnGetter {
9
+ protected Long value;
10
+
11
+ public LongColumnGetter(PageBuilder pageBuilder, TimestampParser[] timestampParsers) {
12
+ super(pageBuilder, timestampParsers);
13
+ }
14
+
15
+ @Override
16
+ public void setValue(Object value)
17
+ {
18
+ this.value = (Long) value;
19
+ }
20
+
21
+ @Override
22
+ public void longColumn(Column column) {
23
+ if (value == null) {
24
+ pageBuilder.setNull(column);
25
+ } else {
26
+ pageBuilder.setLong(column, value);
27
+ }
28
+ }
29
+
30
+ @Override
31
+ public void doubleColumn(Column column) {
32
+ if (value == null) {
33
+ pageBuilder.setNull(column);
34
+ } else {
35
+ pageBuilder.setDouble(column, value.doubleValue());
36
+ }
37
+ }
38
+
39
+ @Override
40
+ public void stringColumn(Column column) {
41
+ if (value == null) {
42
+ pageBuilder.setNull(column);
43
+ } else {
44
+ Long casted = (Long) value;
45
+ pageBuilder.setString(column, value.toString());
46
+ }
47
+ }
48
+
49
+ @Override
50
+ public void timestampColumn(Column column) {
51
+ if (this.value == null) {
52
+ pageBuilder.setNull(column);
53
+ }
54
+ else {
55
+ pageBuilder.setTimestamp(column, Timestamp.ofEpochSecond(value));
56
+ }
57
+ }
58
+ }
@@ -0,0 +1,85 @@
1
+ package org.embulk.parser.avro.getter;
2
+
3
+ import org.embulk.spi.Column;
4
+ import org.embulk.spi.PageBuilder;
5
+ import org.embulk.spi.json.JsonParser;
6
+ import org.embulk.spi.time.TimestampParser;
7
+
8
+ public class StringColumnGetter extends BaseColumnGetter {
9
+ protected String value;
10
+ private final JsonParser jsonParser = new JsonParser();
11
+
12
+ public StringColumnGetter(PageBuilder pageBuilder, TimestampParser[] timestampParsers) {
13
+ super(pageBuilder, timestampParsers);
14
+ }
15
+
16
+ @Override
17
+ public void setValue(Object value)
18
+ {
19
+ if (value == null)
20
+ this.value = null;
21
+ else
22
+ this.value = value.toString();
23
+ }
24
+
25
+ @Override
26
+ public void booleanColumn(Column column) {
27
+ if (this.value == null) {
28
+ pageBuilder.setNull(column);
29
+ }
30
+ else {
31
+ pageBuilder.setBoolean(column, Boolean.parseBoolean(value));
32
+ }
33
+ }
34
+
35
+ @Override
36
+ public void longColumn(Column column) {
37
+ if (this.value == null) {
38
+ pageBuilder.setNull(column);
39
+ }
40
+ else {
41
+ pageBuilder.setLong(column, Long.parseLong(value));
42
+ }
43
+ }
44
+
45
+ @Override
46
+ public void doubleColumn(Column column) {
47
+ if (this.value == null) {
48
+ pageBuilder.setNull(column);
49
+ }
50
+ else {
51
+ pageBuilder.setDouble(column, Double.parseDouble(value));
52
+ }
53
+ }
54
+
55
+ @Override
56
+ public void stringColumn(Column column) {
57
+ if (this.value == null) {
58
+ pageBuilder.setNull(column);
59
+ }
60
+ else {
61
+ pageBuilder.setString(column, value);
62
+ }
63
+ }
64
+
65
+ @Override
66
+ public void timestampColumn(Column column) {
67
+ if (this.value == null) {
68
+ pageBuilder.setNull(column);
69
+ }
70
+ else {
71
+ TimestampParser parser = timestampParsers[column.getIndex()];
72
+ pageBuilder.setTimestamp(column, parser.parse(value));
73
+ }
74
+ }
75
+
76
+ @Override
77
+ public void jsonColumn(Column column) {
78
+ if (this.value == null) {
79
+ pageBuilder.setNull(column);
80
+ }
81
+ else {
82
+ pageBuilder.setJson(column, jsonParser.parse(value));
83
+ }
84
+ }
85
+ }
@@ -0,0 +1,145 @@
1
+ package org.embulk.parser.avro;
2
+
3
+ import com.google.common.collect.ImmutableList;
4
+ import com.google.common.collect.Lists;
5
+ import org.embulk.EmbulkTestRuntime;
6
+ import org.embulk.config.ConfigSource;
7
+ import org.embulk.config.TaskSource;
8
+ import org.embulk.spi.ColumnConfig;
9
+ import org.embulk.spi.FileInput;
10
+ import org.embulk.spi.ParserPlugin;
11
+ import org.embulk.spi.Schema;
12
+ import org.embulk.spi.SchemaConfig;
13
+ import org.embulk.spi.TestPageBuilderReader.MockPageOutput;
14
+ import org.embulk.spi.type.Type;
15
+ import org.embulk.spi.util.InputStreamFileInput;
16
+ import org.embulk.spi.util.Pages;
17
+ import org.junit.Before;
18
+ import org.junit.Rule;
19
+ import org.junit.Test;
20
+ import org.msgpack.value.MapValue;
21
+ import org.msgpack.value.ValueFactory;
22
+
23
+ import java.io.File;
24
+ import java.io.FileInputStream;
25
+ import java.io.IOException;
26
+ import java.io.InputStream;
27
+ import java.util.List;
28
+
29
+ import static org.embulk.spi.type.Types.BOOLEAN;
30
+ import static org.embulk.spi.type.Types.DOUBLE;
31
+ import static org.embulk.spi.type.Types.LONG;
32
+ import static org.embulk.spi.type.Types.STRING;
33
+ import static org.embulk.spi.type.Types.JSON;
34
+ import static org.embulk.spi.type.Types.TIMESTAMP;
35
+ import static org.junit.Assert.assertEquals;
36
+ import static org.junit.Assert.assertNull;
37
+
38
+ public class TestAvroParserPlugin
39
+ {
40
+
41
+ @Rule
42
+ public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
43
+
44
+ private ConfigSource config;
45
+ private AvroParserPlugin plugin;
46
+ private MockPageOutput output;
47
+
48
+ @Before
49
+ public void createResource()
50
+ {
51
+ config = config().set("type", "avro");
52
+ plugin = new AvroParserPlugin();
53
+ recreatePageOutput();
54
+ }
55
+
56
+ @Test
57
+ public void useNormal()
58
+ throws Exception
59
+ {
60
+ SchemaConfig schema = schema(
61
+ column("id", LONG),
62
+ column("code", STRING),
63
+ column("name", STRING),
64
+ column("description", STRING),
65
+ column("flag", BOOLEAN),
66
+ column("price", DOUBLE),
67
+ column("item_type", STRING),
68
+ column("tags", JSON),
69
+ column("options", JSON),
70
+ column("spec", JSON),
71
+ column("created_at", TIMESTAMP, config().set("format", "%Y-%m-%dT%H:%M:%S%:z")),
72
+ column("created_at_utc", TIMESTAMP)
73
+ );
74
+
75
+ ConfigSource config = this.config.deepCopy().set("columns", schema).set("avsc", this.getClass().getResource("item.avsc").getPath());
76
+
77
+ transaction(config, fileInput(new File(this.getClass().getResource("items.avro").getPath())));
78
+
79
+ List<Object[]> records = Pages.toObjects(schema.toSchema(), output.pages);
80
+ assertEquals(6, records.size());
81
+
82
+ Object[] record = records.get(0);
83
+ assertEquals(1L, record[0]);
84
+ assertEquals("123456789012345678", record[1]);
85
+ assertEquals("Desktop", record[2]);
86
+ assertEquals(true, record[4]);
87
+ assertEquals("D", record[6]);
88
+ assertEquals("[\"tag1\",\"tag2\"]", record[7].toString());
89
+ assertEquals("bar", ((MapValue)record[8]).map().get(ValueFactory.newString("foo")).toString());
90
+ assertEquals("opt1", ((MapValue)record[9]).map().get(ValueFactory.newString("key")).toString());
91
+ assertEquals("2016-05-08 19:35:43 UTC", record[10].toString());
92
+ assertEquals("2016-05-08 19:35:25.952 UTC", record[11].toString());
93
+ }
94
+
95
+ private void recreatePageOutput()
96
+ {
97
+ output = new MockPageOutput();
98
+ }
99
+
100
+ private ConfigSource config()
101
+ {
102
+ return runtime.getExec().newConfigSource();
103
+ }
104
+
105
+ private void transaction(ConfigSource config, final FileInput input)
106
+ {
107
+ plugin.transaction(config, new ParserPlugin.Control()
108
+ {
109
+ @Override
110
+ public void run(TaskSource taskSource, Schema schema)
111
+ {
112
+ plugin.run(taskSource, schema, input, output);
113
+ }
114
+ });
115
+ }
116
+
117
+ private FileInput fileInput(File file)
118
+ throws Exception
119
+ {
120
+ FileInputStream in = new FileInputStream(file);
121
+ return new InputStreamFileInput(runtime.getBufferAllocator(), provider(in));
122
+ }
123
+
124
+ private InputStreamFileInput.IteratorProvider provider(InputStream... inputStreams)
125
+ throws IOException
126
+ {
127
+ return new InputStreamFileInput.IteratorProvider(
128
+ ImmutableList.copyOf(inputStreams));
129
+ }
130
+
131
+ private SchemaConfig schema(ColumnConfig... columns)
132
+ {
133
+ return new SchemaConfig(Lists.newArrayList(columns));
134
+ }
135
+
136
+ private ColumnConfig column(String name, Type type)
137
+ {
138
+ return column(name, type, config());
139
+ }
140
+
141
+ private ColumnConfig column(String name, Type type, ConfigSource option)
142
+ {
143
+ return new ColumnConfig(name, type, option);
144
+ }
145
+ }
@@ -0,0 +1,27 @@
1
+ {
2
+ "type" : "record",
3
+ "name" : "Item",
4
+ "namespace" : "example.avro",
5
+ "fields" : [
6
+ {"name": "id", "type": "int"},
7
+ {"name": "code", "type": "long"},
8
+ {"name": "name", "type": "string"},
9
+ {"name": "description", "type": ["string", "null"]},
10
+ {"name": "flag", "type": "boolean"},
11
+ {"name": "created_at", "type": "string"},
12
+ {"name": "created_at_utc", "type": "float"},
13
+ {"name": "price", "type": ["double", "null"]},
14
+ {"name": "spec", "type": {
15
+ "type": "record",
16
+ "name": "item_spec",
17
+ "fields" : [
18
+ {"name" : "key", "type" : "string"},
19
+ {"name" : "value", "type" : ["string", "null"]}
20
+ ]}
21
+ },
22
+ {"name": "tags", "type": [{"type": "array", "items": "string"}, "null"]},
23
+ {"name": "options", "type": {"type": "map", "values": ["string", "null"]}},
24
+ {"name": "item_type", "type": {"name": "item_type_enum", "type": "enum", "symbols": ["D", "M"]}},
25
+ {"name": "dummy", "type": "null"}
26
+ ]
27
+ }
metadata ADDED
@@ -0,0 +1,112 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embulk-parser-avro
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - joker1007
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-05-08 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ version_requirements: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.0'
20
+ requirement: !ruby/object:Gem::Requirement
21
+ requirements:
22
+ - - ~>
23
+ - !ruby/object:Gem::Version
24
+ version: '1.0'
25
+ prerelease: false
26
+ type: :development
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ requirement: !ruby/object:Gem::Requirement
35
+ requirements:
36
+ - - '>='
37
+ - !ruby/object:Gem::Version
38
+ version: '10.0'
39
+ prerelease: false
40
+ type: :development
41
+ description: Parses Avro files read by other file input plugins.
42
+ email:
43
+ - kakyoin.hierophant@gmail.com
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - .gitignore
49
+ - LICENSE.txt
50
+ - README.md
51
+ - build.gradle
52
+ - config/checkstyle/checkstyle.xml
53
+ - config/checkstyle/default.xml
54
+ - example/.gitignore
55
+ - example/example.yml
56
+ - example/generate.rb
57
+ - example/item.avsc
58
+ - example/items.avro
59
+ - gradle/wrapper/gradle-wrapper.jar
60
+ - gradle/wrapper/gradle-wrapper.properties
61
+ - gradlew
62
+ - gradlew.bat
63
+ - lib/embulk/guess/avro.rb
64
+ - lib/embulk/parser/avro.rb
65
+ - src/main/java/org/embulk/parser/avro/AvroColumnOption.java
66
+ - src/main/java/org/embulk/parser/avro/AvroParserPlugin.java
67
+ - src/main/java/org/embulk/parser/avro/getter/AvroGenericDataConverter.java
68
+ - src/main/java/org/embulk/parser/avro/getter/BaseColumnGetter.java
69
+ - src/main/java/org/embulk/parser/avro/getter/BooleanColumnGetter.java
70
+ - src/main/java/org/embulk/parser/avro/getter/ColumnGetterFactory.java
71
+ - src/main/java/org/embulk/parser/avro/getter/DoubleColumnGetter.java
72
+ - src/main/java/org/embulk/parser/avro/getter/FloatColumnGetter.java
73
+ - src/main/java/org/embulk/parser/avro/getter/GenericDataColumnGetter.java
74
+ - src/main/java/org/embulk/parser/avro/getter/IntegerColumnGetter.java
75
+ - src/main/java/org/embulk/parser/avro/getter/LongColumnGetter.java
76
+ - src/main/java/org/embulk/parser/avro/getter/StringColumnGetter.java
77
+ - src/test/java/org/embulk/parser/avro/TestAvroParserPlugin.java
78
+ - src/test/resources/org/embulk/parser/avro/item.avsc
79
+ - src/test/resources/org/embulk/parser/avro/items.avro
80
+ - classpath/avro-1.8.0.jar
81
+ - classpath/commons-compress-1.8.1.jar
82
+ - classpath/embulk-parser-avro-0.1.0.jar
83
+ - classpath/jackson-core-asl-1.9.13.jar
84
+ - classpath/jackson-mapper-asl-1.9.13.jar
85
+ - classpath/paranamer-2.7.jar
86
+ - classpath/snappy-java-1.1.1.3.jar
87
+ - classpath/xz-1.5.jar
88
+ homepage: https://github.com/joker1007/embulk-parser-avro
89
+ licenses:
90
+ - MIT
91
+ metadata: {}
92
+ post_install_message:
93
+ rdoc_options: []
94
+ require_paths:
95
+ - lib
96
+ required_ruby_version: !ruby/object:Gem::Requirement
97
+ requirements:
98
+ - - '>='
99
+ - !ruby/object:Gem::Version
100
+ version: '0'
101
+ required_rubygems_version: !ruby/object:Gem::Requirement
102
+ requirements:
103
+ - - '>='
104
+ - !ruby/object:Gem::Version
105
+ version: '0'
106
+ requirements: []
107
+ rubyforge_project:
108
+ rubygems_version: 2.1.9
109
+ signing_key:
110
+ specification_version: 4
111
+ summary: Avro parser plugin for Embulk
112
+ test_files: []