embulk-formatter-avro 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (33) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +14 -0
  3. data/LICENSE.txt +21 -0
  4. data/README.md +51 -0
  5. data/build.gradle +97 -0
  6. data/config/checkstyle/checkstyle.xml +128 -0
  7. data/config/checkstyle/default.xml +108 -0
  8. data/example/config.yml +34 -0
  9. data/example/sample_01.csv +7 -0
  10. data/example/schema.avsc +33 -0
  11. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  12. data/gradle/wrapper/gradle-wrapper.properties +6 -0
  13. data/gradlew +160 -0
  14. data/gradlew.bat +90 -0
  15. data/lib/embulk/formatter/avro.rb +3 -0
  16. data/src/main/java/org/embulk/formatter/avro/AvroFormatterColumnVisitor.java +97 -0
  17. data/src/main/java/org/embulk/formatter/avro/AvroFormatterPlugin.java +183 -0
  18. data/src/main/java/org/embulk/formatter/avro/converter/AbstractAvroValueConverter.java +36 -0
  19. data/src/main/java/org/embulk/formatter/avro/converter/AvroArrayConverter.java +52 -0
  20. data/src/main/java/org/embulk/formatter/avro/converter/AvroBooleanConverter.java +14 -0
  21. data/src/main/java/org/embulk/formatter/avro/converter/AvroDoubleConverter.java +29 -0
  22. data/src/main/java/org/embulk/formatter/avro/converter/AvroEnumConverter.java +25 -0
  23. data/src/main/java/org/embulk/formatter/avro/converter/AvroFixedConverter.java +16 -0
  24. data/src/main/java/org/embulk/formatter/avro/converter/AvroFloatConverter.java +29 -0
  25. data/src/main/java/org/embulk/formatter/avro/converter/AvroIntConverter.java +29 -0
  26. data/src/main/java/org/embulk/formatter/avro/converter/AvroLongConverter.java +29 -0
  27. data/src/main/java/org/embulk/formatter/avro/converter/AvroMapConverter.java +52 -0
  28. data/src/main/java/org/embulk/formatter/avro/converter/AvroNullConverter.java +40 -0
  29. data/src/main/java/org/embulk/formatter/avro/converter/AvroRecordConverter.java +59 -0
  30. data/src/main/java/org/embulk/formatter/avro/converter/AvroStringConverter.java +41 -0
  31. data/src/main/java/org/embulk/formatter/avro/converter/AvroValueConverterFactory.java +52 -0
  32. data/src/test/java/org/embulk/formatter/avro/TestAvroFormatterPlugin.java +5 -0
  33. metadata +111 -0
@@ -0,0 +1,36 @@
1
+ package org.embulk.formatter.avro.converter;
2
+
3
+ import org.apache.avro.Schema;
4
+ import org.msgpack.value.Value;
5
+
6
+ abstract public class AbstractAvroValueConverter {
7
+ protected Schema avroSchema;
8
+
9
+ public AbstractAvroValueConverter(Schema schema) {
10
+ this.avroSchema = schema;
11
+ }
12
+
13
+ public Object booleanColumn(boolean value) {
14
+ throw new RuntimeException(String.format("%s cannot convert to %s of %s field", value, avroSchema.getType(), avroSchema.getName()));
15
+ }
16
+
17
+ public Object longColumn(long value) {
18
+ throw new RuntimeException(String.format("%s cannot convert to %s of %s field", value, avroSchema.getType(), avroSchema.getName()));
19
+ }
20
+
21
+ public Object doubleColumn(double value) {
22
+ throw new RuntimeException(String.format("%s cannot convert to %s of %s field", value, avroSchema.getType(), avroSchema.getName()));
23
+ }
24
+
25
+ public Object stringColumn(String value) {
26
+ throw new RuntimeException(String.format("%s cannot convert to %s of %s field", value, avroSchema.getType(), avroSchema.getName()));
27
+ }
28
+
29
+ public Object timestampColumn(String value) {
30
+ throw new RuntimeException(String.format("%s cannot convert to %s of %s field", value, avroSchema.getType(), avroSchema.getName()));
31
+ }
32
+
33
+ public Object jsonColumn(Value value) {
34
+ throw new RuntimeException(String.format("%s cannot convert to %s of %s field", value, avroSchema.getType(), avroSchema.getName()));
35
+ }
36
+ }
@@ -0,0 +1,52 @@
1
+ package org.embulk.formatter.avro.converter;
2
+
3
+ import org.apache.avro.Schema;
4
+ import org.apache.avro.generic.GenericArray;
5
+ import org.apache.avro.generic.GenericData;
6
+ import org.msgpack.value.Value;
7
+
8
+ import java.util.List;
9
+
10
+ public class AvroArrayConverter extends AbstractAvroValueConverter {
11
+ private AbstractAvroValueConverter elementConverter;
12
+
13
+ public AvroArrayConverter(Schema schema, AbstractAvroValueConverter elementConverter) {
14
+ super(schema);
15
+ this.elementConverter = elementConverter;
16
+ }
17
+
18
+ @Override
19
+ public GenericArray<Object> jsonColumn(Value value) {
20
+ if (!value.isArrayValue())
21
+ throw new RuntimeException("Support only array type json record");
22
+
23
+ List<Value> list = value.asArrayValue().list();
24
+
25
+ GenericArray<Object> array = new GenericData.Array<>(list.size(), avroSchema);
26
+ for (Value val : list) {
27
+ switch (val.getValueType()) {
28
+ case STRING:
29
+ array.add(elementConverter.stringColumn(val.asStringValue().toString()));
30
+ break;
31
+ case INTEGER:
32
+ array.add(elementConverter.longColumn(val.asIntegerValue().toLong()));
33
+ break;
34
+ case FLOAT:
35
+ array.add(elementConverter.doubleColumn(val.asFloatValue().toDouble()));
36
+ break;
37
+ case BOOLEAN:
38
+ array.add(elementConverter.booleanColumn(val.asBooleanValue().getBoolean()));
39
+ break;
40
+ case ARRAY:
41
+ array.add(elementConverter.jsonColumn(val.asArrayValue()));
42
+ break;
43
+ case MAP:
44
+ array.add(elementConverter.jsonColumn(val.asMapValue()));
45
+ break;
46
+ default:
47
+ throw new RuntimeException("Irregular Messagepack type");
48
+ }
49
+ }
50
+ return array;
51
+ }
52
+ }
@@ -0,0 +1,14 @@
1
+ package org.embulk.formatter.avro.converter;
2
+
3
+ import org.apache.avro.Schema;
4
+
5
+ public class AvroBooleanConverter extends AbstractAvroValueConverter {
6
+ public AvroBooleanConverter(Schema schema) {
7
+ super(schema);
8
+ }
9
+
10
+ @Override
11
+ public Object booleanColumn(boolean value) {
12
+ throw new RuntimeException("Unsupported typecasting");
13
+ }
14
+ }
@@ -0,0 +1,29 @@
1
+ package org.embulk.formatter.avro.converter;
2
+
3
+ import org.apache.avro.Schema;
4
+
5
+ public class AvroDoubleConverter extends AbstractAvroValueConverter {
6
+ public AvroDoubleConverter(Schema schema) {
7
+ super(schema);
8
+ }
9
+
10
+ @Override
11
+ public Double longColumn(long value) {
12
+ return Long.valueOf(value).doubleValue();
13
+ }
14
+
15
+ @Override
16
+ public Double doubleColumn(double value) {
17
+ return value;
18
+ }
19
+
20
+ @Override
21
+ public Double stringColumn(String value) {
22
+ return Double.parseDouble(value);
23
+ }
24
+
25
+ @Override
26
+ public Double timestampColumn(String value) {
27
+ return Double.parseDouble(value);
28
+ }
29
+ }
@@ -0,0 +1,25 @@
1
+ package org.embulk.formatter.avro.converter;
2
+
3
+ import org.apache.avro.Schema;
4
+ import org.apache.avro.generic.GenericData;
5
+ import org.apache.avro.generic.GenericEnumSymbol;
6
+
7
+ import java.util.List;
8
+
9
+ public class AvroEnumConverter extends AbstractAvroValueConverter {
10
+ private List<String> enumSymbols;
11
+
12
+ AvroEnumConverter(Schema schema, List<String> enumSymbols) {
13
+ super(schema);
14
+ this.enumSymbols = enumSymbols;
15
+ }
16
+
17
+ @Override
18
+ public GenericEnumSymbol stringColumn(String value) {
19
+ if (enumSymbols.contains(value)) {
20
+ return new GenericData.EnumSymbol(avroSchema, value);
21
+ } else {
22
+ throw new RuntimeException(String.format("%s is not in %s", value, enumSymbols.toString()));
23
+ }
24
+ }
25
+ }
@@ -0,0 +1,16 @@
1
+ package org.embulk.formatter.avro.converter;
2
+
3
+ import org.apache.avro.Schema;
4
+ import org.apache.avro.generic.GenericData;
5
+ import org.apache.avro.generic.GenericFixed;
6
+
7
+ public class AvroFixedConverter extends AbstractAvroValueConverter {
8
+ AvroFixedConverter(Schema schema) {
9
+ super(schema);
10
+ }
11
+
12
+ @Override
13
+ public GenericFixed stringColumn(String value) {
14
+ return new GenericData.Fixed(avroSchema, value.getBytes());
15
+ }
16
+ }
@@ -0,0 +1,29 @@
1
+ package org.embulk.formatter.avro.converter;
2
+
3
+ import org.apache.avro.Schema;
4
+
5
+ public class AvroFloatConverter extends AbstractAvroValueConverter {
6
+ public AvroFloatConverter(Schema schema) {
7
+ super(schema);
8
+ }
9
+
10
+ @Override
11
+ public Float longColumn(long value) {
12
+ return Long.valueOf(value).floatValue();
13
+ }
14
+
15
+ @Override
16
+ public Float doubleColumn(double value) {
17
+ return Double.valueOf(value).floatValue();
18
+ }
19
+
20
+ @Override
21
+ public Float stringColumn(String value) {
22
+ return Float.parseFloat(value);
23
+ }
24
+
25
+ @Override
26
+ public Float timestampColumn(String value) {
27
+ return Float.parseFloat(value);
28
+ }
29
+ }
@@ -0,0 +1,29 @@
1
+ package org.embulk.formatter.avro.converter;
2
+
3
+ import org.apache.avro.Schema;
4
+
5
+ public class AvroIntConverter extends AbstractAvroValueConverter {
6
+ public AvroIntConverter(Schema schema) {
7
+ super(schema);
8
+ }
9
+
10
+ @Override
11
+ public Integer longColumn(long value) {
12
+ return Long.valueOf(value).intValue();
13
+ }
14
+
15
+ @Override
16
+ public Integer doubleColumn(double value) {
17
+ return Double.valueOf(value).intValue();
18
+ }
19
+
20
+ @Override
21
+ public Integer stringColumn(String value) {
22
+ return Integer.parseInt(value);
23
+ }
24
+
25
+ @Override
26
+ public Integer timestampColumn(String value) {
27
+ return Integer.parseInt(value);
28
+ }
29
+ }
@@ -0,0 +1,29 @@
1
+ package org.embulk.formatter.avro.converter;
2
+
3
+ import org.apache.avro.Schema;
4
+
5
+ public class AvroLongConverter extends AbstractAvroValueConverter {
6
+ public AvroLongConverter(Schema schema) {
7
+ super(schema);
8
+ }
9
+
10
+ @Override
11
+ public Long longColumn(long value) {
12
+ return value;
13
+ }
14
+
15
+ @Override
16
+ public Long doubleColumn(double value) {
17
+ return Math.round(value);
18
+ }
19
+
20
+ @Override
21
+ public Long stringColumn(String value) {
22
+ return Long.parseLong(value);
23
+ }
24
+
25
+ @Override
26
+ public Long timestampColumn(String value) {
27
+ return Long.parseLong(value);
28
+ }
29
+ }
@@ -0,0 +1,52 @@
1
+ package org.embulk.formatter.avro.converter;
2
+
3
+ import avro.shaded.com.google.common.collect.ImmutableMap;
4
+ import org.apache.avro.Schema;
5
+ import org.msgpack.value.Value;
6
+
7
+ import java.util.Map;
8
+
9
+ public class AvroMapConverter extends AbstractAvroValueConverter {
10
+ private AbstractAvroValueConverter elementConverter;
11
+
12
+ public AvroMapConverter(Schema schema, AbstractAvroValueConverter elementConverter) {
13
+ super(schema);
14
+ this.elementConverter = elementConverter;
15
+ }
16
+
17
+ @Override
18
+ public Map<String, Object> jsonColumn(Value value) {
19
+ if (!value.isMapValue())
20
+ throw new RuntimeException("Support only map type json record");
21
+
22
+ Map<Value, Value> map = value.asMapValue().map();
23
+
24
+ ImmutableMap.Builder<String, Object> builder = ImmutableMap.builder();
25
+
26
+ for (Map.Entry<Value, Value> entry : map.entrySet()) {
27
+ switch (entry.getValue().getValueType()) {
28
+ case STRING:
29
+ builder.put(entry.getKey().toString(), elementConverter.stringColumn(entry.getValue().asStringValue().toString()));
30
+ break;
31
+ case INTEGER:
32
+ builder.put(entry.getKey().toString(), elementConverter.longColumn(entry.getValue().asIntegerValue().toLong()));
33
+ break;
34
+ case FLOAT:
35
+ builder.put(entry.getKey().toString(), elementConverter.doubleColumn(entry.getValue().asFloatValue().toDouble()));
36
+ break;
37
+ case BOOLEAN:
38
+ builder.put(entry.getKey().toString(), elementConverter.booleanColumn(entry.getValue().asBooleanValue().getBoolean()));
39
+ break;
40
+ case ARRAY:
41
+ builder.put(entry.getKey().toString(), elementConverter.jsonColumn(entry.getValue().asArrayValue()));
42
+ break;
43
+ case MAP:
44
+ builder.put(entry.getKey().toString(), elementConverter.jsonColumn(entry.getValue().asMapValue()));
45
+ break;
46
+ default:
47
+ throw new RuntimeException("Irregular Messagepack type");
48
+ }
49
+ }
50
+ return builder.build();
51
+ }
52
+ }
@@ -0,0 +1,40 @@
1
+ package org.embulk.formatter.avro.converter;
2
+
3
+ import org.apache.avro.Schema;
4
+ import org.msgpack.value.Value;
5
+
6
+ public class AvroNullConverter extends AbstractAvroValueConverter {
7
+ public AvroNullConverter(Schema schema) {
8
+ super(schema);
9
+ }
10
+
11
+ @Override
12
+ public Object booleanColumn(boolean value) {
13
+ return null;
14
+ }
15
+
16
+ @Override
17
+ public Object longColumn(long value) {
18
+ return null;
19
+ }
20
+
21
+ @Override
22
+ public Object doubleColumn(double value) {
23
+ return null;
24
+ }
25
+
26
+ @Override
27
+ public Object stringColumn(String value) {
28
+ return null;
29
+ }
30
+
31
+ @Override
32
+ public Object timestampColumn(String value) {
33
+ return null;
34
+ }
35
+
36
+ @Override
37
+ public Object jsonColumn(Value value) {
38
+ return null;
39
+ }
40
+ }
@@ -0,0 +1,59 @@
1
+ package org.embulk.formatter.avro.converter;
2
+
3
+ import org.apache.avro.Schema;
4
+ import org.apache.avro.generic.GenericData;
5
+ import org.apache.avro.generic.GenericRecord;
6
+ import org.msgpack.value.Value;
7
+ import org.msgpack.value.ValueFactory;
8
+
9
+ import java.util.Map;
10
+
11
+ public class AvroRecordConverter extends AbstractAvroValueConverter {
12
+ private Map<String, AbstractAvroValueConverter> converterTable;
13
+
14
+ public AvroRecordConverter(Schema schema, Map<String, AbstractAvroValueConverter> converterTable) {
15
+ super(schema);
16
+ this.converterTable = converterTable;
17
+ }
18
+
19
+ @Override
20
+ public GenericRecord jsonColumn(Value value) {
21
+ if (!value.isMapValue())
22
+ throw new RuntimeException("Support only map type json record");
23
+
24
+ Map<Value, Value> map = value.asMapValue().map();
25
+
26
+ GenericRecord record = new GenericData.Record(avroSchema);
27
+ for (Map.Entry<String, AbstractAvroValueConverter> entry : converterTable.entrySet()) {
28
+ Value key = ValueFactory.newString(entry.getKey());
29
+ if (!map.containsKey(key)) {
30
+ record.put(entry.getKey(), null);
31
+ } else {
32
+ Value child = map.get(ValueFactory.newString(entry.getKey()));
33
+ switch (child.getValueType()) {
34
+ case STRING:
35
+ record.put(entry.getKey(), entry.getValue().stringColumn(child.asStringValue().toString()));
36
+ break;
37
+ case INTEGER:
38
+ record.put(entry.getKey(), entry.getValue().longColumn(child.asIntegerValue().toLong()));
39
+ break;
40
+ case FLOAT:
41
+ record.put(entry.getKey(), entry.getValue().doubleColumn(child.asFloatValue().toDouble()));
42
+ break;
43
+ case BOOLEAN:
44
+ record.put(entry.getKey(), entry.getValue().booleanColumn(child.asBooleanValue().getBoolean()));
45
+ break;
46
+ case ARRAY:
47
+ record.put(entry.getKey(), entry.getValue().jsonColumn(child.asArrayValue()));
48
+ break;
49
+ case MAP:
50
+ record.put(entry.getKey(), entry.getValue().jsonColumn(child.asMapValue()));
51
+ break;
52
+ default:
53
+ throw new RuntimeException("Irregular Messagepack type");
54
+ }
55
+ }
56
+ }
57
+ return record;
58
+ }
59
+ }
@@ -0,0 +1,41 @@
1
+ package org.embulk.formatter.avro.converter;
2
+
3
+ import org.apache.avro.Schema;
4
+ import org.msgpack.value.Value;
5
+
6
+ public class AvroStringConverter extends AbstractAvroValueConverter {
7
+ public AvroStringConverter(Schema schema) {
8
+ super(schema);
9
+ }
10
+
11
+ @Override
12
+ public String booleanColumn(boolean value) {
13
+ return String.valueOf(value);
14
+ }
15
+
16
+ @Override
17
+ public String longColumn(long value) {
18
+ return String.valueOf(value);
19
+ }
20
+
21
+ @Override
22
+ public String doubleColumn(double value) {
23
+ return String.valueOf(value);
24
+ }
25
+
26
+ @Override
27
+ public String stringColumn(String value) {
28
+ return value;
29
+ }
30
+
31
+ @Override
32
+ public String timestampColumn(String value) {
33
+ return value;
34
+ }
35
+
36
+
37
+ @Override
38
+ public String jsonColumn(Value value) {
39
+ return value.toJson();
40
+ }
41
+ }