embulk-formatter-avro 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +14 -0
  3. data/LICENSE.txt +21 -0
  4. data/README.md +51 -0
  5. data/build.gradle +97 -0
  6. data/config/checkstyle/checkstyle.xml +128 -0
  7. data/config/checkstyle/default.xml +108 -0
  8. data/example/config.yml +34 -0
  9. data/example/sample_01.csv +7 -0
  10. data/example/schema.avsc +33 -0
  11. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  12. data/gradle/wrapper/gradle-wrapper.properties +6 -0
  13. data/gradlew +160 -0
  14. data/gradlew.bat +90 -0
  15. data/lib/embulk/formatter/avro.rb +3 -0
  16. data/src/main/java/org/embulk/formatter/avro/AvroFormatterColumnVisitor.java +97 -0
  17. data/src/main/java/org/embulk/formatter/avro/AvroFormatterPlugin.java +183 -0
  18. data/src/main/java/org/embulk/formatter/avro/converter/AbstractAvroValueConverter.java +36 -0
  19. data/src/main/java/org/embulk/formatter/avro/converter/AvroArrayConverter.java +52 -0
  20. data/src/main/java/org/embulk/formatter/avro/converter/AvroBooleanConverter.java +14 -0
  21. data/src/main/java/org/embulk/formatter/avro/converter/AvroDoubleConverter.java +29 -0
  22. data/src/main/java/org/embulk/formatter/avro/converter/AvroEnumConverter.java +25 -0
  23. data/src/main/java/org/embulk/formatter/avro/converter/AvroFixedConverter.java +16 -0
  24. data/src/main/java/org/embulk/formatter/avro/converter/AvroFloatConverter.java +29 -0
  25. data/src/main/java/org/embulk/formatter/avro/converter/AvroIntConverter.java +29 -0
  26. data/src/main/java/org/embulk/formatter/avro/converter/AvroLongConverter.java +29 -0
  27. data/src/main/java/org/embulk/formatter/avro/converter/AvroMapConverter.java +52 -0
  28. data/src/main/java/org/embulk/formatter/avro/converter/AvroNullConverter.java +40 -0
  29. data/src/main/java/org/embulk/formatter/avro/converter/AvroRecordConverter.java +59 -0
  30. data/src/main/java/org/embulk/formatter/avro/converter/AvroStringConverter.java +41 -0
  31. data/src/main/java/org/embulk/formatter/avro/converter/AvroValueConverterFactory.java +52 -0
  32. data/src/test/java/org/embulk/formatter/avro/TestAvroFormatterPlugin.java +5 -0
  33. metadata +111 -0
@@ -0,0 +1,36 @@
1
+ package org.embulk.formatter.avro.converter;
2
+
3
+ import org.apache.avro.Schema;
4
+ import org.msgpack.value.Value;
5
+
6
+ abstract public class AbstractAvroValueConverter {
7
+ protected Schema avroSchema;
8
+
9
+ public AbstractAvroValueConverter(Schema schema) {
10
+ this.avroSchema = schema;
11
+ }
12
+
13
+ public Object booleanColumn(boolean value) {
14
+ throw new RuntimeException(String.format("%s cannot convert to %s of %s field", value, avroSchema.getType(), avroSchema.getName()));
15
+ }
16
+
17
+ public Object longColumn(long value) {
18
+ throw new RuntimeException(String.format("%s cannot convert to %s of %s field", value, avroSchema.getType(), avroSchema.getName()));
19
+ }
20
+
21
+ public Object doubleColumn(double value) {
22
+ throw new RuntimeException(String.format("%s cannot convert to %s of %s field", value, avroSchema.getType(), avroSchema.getName()));
23
+ }
24
+
25
+ public Object stringColumn(String value) {
26
+ throw new RuntimeException(String.format("%s cannot convert to %s of %s field", value, avroSchema.getType(), avroSchema.getName()));
27
+ }
28
+
29
+ public Object timestampColumn(String value) {
30
+ throw new RuntimeException(String.format("%s cannot convert to %s of %s field", value, avroSchema.getType(), avroSchema.getName()));
31
+ }
32
+
33
+ public Object jsonColumn(Value value) {
34
+ throw new RuntimeException(String.format("%s cannot convert to %s of %s field", value, avroSchema.getType(), avroSchema.getName()));
35
+ }
36
+ }
@@ -0,0 +1,52 @@
1
+ package org.embulk.formatter.avro.converter;
2
+
3
+ import org.apache.avro.Schema;
4
+ import org.apache.avro.generic.GenericArray;
5
+ import org.apache.avro.generic.GenericData;
6
+ import org.msgpack.value.Value;
7
+
8
+ import java.util.List;
9
+
10
+ public class AvroArrayConverter extends AbstractAvroValueConverter {
11
+ private AbstractAvroValueConverter elementConverter;
12
+
13
+ public AvroArrayConverter(Schema schema, AbstractAvroValueConverter elementConverter) {
14
+ super(schema);
15
+ this.elementConverter = elementConverter;
16
+ }
17
+
18
+ @Override
19
+ public GenericArray<Object> jsonColumn(Value value) {
20
+ if (!value.isArrayValue())
21
+ throw new RuntimeException("Support only array type json record");
22
+
23
+ List<Value> list = value.asArrayValue().list();
24
+
25
+ GenericArray<Object> array = new GenericData.Array<>(list.size(), avroSchema);
26
+ for (Value val : list) {
27
+ switch (val.getValueType()) {
28
+ case STRING:
29
+ array.add(elementConverter.stringColumn(val.asStringValue().toString()));
30
+ break;
31
+ case INTEGER:
32
+ array.add(elementConverter.longColumn(val.asIntegerValue().toLong()));
33
+ break;
34
+ case FLOAT:
35
+ array.add(elementConverter.doubleColumn(val.asFloatValue().toDouble()));
36
+ break;
37
+ case BOOLEAN:
38
+ array.add(elementConverter.booleanColumn(val.asBooleanValue().getBoolean()));
39
+ break;
40
+ case ARRAY:
41
+ array.add(elementConverter.jsonColumn(val.asArrayValue()));
42
+ break;
43
+ case MAP:
44
+ array.add(elementConverter.jsonColumn(val.asMapValue()));
45
+ break;
46
+ default:
47
+ throw new RuntimeException("Irregular Messagepack type");
48
+ }
49
+ }
50
+ return array;
51
+ }
52
+ }
@@ -0,0 +1,14 @@
1
+ package org.embulk.formatter.avro.converter;
2
+
3
+ import org.apache.avro.Schema;
4
+
5
+ public class AvroBooleanConverter extends AbstractAvroValueConverter {
6
+ public AvroBooleanConverter(Schema schema) {
7
+ super(schema);
8
+ }
9
+
10
+ @Override
11
+ public Object booleanColumn(boolean value) {
12
+ throw new RuntimeException("Unsupported typecasting");
13
+ }
14
+ }
@@ -0,0 +1,29 @@
1
+ package org.embulk.formatter.avro.converter;
2
+
3
+ import org.apache.avro.Schema;
4
+
5
+ public class AvroDoubleConverter extends AbstractAvroValueConverter {
6
+ public AvroDoubleConverter(Schema schema) {
7
+ super(schema);
8
+ }
9
+
10
+ @Override
11
+ public Double longColumn(long value) {
12
+ return Long.valueOf(value).doubleValue();
13
+ }
14
+
15
+ @Override
16
+ public Double doubleColumn(double value) {
17
+ return value;
18
+ }
19
+
20
+ @Override
21
+ public Double stringColumn(String value) {
22
+ return Double.parseDouble(value);
23
+ }
24
+
25
+ @Override
26
+ public Double timestampColumn(String value) {
27
+ return Double.parseDouble(value);
28
+ }
29
+ }
@@ -0,0 +1,25 @@
1
+ package org.embulk.formatter.avro.converter;
2
+
3
+ import org.apache.avro.Schema;
4
+ import org.apache.avro.generic.GenericData;
5
+ import org.apache.avro.generic.GenericEnumSymbol;
6
+
7
+ import java.util.List;
8
+
9
+ public class AvroEnumConverter extends AbstractAvroValueConverter {
10
+ private List<String> enumSymbols;
11
+
12
+ AvroEnumConverter(Schema schema, List<String> enumSymbols) {
13
+ super(schema);
14
+ this.enumSymbols = enumSymbols;
15
+ }
16
+
17
+ @Override
18
+ public GenericEnumSymbol stringColumn(String value) {
19
+ if (enumSymbols.contains(value)) {
20
+ return new GenericData.EnumSymbol(avroSchema, value);
21
+ } else {
22
+ throw new RuntimeException(String.format("%s is not in %s", value, enumSymbols.toString()));
23
+ }
24
+ }
25
+ }
@@ -0,0 +1,16 @@
1
+ package org.embulk.formatter.avro.converter;
2
+
3
+ import org.apache.avro.Schema;
4
+ import org.apache.avro.generic.GenericData;
5
+ import org.apache.avro.generic.GenericFixed;
6
+
7
+ public class AvroFixedConverter extends AbstractAvroValueConverter {
8
+ AvroFixedConverter(Schema schema) {
9
+ super(schema);
10
+ }
11
+
12
+ @Override
13
+ public GenericFixed stringColumn(String value) {
14
+ return new GenericData.Fixed(avroSchema, value.getBytes());
15
+ }
16
+ }
@@ -0,0 +1,29 @@
1
+ package org.embulk.formatter.avro.converter;
2
+
3
+ import org.apache.avro.Schema;
4
+
5
+ public class AvroFloatConverter extends AbstractAvroValueConverter {
6
+ public AvroFloatConverter(Schema schema) {
7
+ super(schema);
8
+ }
9
+
10
+ @Override
11
+ public Float longColumn(long value) {
12
+ return Long.valueOf(value).floatValue();
13
+ }
14
+
15
+ @Override
16
+ public Float doubleColumn(double value) {
17
+ return Double.valueOf(value).floatValue();
18
+ }
19
+
20
+ @Override
21
+ public Float stringColumn(String value) {
22
+ return Float.parseFloat(value);
23
+ }
24
+
25
+ @Override
26
+ public Float timestampColumn(String value) {
27
+ return Float.parseFloat(value);
28
+ }
29
+ }
@@ -0,0 +1,29 @@
1
+ package org.embulk.formatter.avro.converter;
2
+
3
+ import org.apache.avro.Schema;
4
+
5
+ public class AvroIntConverter extends AbstractAvroValueConverter {
6
+ public AvroIntConverter(Schema schema) {
7
+ super(schema);
8
+ }
9
+
10
+ @Override
11
+ public Integer longColumn(long value) {
12
+ return Long.valueOf(value).intValue();
13
+ }
14
+
15
+ @Override
16
+ public Integer doubleColumn(double value) {
17
+ return Double.valueOf(value).intValue();
18
+ }
19
+
20
+ @Override
21
+ public Integer stringColumn(String value) {
22
+ return Integer.parseInt(value);
23
+ }
24
+
25
+ @Override
26
+ public Integer timestampColumn(String value) {
27
+ return Integer.parseInt(value);
28
+ }
29
+ }
@@ -0,0 +1,29 @@
1
+ package org.embulk.formatter.avro.converter;
2
+
3
+ import org.apache.avro.Schema;
4
+
5
+ public class AvroLongConverter extends AbstractAvroValueConverter {
6
+ public AvroLongConverter(Schema schema) {
7
+ super(schema);
8
+ }
9
+
10
+ @Override
11
+ public Long longColumn(long value) {
12
+ return value;
13
+ }
14
+
15
+ @Override
16
+ public Long doubleColumn(double value) {
17
+ return Math.round(value);
18
+ }
19
+
20
+ @Override
21
+ public Long stringColumn(String value) {
22
+ return Long.parseLong(value);
23
+ }
24
+
25
+ @Override
26
+ public Long timestampColumn(String value) {
27
+ return Long.parseLong(value);
28
+ }
29
+ }
@@ -0,0 +1,52 @@
1
+ package org.embulk.formatter.avro.converter;
2
+
3
+ import avro.shaded.com.google.common.collect.ImmutableMap;
4
+ import org.apache.avro.Schema;
5
+ import org.msgpack.value.Value;
6
+
7
+ import java.util.Map;
8
+
9
+ public class AvroMapConverter extends AbstractAvroValueConverter {
10
+ private AbstractAvroValueConverter elementConverter;
11
+
12
+ public AvroMapConverter(Schema schema, AbstractAvroValueConverter elementConverter) {
13
+ super(schema);
14
+ this.elementConverter = elementConverter;
15
+ }
16
+
17
+ @Override
18
+ public Map<String, Object> jsonColumn(Value value) {
19
+ if (!value.isMapValue())
20
+ throw new RuntimeException("Support only map type json record");
21
+
22
+ Map<Value, Value> map = value.asMapValue().map();
23
+
24
+ ImmutableMap.Builder<String, Object> builder = ImmutableMap.builder();
25
+
26
+ for (Map.Entry<Value, Value> entry : map.entrySet()) {
27
+ switch (entry.getValue().getValueType()) {
28
+ case STRING:
29
+ builder.put(entry.getKey().toString(), elementConverter.stringColumn(entry.getValue().asStringValue().toString()));
30
+ break;
31
+ case INTEGER:
32
+ builder.put(entry.getKey().toString(), elementConverter.longColumn(entry.getValue().asIntegerValue().toLong()));
33
+ break;
34
+ case FLOAT:
35
+ builder.put(entry.getKey().toString(), elementConverter.doubleColumn(entry.getValue().asFloatValue().toDouble()));
36
+ break;
37
+ case BOOLEAN:
38
+ builder.put(entry.getKey().toString(), elementConverter.booleanColumn(entry.getValue().asBooleanValue().getBoolean()));
39
+ break;
40
+ case ARRAY:
41
+ builder.put(entry.getKey().toString(), elementConverter.jsonColumn(entry.getValue().asArrayValue()));
42
+ break;
43
+ case MAP:
44
+ builder.put(entry.getKey().toString(), elementConverter.jsonColumn(entry.getValue().asMapValue()));
45
+ break;
46
+ default:
47
+ throw new RuntimeException("Irregular Messagepack type");
48
+ }
49
+ }
50
+ return builder.build();
51
+ }
52
+ }
@@ -0,0 +1,40 @@
1
+ package org.embulk.formatter.avro.converter;
2
+
3
+ import org.apache.avro.Schema;
4
+ import org.msgpack.value.Value;
5
+
6
+ public class AvroNullConverter extends AbstractAvroValueConverter {
7
+ public AvroNullConverter(Schema schema) {
8
+ super(schema);
9
+ }
10
+
11
+ @Override
12
+ public Object booleanColumn(boolean value) {
13
+ return null;
14
+ }
15
+
16
+ @Override
17
+ public Object longColumn(long value) {
18
+ return null;
19
+ }
20
+
21
+ @Override
22
+ public Object doubleColumn(double value) {
23
+ return null;
24
+ }
25
+
26
+ @Override
27
+ public Object stringColumn(String value) {
28
+ return null;
29
+ }
30
+
31
+ @Override
32
+ public Object timestampColumn(String value) {
33
+ return null;
34
+ }
35
+
36
+ @Override
37
+ public Object jsonColumn(Value value) {
38
+ return null;
39
+ }
40
+ }
@@ -0,0 +1,59 @@
1
+ package org.embulk.formatter.avro.converter;
2
+
3
+ import org.apache.avro.Schema;
4
+ import org.apache.avro.generic.GenericData;
5
+ import org.apache.avro.generic.GenericRecord;
6
+ import org.msgpack.value.Value;
7
+ import org.msgpack.value.ValueFactory;
8
+
9
+ import java.util.Map;
10
+
11
+ public class AvroRecordConverter extends AbstractAvroValueConverter {
12
+ private Map<String, AbstractAvroValueConverter> converterTable;
13
+
14
+ public AvroRecordConverter(Schema schema, Map<String, AbstractAvroValueConverter> converterTable) {
15
+ super(schema);
16
+ this.converterTable = converterTable;
17
+ }
18
+
19
+ @Override
20
+ public GenericRecord jsonColumn(Value value) {
21
+ if (!value.isMapValue())
22
+ throw new RuntimeException("Support only map type json record");
23
+
24
+ Map<Value, Value> map = value.asMapValue().map();
25
+
26
+ GenericRecord record = new GenericData.Record(avroSchema);
27
+ for (Map.Entry<String, AbstractAvroValueConverter> entry : converterTable.entrySet()) {
28
+ Value key = ValueFactory.newString(entry.getKey());
29
+ if (!map.containsKey(key)) {
30
+ record.put(entry.getKey(), null);
31
+ } else {
32
+ Value child = map.get(ValueFactory.newString(entry.getKey()));
33
+ switch (child.getValueType()) {
34
+ case STRING:
35
+ record.put(entry.getKey(), entry.getValue().stringColumn(child.asStringValue().toString()));
36
+ break;
37
+ case INTEGER:
38
+ record.put(entry.getKey(), entry.getValue().longColumn(child.asIntegerValue().toLong()));
39
+ break;
40
+ case FLOAT:
41
+ record.put(entry.getKey(), entry.getValue().doubleColumn(child.asFloatValue().toDouble()));
42
+ break;
43
+ case BOOLEAN:
44
+ record.put(entry.getKey(), entry.getValue().booleanColumn(child.asBooleanValue().getBoolean()));
45
+ break;
46
+ case ARRAY:
47
+ record.put(entry.getKey(), entry.getValue().jsonColumn(child.asArrayValue()));
48
+ break;
49
+ case MAP:
50
+ record.put(entry.getKey(), entry.getValue().jsonColumn(child.asMapValue()));
51
+ break;
52
+ default:
53
+ throw new RuntimeException("Irregular Messagepack type");
54
+ }
55
+ }
56
+ }
57
+ return record;
58
+ }
59
+ }
@@ -0,0 +1,41 @@
1
+ package org.embulk.formatter.avro.converter;
2
+
3
+ import org.apache.avro.Schema;
4
+ import org.msgpack.value.Value;
5
+
6
+ public class AvroStringConverter extends AbstractAvroValueConverter {
7
+ public AvroStringConverter(Schema schema) {
8
+ super(schema);
9
+ }
10
+
11
+ @Override
12
+ public String booleanColumn(boolean value) {
13
+ return String.valueOf(value);
14
+ }
15
+
16
+ @Override
17
+ public String longColumn(long value) {
18
+ return String.valueOf(value);
19
+ }
20
+
21
+ @Override
22
+ public String doubleColumn(double value) {
23
+ return String.valueOf(value);
24
+ }
25
+
26
+ @Override
27
+ public String stringColumn(String value) {
28
+ return value;
29
+ }
30
+
31
+ @Override
32
+ public String timestampColumn(String value) {
33
+ return value;
34
+ }
35
+
36
+
37
+ @Override
38
+ public String jsonColumn(Value value) {
39
+ return value.toJson();
40
+ }
41
+ }