embulk-formatter-avro 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/LICENSE.txt +21 -0
- data/README.md +51 -0
- data/build.gradle +97 -0
- data/config/checkstyle/checkstyle.xml +128 -0
- data/config/checkstyle/default.xml +108 -0
- data/example/config.yml +34 -0
- data/example/sample_01.csv +7 -0
- data/example/schema.avsc +33 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +6 -0
- data/gradlew +160 -0
- data/gradlew.bat +90 -0
- data/lib/embulk/formatter/avro.rb +3 -0
- data/src/main/java/org/embulk/formatter/avro/AvroFormatterColumnVisitor.java +97 -0
- data/src/main/java/org/embulk/formatter/avro/AvroFormatterPlugin.java +183 -0
- data/src/main/java/org/embulk/formatter/avro/converter/AbstractAvroValueConverter.java +36 -0
- data/src/main/java/org/embulk/formatter/avro/converter/AvroArrayConverter.java +52 -0
- data/src/main/java/org/embulk/formatter/avro/converter/AvroBooleanConverter.java +14 -0
- data/src/main/java/org/embulk/formatter/avro/converter/AvroDoubleConverter.java +29 -0
- data/src/main/java/org/embulk/formatter/avro/converter/AvroEnumConverter.java +25 -0
- data/src/main/java/org/embulk/formatter/avro/converter/AvroFixedConverter.java +16 -0
- data/src/main/java/org/embulk/formatter/avro/converter/AvroFloatConverter.java +29 -0
- data/src/main/java/org/embulk/formatter/avro/converter/AvroIntConverter.java +29 -0
- data/src/main/java/org/embulk/formatter/avro/converter/AvroLongConverter.java +29 -0
- data/src/main/java/org/embulk/formatter/avro/converter/AvroMapConverter.java +52 -0
- data/src/main/java/org/embulk/formatter/avro/converter/AvroNullConverter.java +40 -0
- data/src/main/java/org/embulk/formatter/avro/converter/AvroRecordConverter.java +59 -0
- data/src/main/java/org/embulk/formatter/avro/converter/AvroStringConverter.java +41 -0
- data/src/main/java/org/embulk/formatter/avro/converter/AvroValueConverterFactory.java +52 -0
- data/src/test/java/org/embulk/formatter/avro/TestAvroFormatterPlugin.java +5 -0
- metadata +111 -0
@@ -0,0 +1,36 @@
|
|
1
|
+
package org.embulk.formatter.avro.converter;
|
2
|
+
|
3
|
+
import org.apache.avro.Schema;
|
4
|
+
import org.msgpack.value.Value;
|
5
|
+
|
6
|
+
abstract public class AbstractAvroValueConverter {
|
7
|
+
protected Schema avroSchema;
|
8
|
+
|
9
|
+
public AbstractAvroValueConverter(Schema schema) {
|
10
|
+
this.avroSchema = schema;
|
11
|
+
}
|
12
|
+
|
13
|
+
public Object booleanColumn(boolean value) {
|
14
|
+
throw new RuntimeException(String.format("%s cannot convert to %s of %s field", value, avroSchema.getType(), avroSchema.getName()));
|
15
|
+
}
|
16
|
+
|
17
|
+
public Object longColumn(long value) {
|
18
|
+
throw new RuntimeException(String.format("%s cannot convert to %s of %s field", value, avroSchema.getType(), avroSchema.getName()));
|
19
|
+
}
|
20
|
+
|
21
|
+
public Object doubleColumn(double value) {
|
22
|
+
throw new RuntimeException(String.format("%s cannot convert to %s of %s field", value, avroSchema.getType(), avroSchema.getName()));
|
23
|
+
}
|
24
|
+
|
25
|
+
public Object stringColumn(String value) {
|
26
|
+
throw new RuntimeException(String.format("%s cannot convert to %s of %s field", value, avroSchema.getType(), avroSchema.getName()));
|
27
|
+
}
|
28
|
+
|
29
|
+
public Object timestampColumn(String value) {
|
30
|
+
throw new RuntimeException(String.format("%s cannot convert to %s of %s field", value, avroSchema.getType(), avroSchema.getName()));
|
31
|
+
}
|
32
|
+
|
33
|
+
public Object jsonColumn(Value value) {
|
34
|
+
throw new RuntimeException(String.format("%s cannot convert to %s of %s field", value, avroSchema.getType(), avroSchema.getName()));
|
35
|
+
}
|
36
|
+
}
|
@@ -0,0 +1,52 @@
|
|
1
|
+
package org.embulk.formatter.avro.converter;
|
2
|
+
|
3
|
+
import org.apache.avro.Schema;
|
4
|
+
import org.apache.avro.generic.GenericArray;
|
5
|
+
import org.apache.avro.generic.GenericData;
|
6
|
+
import org.msgpack.value.Value;
|
7
|
+
|
8
|
+
import java.util.List;
|
9
|
+
|
10
|
+
public class AvroArrayConverter extends AbstractAvroValueConverter {
|
11
|
+
private AbstractAvroValueConverter elementConverter;
|
12
|
+
|
13
|
+
public AvroArrayConverter(Schema schema, AbstractAvroValueConverter elementConverter) {
|
14
|
+
super(schema);
|
15
|
+
this.elementConverter = elementConverter;
|
16
|
+
}
|
17
|
+
|
18
|
+
@Override
|
19
|
+
public GenericArray<Object> jsonColumn(Value value) {
|
20
|
+
if (!value.isArrayValue())
|
21
|
+
throw new RuntimeException("Support only array type json record");
|
22
|
+
|
23
|
+
List<Value> list = value.asArrayValue().list();
|
24
|
+
|
25
|
+
GenericArray<Object> array = new GenericData.Array<>(list.size(), avroSchema);
|
26
|
+
for (Value val : list) {
|
27
|
+
switch (val.getValueType()) {
|
28
|
+
case STRING:
|
29
|
+
array.add(elementConverter.stringColumn(val.asStringValue().toString()));
|
30
|
+
break;
|
31
|
+
case INTEGER:
|
32
|
+
array.add(elementConverter.longColumn(val.asIntegerValue().toLong()));
|
33
|
+
break;
|
34
|
+
case FLOAT:
|
35
|
+
array.add(elementConverter.doubleColumn(val.asFloatValue().toDouble()));
|
36
|
+
break;
|
37
|
+
case BOOLEAN:
|
38
|
+
array.add(elementConverter.booleanColumn(val.asBooleanValue().getBoolean()));
|
39
|
+
break;
|
40
|
+
case ARRAY:
|
41
|
+
array.add(elementConverter.jsonColumn(val.asArrayValue()));
|
42
|
+
break;
|
43
|
+
case MAP:
|
44
|
+
array.add(elementConverter.jsonColumn(val.asMapValue()));
|
45
|
+
break;
|
46
|
+
default:
|
47
|
+
throw new RuntimeException("Irregular Messagepack type");
|
48
|
+
}
|
49
|
+
}
|
50
|
+
return array;
|
51
|
+
}
|
52
|
+
}
|
@@ -0,0 +1,14 @@
|
|
1
|
+
package org.embulk.formatter.avro.converter;
|
2
|
+
|
3
|
+
import org.apache.avro.Schema;
|
4
|
+
|
5
|
+
public class AvroBooleanConverter extends AbstractAvroValueConverter {
|
6
|
+
public AvroBooleanConverter(Schema schema) {
|
7
|
+
super(schema);
|
8
|
+
}
|
9
|
+
|
10
|
+
@Override
|
11
|
+
public Object booleanColumn(boolean value) {
|
12
|
+
throw new RuntimeException("Unsupported typecasting");
|
13
|
+
}
|
14
|
+
}
|
@@ -0,0 +1,29 @@
|
|
1
|
+
package org.embulk.formatter.avro.converter;
|
2
|
+
|
3
|
+
import org.apache.avro.Schema;
|
4
|
+
|
5
|
+
public class AvroDoubleConverter extends AbstractAvroValueConverter {
|
6
|
+
public AvroDoubleConverter(Schema schema) {
|
7
|
+
super(schema);
|
8
|
+
}
|
9
|
+
|
10
|
+
@Override
|
11
|
+
public Double longColumn(long value) {
|
12
|
+
return Long.valueOf(value).doubleValue();
|
13
|
+
}
|
14
|
+
|
15
|
+
@Override
|
16
|
+
public Double doubleColumn(double value) {
|
17
|
+
return value;
|
18
|
+
}
|
19
|
+
|
20
|
+
@Override
|
21
|
+
public Double stringColumn(String value) {
|
22
|
+
return Double.parseDouble(value);
|
23
|
+
}
|
24
|
+
|
25
|
+
@Override
|
26
|
+
public Double timestampColumn(String value) {
|
27
|
+
return Double.parseDouble(value);
|
28
|
+
}
|
29
|
+
}
|
@@ -0,0 +1,25 @@
|
|
1
|
+
package org.embulk.formatter.avro.converter;
|
2
|
+
|
3
|
+
import org.apache.avro.Schema;
|
4
|
+
import org.apache.avro.generic.GenericData;
|
5
|
+
import org.apache.avro.generic.GenericEnumSymbol;
|
6
|
+
|
7
|
+
import java.util.List;
|
8
|
+
|
9
|
+
public class AvroEnumConverter extends AbstractAvroValueConverter {
|
10
|
+
private List<String> enumSymbols;
|
11
|
+
|
12
|
+
AvroEnumConverter(Schema schema, List<String> enumSymbols) {
|
13
|
+
super(schema);
|
14
|
+
this.enumSymbols = enumSymbols;
|
15
|
+
}
|
16
|
+
|
17
|
+
@Override
|
18
|
+
public GenericEnumSymbol stringColumn(String value) {
|
19
|
+
if (enumSymbols.contains(value)) {
|
20
|
+
return new GenericData.EnumSymbol(avroSchema, value);
|
21
|
+
} else {
|
22
|
+
throw new RuntimeException(String.format("%s is not in %s", value, enumSymbols.toString()));
|
23
|
+
}
|
24
|
+
}
|
25
|
+
}
|
@@ -0,0 +1,16 @@
|
|
1
|
+
package org.embulk.formatter.avro.converter;
|
2
|
+
|
3
|
+
import org.apache.avro.Schema;
|
4
|
+
import org.apache.avro.generic.GenericData;
|
5
|
+
import org.apache.avro.generic.GenericFixed;
|
6
|
+
|
7
|
+
public class AvroFixedConverter extends AbstractAvroValueConverter {
|
8
|
+
AvroFixedConverter(Schema schema) {
|
9
|
+
super(schema);
|
10
|
+
}
|
11
|
+
|
12
|
+
@Override
|
13
|
+
public GenericFixed stringColumn(String value) {
|
14
|
+
return new GenericData.Fixed(avroSchema, value.getBytes());
|
15
|
+
}
|
16
|
+
}
|
@@ -0,0 +1,29 @@
|
|
1
|
+
package org.embulk.formatter.avro.converter;
|
2
|
+
|
3
|
+
import org.apache.avro.Schema;
|
4
|
+
|
5
|
+
public class AvroFloatConverter extends AbstractAvroValueConverter {
|
6
|
+
public AvroFloatConverter(Schema schema) {
|
7
|
+
super(schema);
|
8
|
+
}
|
9
|
+
|
10
|
+
@Override
|
11
|
+
public Float longColumn(long value) {
|
12
|
+
return Long.valueOf(value).floatValue();
|
13
|
+
}
|
14
|
+
|
15
|
+
@Override
|
16
|
+
public Float doubleColumn(double value) {
|
17
|
+
return Double.valueOf(value).floatValue();
|
18
|
+
}
|
19
|
+
|
20
|
+
@Override
|
21
|
+
public Float stringColumn(String value) {
|
22
|
+
return Float.parseFloat(value);
|
23
|
+
}
|
24
|
+
|
25
|
+
@Override
|
26
|
+
public Float timestampColumn(String value) {
|
27
|
+
return Float.parseFloat(value);
|
28
|
+
}
|
29
|
+
}
|
@@ -0,0 +1,29 @@
|
|
1
|
+
package org.embulk.formatter.avro.converter;
|
2
|
+
|
3
|
+
import org.apache.avro.Schema;
|
4
|
+
|
5
|
+
public class AvroIntConverter extends AbstractAvroValueConverter {
|
6
|
+
public AvroIntConverter(Schema schema) {
|
7
|
+
super(schema);
|
8
|
+
}
|
9
|
+
|
10
|
+
@Override
|
11
|
+
public Integer longColumn(long value) {
|
12
|
+
return Long.valueOf(value).intValue();
|
13
|
+
}
|
14
|
+
|
15
|
+
@Override
|
16
|
+
public Integer doubleColumn(double value) {
|
17
|
+
return Double.valueOf(value).intValue();
|
18
|
+
}
|
19
|
+
|
20
|
+
@Override
|
21
|
+
public Integer stringColumn(String value) {
|
22
|
+
return Integer.parseInt(value);
|
23
|
+
}
|
24
|
+
|
25
|
+
@Override
|
26
|
+
public Integer timestampColumn(String value) {
|
27
|
+
return Integer.parseInt(value);
|
28
|
+
}
|
29
|
+
}
|
@@ -0,0 +1,29 @@
|
|
1
|
+
package org.embulk.formatter.avro.converter;
|
2
|
+
|
3
|
+
import org.apache.avro.Schema;
|
4
|
+
|
5
|
+
public class AvroLongConverter extends AbstractAvroValueConverter {
|
6
|
+
public AvroLongConverter(Schema schema) {
|
7
|
+
super(schema);
|
8
|
+
}
|
9
|
+
|
10
|
+
@Override
|
11
|
+
public Long longColumn(long value) {
|
12
|
+
return value;
|
13
|
+
}
|
14
|
+
|
15
|
+
@Override
|
16
|
+
public Long doubleColumn(double value) {
|
17
|
+
return Math.round(value);
|
18
|
+
}
|
19
|
+
|
20
|
+
@Override
|
21
|
+
public Long stringColumn(String value) {
|
22
|
+
return Long.parseLong(value);
|
23
|
+
}
|
24
|
+
|
25
|
+
@Override
|
26
|
+
public Long timestampColumn(String value) {
|
27
|
+
return Long.parseLong(value);
|
28
|
+
}
|
29
|
+
}
|
@@ -0,0 +1,52 @@
|
|
1
|
+
package org.embulk.formatter.avro.converter;
|
2
|
+
|
3
|
+
import avro.shaded.com.google.common.collect.ImmutableMap;
|
4
|
+
import org.apache.avro.Schema;
|
5
|
+
import org.msgpack.value.Value;
|
6
|
+
|
7
|
+
import java.util.Map;
|
8
|
+
|
9
|
+
public class AvroMapConverter extends AbstractAvroValueConverter {
|
10
|
+
private AbstractAvroValueConverter elementConverter;
|
11
|
+
|
12
|
+
public AvroMapConverter(Schema schema, AbstractAvroValueConverter elementConverter) {
|
13
|
+
super(schema);
|
14
|
+
this.elementConverter = elementConverter;
|
15
|
+
}
|
16
|
+
|
17
|
+
@Override
|
18
|
+
public Map<String, Object> jsonColumn(Value value) {
|
19
|
+
if (!value.isMapValue())
|
20
|
+
throw new RuntimeException("Support only map type json record");
|
21
|
+
|
22
|
+
Map<Value, Value> map = value.asMapValue().map();
|
23
|
+
|
24
|
+
ImmutableMap.Builder<String, Object> builder = ImmutableMap.builder();
|
25
|
+
|
26
|
+
for (Map.Entry<Value, Value> entry : map.entrySet()) {
|
27
|
+
switch (entry.getValue().getValueType()) {
|
28
|
+
case STRING:
|
29
|
+
builder.put(entry.getKey().toString(), elementConverter.stringColumn(entry.getValue().asStringValue().toString()));
|
30
|
+
break;
|
31
|
+
case INTEGER:
|
32
|
+
builder.put(entry.getKey().toString(), elementConverter.longColumn(entry.getValue().asIntegerValue().toLong()));
|
33
|
+
break;
|
34
|
+
case FLOAT:
|
35
|
+
builder.put(entry.getKey().toString(), elementConverter.doubleColumn(entry.getValue().asFloatValue().toDouble()));
|
36
|
+
break;
|
37
|
+
case BOOLEAN:
|
38
|
+
builder.put(entry.getKey().toString(), elementConverter.booleanColumn(entry.getValue().asBooleanValue().getBoolean()));
|
39
|
+
break;
|
40
|
+
case ARRAY:
|
41
|
+
builder.put(entry.getKey().toString(), elementConverter.jsonColumn(entry.getValue().asArrayValue()));
|
42
|
+
break;
|
43
|
+
case MAP:
|
44
|
+
builder.put(entry.getKey().toString(), elementConverter.jsonColumn(entry.getValue().asMapValue()));
|
45
|
+
break;
|
46
|
+
default:
|
47
|
+
throw new RuntimeException("Irregular Messagepack type");
|
48
|
+
}
|
49
|
+
}
|
50
|
+
return builder.build();
|
51
|
+
}
|
52
|
+
}
|
@@ -0,0 +1,40 @@
|
|
1
|
+
package org.embulk.formatter.avro.converter;
|
2
|
+
|
3
|
+
import org.apache.avro.Schema;
|
4
|
+
import org.msgpack.value.Value;
|
5
|
+
|
6
|
+
public class AvroNullConverter extends AbstractAvroValueConverter {
|
7
|
+
public AvroNullConverter(Schema schema) {
|
8
|
+
super(schema);
|
9
|
+
}
|
10
|
+
|
11
|
+
@Override
|
12
|
+
public Object booleanColumn(boolean value) {
|
13
|
+
return null;
|
14
|
+
}
|
15
|
+
|
16
|
+
@Override
|
17
|
+
public Object longColumn(long value) {
|
18
|
+
return null;
|
19
|
+
}
|
20
|
+
|
21
|
+
@Override
|
22
|
+
public Object doubleColumn(double value) {
|
23
|
+
return null;
|
24
|
+
}
|
25
|
+
|
26
|
+
@Override
|
27
|
+
public Object stringColumn(String value) {
|
28
|
+
return null;
|
29
|
+
}
|
30
|
+
|
31
|
+
@Override
|
32
|
+
public Object timestampColumn(String value) {
|
33
|
+
return null;
|
34
|
+
}
|
35
|
+
|
36
|
+
@Override
|
37
|
+
public Object jsonColumn(Value value) {
|
38
|
+
return null;
|
39
|
+
}
|
40
|
+
}
|
@@ -0,0 +1,59 @@
|
|
1
|
+
package org.embulk.formatter.avro.converter;
|
2
|
+
|
3
|
+
import org.apache.avro.Schema;
|
4
|
+
import org.apache.avro.generic.GenericData;
|
5
|
+
import org.apache.avro.generic.GenericRecord;
|
6
|
+
import org.msgpack.value.Value;
|
7
|
+
import org.msgpack.value.ValueFactory;
|
8
|
+
|
9
|
+
import java.util.Map;
|
10
|
+
|
11
|
+
public class AvroRecordConverter extends AbstractAvroValueConverter {
|
12
|
+
private Map<String, AbstractAvroValueConverter> converterTable;
|
13
|
+
|
14
|
+
public AvroRecordConverter(Schema schema, Map<String, AbstractAvroValueConverter> converterTable) {
|
15
|
+
super(schema);
|
16
|
+
this.converterTable = converterTable;
|
17
|
+
}
|
18
|
+
|
19
|
+
@Override
|
20
|
+
public GenericRecord jsonColumn(Value value) {
|
21
|
+
if (!value.isMapValue())
|
22
|
+
throw new RuntimeException("Support only map type json record");
|
23
|
+
|
24
|
+
Map<Value, Value> map = value.asMapValue().map();
|
25
|
+
|
26
|
+
GenericRecord record = new GenericData.Record(avroSchema);
|
27
|
+
for (Map.Entry<String, AbstractAvroValueConverter> entry : converterTable.entrySet()) {
|
28
|
+
Value key = ValueFactory.newString(entry.getKey());
|
29
|
+
if (!map.containsKey(key)) {
|
30
|
+
record.put(entry.getKey(), null);
|
31
|
+
} else {
|
32
|
+
Value child = map.get(ValueFactory.newString(entry.getKey()));
|
33
|
+
switch (child.getValueType()) {
|
34
|
+
case STRING:
|
35
|
+
record.put(entry.getKey(), entry.getValue().stringColumn(child.asStringValue().toString()));
|
36
|
+
break;
|
37
|
+
case INTEGER:
|
38
|
+
record.put(entry.getKey(), entry.getValue().longColumn(child.asIntegerValue().toLong()));
|
39
|
+
break;
|
40
|
+
case FLOAT:
|
41
|
+
record.put(entry.getKey(), entry.getValue().doubleColumn(child.asFloatValue().toDouble()));
|
42
|
+
break;
|
43
|
+
case BOOLEAN:
|
44
|
+
record.put(entry.getKey(), entry.getValue().booleanColumn(child.asBooleanValue().getBoolean()));
|
45
|
+
break;
|
46
|
+
case ARRAY:
|
47
|
+
record.put(entry.getKey(), entry.getValue().jsonColumn(child.asArrayValue()));
|
48
|
+
break;
|
49
|
+
case MAP:
|
50
|
+
record.put(entry.getKey(), entry.getValue().jsonColumn(child.asMapValue()));
|
51
|
+
break;
|
52
|
+
default:
|
53
|
+
throw new RuntimeException("Irregular Messagepack type");
|
54
|
+
}
|
55
|
+
}
|
56
|
+
}
|
57
|
+
return record;
|
58
|
+
}
|
59
|
+
}
|
@@ -0,0 +1,41 @@
|
|
1
|
+
package org.embulk.formatter.avro.converter;
|
2
|
+
|
3
|
+
import org.apache.avro.Schema;
|
4
|
+
import org.msgpack.value.Value;
|
5
|
+
|
6
|
+
public class AvroStringConverter extends AbstractAvroValueConverter {
|
7
|
+
public AvroStringConverter(Schema schema) {
|
8
|
+
super(schema);
|
9
|
+
}
|
10
|
+
|
11
|
+
@Override
|
12
|
+
public String booleanColumn(boolean value) {
|
13
|
+
return String.valueOf(value);
|
14
|
+
}
|
15
|
+
|
16
|
+
@Override
|
17
|
+
public String longColumn(long value) {
|
18
|
+
return String.valueOf(value);
|
19
|
+
}
|
20
|
+
|
21
|
+
@Override
|
22
|
+
public String doubleColumn(double value) {
|
23
|
+
return String.valueOf(value);
|
24
|
+
}
|
25
|
+
|
26
|
+
@Override
|
27
|
+
public String stringColumn(String value) {
|
28
|
+
return value;
|
29
|
+
}
|
30
|
+
|
31
|
+
@Override
|
32
|
+
public String timestampColumn(String value) {
|
33
|
+
return value;
|
34
|
+
}
|
35
|
+
|
36
|
+
|
37
|
+
@Override
|
38
|
+
public String jsonColumn(Value value) {
|
39
|
+
return value.toJson();
|
40
|
+
}
|
41
|
+
}
|