embulk-formatter-avro 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/LICENSE.txt +21 -0
- data/README.md +51 -0
- data/build.gradle +97 -0
- data/config/checkstyle/checkstyle.xml +128 -0
- data/config/checkstyle/default.xml +108 -0
- data/example/config.yml +34 -0
- data/example/sample_01.csv +7 -0
- data/example/schema.avsc +33 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +6 -0
- data/gradlew +160 -0
- data/gradlew.bat +90 -0
- data/lib/embulk/formatter/avro.rb +3 -0
- data/src/main/java/org/embulk/formatter/avro/AvroFormatterColumnVisitor.java +97 -0
- data/src/main/java/org/embulk/formatter/avro/AvroFormatterPlugin.java +183 -0
- data/src/main/java/org/embulk/formatter/avro/converter/AbstractAvroValueConverter.java +36 -0
- data/src/main/java/org/embulk/formatter/avro/converter/AvroArrayConverter.java +52 -0
- data/src/main/java/org/embulk/formatter/avro/converter/AvroBooleanConverter.java +14 -0
- data/src/main/java/org/embulk/formatter/avro/converter/AvroDoubleConverter.java +29 -0
- data/src/main/java/org/embulk/formatter/avro/converter/AvroEnumConverter.java +25 -0
- data/src/main/java/org/embulk/formatter/avro/converter/AvroFixedConverter.java +16 -0
- data/src/main/java/org/embulk/formatter/avro/converter/AvroFloatConverter.java +29 -0
- data/src/main/java/org/embulk/formatter/avro/converter/AvroIntConverter.java +29 -0
- data/src/main/java/org/embulk/formatter/avro/converter/AvroLongConverter.java +29 -0
- data/src/main/java/org/embulk/formatter/avro/converter/AvroMapConverter.java +52 -0
- data/src/main/java/org/embulk/formatter/avro/converter/AvroNullConverter.java +40 -0
- data/src/main/java/org/embulk/formatter/avro/converter/AvroRecordConverter.java +59 -0
- data/src/main/java/org/embulk/formatter/avro/converter/AvroStringConverter.java +41 -0
- data/src/main/java/org/embulk/formatter/avro/converter/AvroValueConverterFactory.java +52 -0
- data/src/test/java/org/embulk/formatter/avro/TestAvroFormatterPlugin.java +5 -0
- metadata +111 -0
@@ -0,0 +1,36 @@
|
|
1
|
+
package org.embulk.formatter.avro.converter;
|
2
|
+
|
3
|
+
import org.apache.avro.Schema;
|
4
|
+
import org.msgpack.value.Value;
|
5
|
+
|
6
|
+
abstract public class AbstractAvroValueConverter {
|
7
|
+
protected Schema avroSchema;
|
8
|
+
|
9
|
+
public AbstractAvroValueConverter(Schema schema) {
|
10
|
+
this.avroSchema = schema;
|
11
|
+
}
|
12
|
+
|
13
|
+
public Object booleanColumn(boolean value) {
|
14
|
+
throw new RuntimeException(String.format("%s cannot convert to %s of %s field", value, avroSchema.getType(), avroSchema.getName()));
|
15
|
+
}
|
16
|
+
|
17
|
+
public Object longColumn(long value) {
|
18
|
+
throw new RuntimeException(String.format("%s cannot convert to %s of %s field", value, avroSchema.getType(), avroSchema.getName()));
|
19
|
+
}
|
20
|
+
|
21
|
+
public Object doubleColumn(double value) {
|
22
|
+
throw new RuntimeException(String.format("%s cannot convert to %s of %s field", value, avroSchema.getType(), avroSchema.getName()));
|
23
|
+
}
|
24
|
+
|
25
|
+
public Object stringColumn(String value) {
|
26
|
+
throw new RuntimeException(String.format("%s cannot convert to %s of %s field", value, avroSchema.getType(), avroSchema.getName()));
|
27
|
+
}
|
28
|
+
|
29
|
+
public Object timestampColumn(String value) {
|
30
|
+
throw new RuntimeException(String.format("%s cannot convert to %s of %s field", value, avroSchema.getType(), avroSchema.getName()));
|
31
|
+
}
|
32
|
+
|
33
|
+
public Object jsonColumn(Value value) {
|
34
|
+
throw new RuntimeException(String.format("%s cannot convert to %s of %s field", value, avroSchema.getType(), avroSchema.getName()));
|
35
|
+
}
|
36
|
+
}
|
@@ -0,0 +1,52 @@
|
|
1
|
+
package org.embulk.formatter.avro.converter;
|
2
|
+
|
3
|
+
import org.apache.avro.Schema;
|
4
|
+
import org.apache.avro.generic.GenericArray;
|
5
|
+
import org.apache.avro.generic.GenericData;
|
6
|
+
import org.msgpack.value.Value;
|
7
|
+
|
8
|
+
import java.util.List;
|
9
|
+
|
10
|
+
public class AvroArrayConverter extends AbstractAvroValueConverter {
|
11
|
+
private AbstractAvroValueConverter elementConverter;
|
12
|
+
|
13
|
+
public AvroArrayConverter(Schema schema, AbstractAvroValueConverter elementConverter) {
|
14
|
+
super(schema);
|
15
|
+
this.elementConverter = elementConverter;
|
16
|
+
}
|
17
|
+
|
18
|
+
@Override
|
19
|
+
public GenericArray<Object> jsonColumn(Value value) {
|
20
|
+
if (!value.isArrayValue())
|
21
|
+
throw new RuntimeException("Support only array type json record");
|
22
|
+
|
23
|
+
List<Value> list = value.asArrayValue().list();
|
24
|
+
|
25
|
+
GenericArray<Object> array = new GenericData.Array<>(list.size(), avroSchema);
|
26
|
+
for (Value val : list) {
|
27
|
+
switch (val.getValueType()) {
|
28
|
+
case STRING:
|
29
|
+
array.add(elementConverter.stringColumn(val.asStringValue().toString()));
|
30
|
+
break;
|
31
|
+
case INTEGER:
|
32
|
+
array.add(elementConverter.longColumn(val.asIntegerValue().toLong()));
|
33
|
+
break;
|
34
|
+
case FLOAT:
|
35
|
+
array.add(elementConverter.doubleColumn(val.asFloatValue().toDouble()));
|
36
|
+
break;
|
37
|
+
case BOOLEAN:
|
38
|
+
array.add(elementConverter.booleanColumn(val.asBooleanValue().getBoolean()));
|
39
|
+
break;
|
40
|
+
case ARRAY:
|
41
|
+
array.add(elementConverter.jsonColumn(val.asArrayValue()));
|
42
|
+
break;
|
43
|
+
case MAP:
|
44
|
+
array.add(elementConverter.jsonColumn(val.asMapValue()));
|
45
|
+
break;
|
46
|
+
default:
|
47
|
+
throw new RuntimeException("Irregular Messagepack type");
|
48
|
+
}
|
49
|
+
}
|
50
|
+
return array;
|
51
|
+
}
|
52
|
+
}
|
@@ -0,0 +1,14 @@
|
|
1
|
+
package org.embulk.formatter.avro.converter;
|
2
|
+
|
3
|
+
import org.apache.avro.Schema;
|
4
|
+
|
5
|
+
public class AvroBooleanConverter extends AbstractAvroValueConverter {
|
6
|
+
public AvroBooleanConverter(Schema schema) {
|
7
|
+
super(schema);
|
8
|
+
}
|
9
|
+
|
10
|
+
@Override
|
11
|
+
public Object booleanColumn(boolean value) {
|
12
|
+
throw new RuntimeException("Unsupported typecasting");
|
13
|
+
}
|
14
|
+
}
|
@@ -0,0 +1,29 @@
|
|
1
|
+
package org.embulk.formatter.avro.converter;
|
2
|
+
|
3
|
+
import org.apache.avro.Schema;
|
4
|
+
|
5
|
+
public class AvroDoubleConverter extends AbstractAvroValueConverter {
|
6
|
+
public AvroDoubleConverter(Schema schema) {
|
7
|
+
super(schema);
|
8
|
+
}
|
9
|
+
|
10
|
+
@Override
|
11
|
+
public Double longColumn(long value) {
|
12
|
+
return Long.valueOf(value).doubleValue();
|
13
|
+
}
|
14
|
+
|
15
|
+
@Override
|
16
|
+
public Double doubleColumn(double value) {
|
17
|
+
return value;
|
18
|
+
}
|
19
|
+
|
20
|
+
@Override
|
21
|
+
public Double stringColumn(String value) {
|
22
|
+
return Double.parseDouble(value);
|
23
|
+
}
|
24
|
+
|
25
|
+
@Override
|
26
|
+
public Double timestampColumn(String value) {
|
27
|
+
return Double.parseDouble(value);
|
28
|
+
}
|
29
|
+
}
|
@@ -0,0 +1,25 @@
|
|
1
|
+
package org.embulk.formatter.avro.converter;
|
2
|
+
|
3
|
+
import org.apache.avro.Schema;
|
4
|
+
import org.apache.avro.generic.GenericData;
|
5
|
+
import org.apache.avro.generic.GenericEnumSymbol;
|
6
|
+
|
7
|
+
import java.util.List;
|
8
|
+
|
9
|
+
public class AvroEnumConverter extends AbstractAvroValueConverter {
|
10
|
+
private List<String> enumSymbols;
|
11
|
+
|
12
|
+
AvroEnumConverter(Schema schema, List<String> enumSymbols) {
|
13
|
+
super(schema);
|
14
|
+
this.enumSymbols = enumSymbols;
|
15
|
+
}
|
16
|
+
|
17
|
+
@Override
|
18
|
+
public GenericEnumSymbol stringColumn(String value) {
|
19
|
+
if (enumSymbols.contains(value)) {
|
20
|
+
return new GenericData.EnumSymbol(avroSchema, value);
|
21
|
+
} else {
|
22
|
+
throw new RuntimeException(String.format("%s is not in %s", value, enumSymbols.toString()));
|
23
|
+
}
|
24
|
+
}
|
25
|
+
}
|
@@ -0,0 +1,16 @@
|
|
1
|
+
package org.embulk.formatter.avro.converter;
|
2
|
+
|
3
|
+
import org.apache.avro.Schema;
|
4
|
+
import org.apache.avro.generic.GenericData;
|
5
|
+
import org.apache.avro.generic.GenericFixed;
|
6
|
+
|
7
|
+
public class AvroFixedConverter extends AbstractAvroValueConverter {
|
8
|
+
AvroFixedConverter(Schema schema) {
|
9
|
+
super(schema);
|
10
|
+
}
|
11
|
+
|
12
|
+
@Override
|
13
|
+
public GenericFixed stringColumn(String value) {
|
14
|
+
return new GenericData.Fixed(avroSchema, value.getBytes());
|
15
|
+
}
|
16
|
+
}
|
@@ -0,0 +1,29 @@
|
|
1
|
+
package org.embulk.formatter.avro.converter;
|
2
|
+
|
3
|
+
import org.apache.avro.Schema;
|
4
|
+
|
5
|
+
public class AvroFloatConverter extends AbstractAvroValueConverter {
|
6
|
+
public AvroFloatConverter(Schema schema) {
|
7
|
+
super(schema);
|
8
|
+
}
|
9
|
+
|
10
|
+
@Override
|
11
|
+
public Float longColumn(long value) {
|
12
|
+
return Long.valueOf(value).floatValue();
|
13
|
+
}
|
14
|
+
|
15
|
+
@Override
|
16
|
+
public Float doubleColumn(double value) {
|
17
|
+
return Double.valueOf(value).floatValue();
|
18
|
+
}
|
19
|
+
|
20
|
+
@Override
|
21
|
+
public Float stringColumn(String value) {
|
22
|
+
return Float.parseFloat(value);
|
23
|
+
}
|
24
|
+
|
25
|
+
@Override
|
26
|
+
public Float timestampColumn(String value) {
|
27
|
+
return Float.parseFloat(value);
|
28
|
+
}
|
29
|
+
}
|
@@ -0,0 +1,29 @@
|
|
1
|
+
package org.embulk.formatter.avro.converter;
|
2
|
+
|
3
|
+
import org.apache.avro.Schema;
|
4
|
+
|
5
|
+
public class AvroIntConverter extends AbstractAvroValueConverter {
|
6
|
+
public AvroIntConverter(Schema schema) {
|
7
|
+
super(schema);
|
8
|
+
}
|
9
|
+
|
10
|
+
@Override
|
11
|
+
public Integer longColumn(long value) {
|
12
|
+
return Long.valueOf(value).intValue();
|
13
|
+
}
|
14
|
+
|
15
|
+
@Override
|
16
|
+
public Integer doubleColumn(double value) {
|
17
|
+
return Double.valueOf(value).intValue();
|
18
|
+
}
|
19
|
+
|
20
|
+
@Override
|
21
|
+
public Integer stringColumn(String value) {
|
22
|
+
return Integer.parseInt(value);
|
23
|
+
}
|
24
|
+
|
25
|
+
@Override
|
26
|
+
public Integer timestampColumn(String value) {
|
27
|
+
return Integer.parseInt(value);
|
28
|
+
}
|
29
|
+
}
|
@@ -0,0 +1,29 @@
|
|
1
|
+
package org.embulk.formatter.avro.converter;
|
2
|
+
|
3
|
+
import org.apache.avro.Schema;
|
4
|
+
|
5
|
+
public class AvroLongConverter extends AbstractAvroValueConverter {
|
6
|
+
public AvroLongConverter(Schema schema) {
|
7
|
+
super(schema);
|
8
|
+
}
|
9
|
+
|
10
|
+
@Override
|
11
|
+
public Long longColumn(long value) {
|
12
|
+
return value;
|
13
|
+
}
|
14
|
+
|
15
|
+
@Override
|
16
|
+
public Long doubleColumn(double value) {
|
17
|
+
return Math.round(value);
|
18
|
+
}
|
19
|
+
|
20
|
+
@Override
|
21
|
+
public Long stringColumn(String value) {
|
22
|
+
return Long.parseLong(value);
|
23
|
+
}
|
24
|
+
|
25
|
+
@Override
|
26
|
+
public Long timestampColumn(String value) {
|
27
|
+
return Long.parseLong(value);
|
28
|
+
}
|
29
|
+
}
|
@@ -0,0 +1,52 @@
|
|
1
|
+
package org.embulk.formatter.avro.converter;
|
2
|
+
|
3
|
+
import avro.shaded.com.google.common.collect.ImmutableMap;
|
4
|
+
import org.apache.avro.Schema;
|
5
|
+
import org.msgpack.value.Value;
|
6
|
+
|
7
|
+
import java.util.Map;
|
8
|
+
|
9
|
+
public class AvroMapConverter extends AbstractAvroValueConverter {
|
10
|
+
private AbstractAvroValueConverter elementConverter;
|
11
|
+
|
12
|
+
public AvroMapConverter(Schema schema, AbstractAvroValueConverter elementConverter) {
|
13
|
+
super(schema);
|
14
|
+
this.elementConverter = elementConverter;
|
15
|
+
}
|
16
|
+
|
17
|
+
@Override
|
18
|
+
public Map<String, Object> jsonColumn(Value value) {
|
19
|
+
if (!value.isMapValue())
|
20
|
+
throw new RuntimeException("Support only map type json record");
|
21
|
+
|
22
|
+
Map<Value, Value> map = value.asMapValue().map();
|
23
|
+
|
24
|
+
ImmutableMap.Builder<String, Object> builder = ImmutableMap.builder();
|
25
|
+
|
26
|
+
for (Map.Entry<Value, Value> entry : map.entrySet()) {
|
27
|
+
switch (entry.getValue().getValueType()) {
|
28
|
+
case STRING:
|
29
|
+
builder.put(entry.getKey().toString(), elementConverter.stringColumn(entry.getValue().asStringValue().toString()));
|
30
|
+
break;
|
31
|
+
case INTEGER:
|
32
|
+
builder.put(entry.getKey().toString(), elementConverter.longColumn(entry.getValue().asIntegerValue().toLong()));
|
33
|
+
break;
|
34
|
+
case FLOAT:
|
35
|
+
builder.put(entry.getKey().toString(), elementConverter.doubleColumn(entry.getValue().asFloatValue().toDouble()));
|
36
|
+
break;
|
37
|
+
case BOOLEAN:
|
38
|
+
builder.put(entry.getKey().toString(), elementConverter.booleanColumn(entry.getValue().asBooleanValue().getBoolean()));
|
39
|
+
break;
|
40
|
+
case ARRAY:
|
41
|
+
builder.put(entry.getKey().toString(), elementConverter.jsonColumn(entry.getValue().asArrayValue()));
|
42
|
+
break;
|
43
|
+
case MAP:
|
44
|
+
builder.put(entry.getKey().toString(), elementConverter.jsonColumn(entry.getValue().asMapValue()));
|
45
|
+
break;
|
46
|
+
default:
|
47
|
+
throw new RuntimeException("Irregular Messagepack type");
|
48
|
+
}
|
49
|
+
}
|
50
|
+
return builder.build();
|
51
|
+
}
|
52
|
+
}
|
@@ -0,0 +1,40 @@
|
|
1
|
+
package org.embulk.formatter.avro.converter;
|
2
|
+
|
3
|
+
import org.apache.avro.Schema;
|
4
|
+
import org.msgpack.value.Value;
|
5
|
+
|
6
|
+
public class AvroNullConverter extends AbstractAvroValueConverter {
|
7
|
+
public AvroNullConverter(Schema schema) {
|
8
|
+
super(schema);
|
9
|
+
}
|
10
|
+
|
11
|
+
@Override
|
12
|
+
public Object booleanColumn(boolean value) {
|
13
|
+
return null;
|
14
|
+
}
|
15
|
+
|
16
|
+
@Override
|
17
|
+
public Object longColumn(long value) {
|
18
|
+
return null;
|
19
|
+
}
|
20
|
+
|
21
|
+
@Override
|
22
|
+
public Object doubleColumn(double value) {
|
23
|
+
return null;
|
24
|
+
}
|
25
|
+
|
26
|
+
@Override
|
27
|
+
public Object stringColumn(String value) {
|
28
|
+
return null;
|
29
|
+
}
|
30
|
+
|
31
|
+
@Override
|
32
|
+
public Object timestampColumn(String value) {
|
33
|
+
return null;
|
34
|
+
}
|
35
|
+
|
36
|
+
@Override
|
37
|
+
public Object jsonColumn(Value value) {
|
38
|
+
return null;
|
39
|
+
}
|
40
|
+
}
|
@@ -0,0 +1,59 @@
|
|
1
|
+
package org.embulk.formatter.avro.converter;
|
2
|
+
|
3
|
+
import org.apache.avro.Schema;
|
4
|
+
import org.apache.avro.generic.GenericData;
|
5
|
+
import org.apache.avro.generic.GenericRecord;
|
6
|
+
import org.msgpack.value.Value;
|
7
|
+
import org.msgpack.value.ValueFactory;
|
8
|
+
|
9
|
+
import java.util.Map;
|
10
|
+
|
11
|
+
public class AvroRecordConverter extends AbstractAvroValueConverter {
|
12
|
+
private Map<String, AbstractAvroValueConverter> converterTable;
|
13
|
+
|
14
|
+
public AvroRecordConverter(Schema schema, Map<String, AbstractAvroValueConverter> converterTable) {
|
15
|
+
super(schema);
|
16
|
+
this.converterTable = converterTable;
|
17
|
+
}
|
18
|
+
|
19
|
+
@Override
|
20
|
+
public GenericRecord jsonColumn(Value value) {
|
21
|
+
if (!value.isMapValue())
|
22
|
+
throw new RuntimeException("Support only map type json record");
|
23
|
+
|
24
|
+
Map<Value, Value> map = value.asMapValue().map();
|
25
|
+
|
26
|
+
GenericRecord record = new GenericData.Record(avroSchema);
|
27
|
+
for (Map.Entry<String, AbstractAvroValueConverter> entry : converterTable.entrySet()) {
|
28
|
+
Value key = ValueFactory.newString(entry.getKey());
|
29
|
+
if (!map.containsKey(key)) {
|
30
|
+
record.put(entry.getKey(), null);
|
31
|
+
} else {
|
32
|
+
Value child = map.get(ValueFactory.newString(entry.getKey()));
|
33
|
+
switch (child.getValueType()) {
|
34
|
+
case STRING:
|
35
|
+
record.put(entry.getKey(), entry.getValue().stringColumn(child.asStringValue().toString()));
|
36
|
+
break;
|
37
|
+
case INTEGER:
|
38
|
+
record.put(entry.getKey(), entry.getValue().longColumn(child.asIntegerValue().toLong()));
|
39
|
+
break;
|
40
|
+
case FLOAT:
|
41
|
+
record.put(entry.getKey(), entry.getValue().doubleColumn(child.asFloatValue().toDouble()));
|
42
|
+
break;
|
43
|
+
case BOOLEAN:
|
44
|
+
record.put(entry.getKey(), entry.getValue().booleanColumn(child.asBooleanValue().getBoolean()));
|
45
|
+
break;
|
46
|
+
case ARRAY:
|
47
|
+
record.put(entry.getKey(), entry.getValue().jsonColumn(child.asArrayValue()));
|
48
|
+
break;
|
49
|
+
case MAP:
|
50
|
+
record.put(entry.getKey(), entry.getValue().jsonColumn(child.asMapValue()));
|
51
|
+
break;
|
52
|
+
default:
|
53
|
+
throw new RuntimeException("Irregular Messagepack type");
|
54
|
+
}
|
55
|
+
}
|
56
|
+
}
|
57
|
+
return record;
|
58
|
+
}
|
59
|
+
}
|
@@ -0,0 +1,41 @@
|
|
1
|
+
package org.embulk.formatter.avro.converter;
|
2
|
+
|
3
|
+
import org.apache.avro.Schema;
|
4
|
+
import org.msgpack.value.Value;
|
5
|
+
|
6
|
+
public class AvroStringConverter extends AbstractAvroValueConverter {
|
7
|
+
public AvroStringConverter(Schema schema) {
|
8
|
+
super(schema);
|
9
|
+
}
|
10
|
+
|
11
|
+
@Override
|
12
|
+
public String booleanColumn(boolean value) {
|
13
|
+
return String.valueOf(value);
|
14
|
+
}
|
15
|
+
|
16
|
+
@Override
|
17
|
+
public String longColumn(long value) {
|
18
|
+
return String.valueOf(value);
|
19
|
+
}
|
20
|
+
|
21
|
+
@Override
|
22
|
+
public String doubleColumn(double value) {
|
23
|
+
return String.valueOf(value);
|
24
|
+
}
|
25
|
+
|
26
|
+
@Override
|
27
|
+
public String stringColumn(String value) {
|
28
|
+
return value;
|
29
|
+
}
|
30
|
+
|
31
|
+
@Override
|
32
|
+
public String timestampColumn(String value) {
|
33
|
+
return value;
|
34
|
+
}
|
35
|
+
|
36
|
+
|
37
|
+
@Override
|
38
|
+
public String jsonColumn(Value value) {
|
39
|
+
return value.toJson();
|
40
|
+
}
|
41
|
+
}
|