embulk-formatter-avro 0.1.1 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -0
- data/build.gradle +1 -1
- data/example/config.yml +1 -0
- data/example/sample_01.csv +7 -7
- data/example/schema.avsc +2 -1
- data/src/main/java/org/embulk/formatter/avro/AvroFormatterPlugin.java +66 -0
- data/src/main/java/org/embulk/formatter/avro/converter/AvroBooleanConverter.java +18 -2
- metadata +7 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 297627941b7e4e884febe21ab19701ee8d136d95
|
4
|
+
data.tar.gz: 9d6745e4e739064db4fe91e78c0cc39fe3948488
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: dd3fa9f5211bb629007fe942a68bfa99a66c31167864ef241258611c125bd77d232bbdcd97960369d3e04b56467c9dc9e11bf8cb8f955877fd9ea244f5242ffb
|
7
|
+
data.tar.gz: c8884bc84f00bf6720728db03033370ba5285d64431a12424f034a104488eb78aa4f9463e6c8e762d698d04992de40e13a1585c0da01479b10a6e3b4c85df1c2
|
data/README.md
CHANGED
@@ -28,6 +28,8 @@ See. [AvroValueConverters](https://github.com/joker1007/embulk-formatter-avro/tr
|
|
28
28
|
## Configuration
|
29
29
|
|
30
30
|
- **avsc**: avro schema (avsc) filepath (string, required)
|
31
|
+
- **codec**: avro codec type (enum: `deflate`, `bzip2`, `xz`, `snappy`, optional)
|
32
|
+
- **compression\_level**: avro codec compression level (integer, optional; only used by the `deflate` and `xz` codecs)
|
31
33
|
- **skip\_error\_record**: set to `true` to skip records that cause an error (boolean, default: `false`)
|
32
34
|
|
33
35
|
## Example
|
data/build.gradle
CHANGED
data/example/config.yml
CHANGED
data/example/sample_01.csv
CHANGED
@@ -1,7 +1,7 @@
|
|
1
|
-
id,account,time,purchase,comment,data,data2,type,md5
|
2
|
-
1,32864,2015-01-27 19:23:49,20150127,embulk,"{\"foo\": \"bar\", \"events\": [{\"id\": 1, \"name\": \"Name1\"}, {\"id\": 2, \"name\": \"Name2\"}]}","[{\"hoge\": 1}]","A",6c3983adda53b2136532dbd5b4d668b9
|
3
|
-
2,14824,2015-01-27 19:01:23,20150127,embulk jruby,"{\"events\": [{\"id\": 1, \"name\": \"Name1\"}, {\"id\": 2, \"name\": \"Name2\"}]}","[]","B",NULL
|
4
|
-
3,27559,2015-01-28 02:20:02,20150128,"Embulk ""csv"" parser plugin",NULL,"[]","C",8c3983ad4jsal42010325kld024954ll
|
5
|
-
4,11270,2015-01-29 11:54:36,20150129,NULL,NULL,"[]","A",474a3bdb966b40c80acb069f73441f29
|
6
|
-
4,11270,2015-01-29 11:54:36,20150129,NULL,NULL,"[]","D",NULL
|
7
|
-
4,11270,2015-01-29 11:54:36,20150129,NULL,NULL,NULL,"A",NULL
|
1
|
+
id,account,time,purchase,comment,data,data2,type,md5,flag
|
2
|
+
1,32864,2015-01-27 19:23:49,20150127,embulk,"{\"foo\": \"bar\", \"events\": [{\"id\": 1, \"name\": \"Name1\"}, {\"id\": 2, \"name\": \"Name2\"}]}","[{\"hoge\": 1}]","A",6c3983adda53b2136532dbd5b4d668b9,true
|
3
|
+
2,14824,2015-01-27 19:01:23,20150127,embulk jruby,"{\"events\": [{\"id\": 1, \"name\": \"Name1\"}, {\"id\": 2, \"name\": \"Name2\"}]}","[]","B",NULL,true
|
4
|
+
3,27559,2015-01-28 02:20:02,20150128,"Embulk ""csv"" parser plugin",NULL,"[]","C",8c3983ad4jsal42010325kld024954ll,true
|
5
|
+
4,11270,2015-01-29 11:54:36,20150129,NULL,NULL,"[]","A",474a3bdb966b40c80acb069f73441f29,true
|
6
|
+
4,11270,2015-01-29 11:54:36,20150129,NULL,NULL,"[]","D",NULL,false
|
7
|
+
4,11270,2015-01-29 11:54:36,20150129,NULL,NULL,NULL,"A",NULL,false
|
data/example/schema.avsc
CHANGED
@@ -28,6 +28,7 @@
|
|
28
28
|
}
|
29
29
|
}},
|
30
30
|
{"name": "type", "type": {"type": "enum", "name": "type_enum", "symbols": ["A", "B", "C"]}},
|
31
|
-
{"name": "md5", "type": ["null", {"type": "fixed", "name": "md5_value", "size": 32}]}
|
31
|
+
{"name": "md5", "type": ["null", {"type": "fixed", "name": "md5_value", "size": 32}]},
|
32
|
+
{"name": "flag", "type": "boolean"}
|
32
33
|
]
|
33
34
|
}
|
@@ -1,5 +1,9 @@
|
|
1
1
|
package org.embulk.formatter.avro;
|
2
2
|
|
3
|
+
import com.fasterxml.jackson.annotation.JsonCreator;
|
4
|
+
import com.fasterxml.jackson.annotation.JsonValue;
|
5
|
+
import com.google.common.base.Optional;
|
6
|
+
import org.apache.avro.file.CodecFactory;
|
3
7
|
import org.apache.avro.file.DataFileWriter;
|
4
8
|
import org.apache.avro.generic.GenericData;
|
5
9
|
import org.apache.avro.generic.GenericDatumWriter;
|
@@ -30,6 +34,7 @@ import org.slf4j.Logger;
|
|
30
34
|
|
31
35
|
import java.io.File;
|
32
36
|
import java.io.IOException;
|
37
|
+
import java.util.Locale;
|
33
38
|
import java.util.Map;
|
34
39
|
|
35
40
|
public class AvroFormatterPlugin
|
@@ -41,6 +46,14 @@ public class AvroFormatterPlugin
|
|
41
46
|
@Config("avsc")
|
42
47
|
LocalFile getAvsc();
|
43
48
|
|
49
|
+
// Optional "codec" setting: the compression codec applied to the Avro writer.
// Defaults to absent; the writer setup in this file falls back to Codec.NULL in that case.
@Config("codec")
|
50
|
+
@ConfigDefault("null")
|
51
|
+
Optional<Codec> getCodec();
|
52
|
+
|
53
|
+
// Optional "compression_level" setting, handed to Codec.getCodecFactory(...).
// Defaults to absent; only the DEFLATE and XZ codecs read it, each substituting
// its own CodecFactory default level when it is absent.
@Config("compression_level")
|
54
|
+
@ConfigDefault("null")
|
55
|
+
Optional<Integer> getCompressionLevel();
|
56
|
+
|
44
57
|
@Config("column_options")
|
45
58
|
@ConfigDefault("{}")
|
46
59
|
Map<String, TimestampFormatter.TimestampColumnOption> getColumnOptions();
|
@@ -95,6 +108,7 @@ public class AvroFormatterPlugin
|
|
95
108
|
avroSchema = new org.apache.avro.Schema.Parser().parse(avsc);
|
96
109
|
GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(avroSchema);
|
97
110
|
writer = new DataFileWriter<>(datumWriter);
|
111
|
+
writer.setCodec(task.getCodec().or(Codec.NULL).getCodecFactory(task.getCompressionLevel()));
|
98
112
|
stream.nextFile();
|
99
113
|
writer.create(avroSchema, stream);
|
100
114
|
} catch (IOException e) {
|
@@ -166,4 +180,56 @@ public class AvroFormatterPlugin
|
|
166
180
|
}
|
167
181
|
};
|
168
182
|
}
|
183
|
+
|
184
|
+
public enum Codec {
|
185
|
+
NULL {
|
186
|
+
public CodecFactory getCodecFactory(Optional<Integer> compressionLevel) {
|
187
|
+
return CodecFactory.nullCodec();
|
188
|
+
}
|
189
|
+
},
|
190
|
+
DEFLATE {
|
191
|
+
public CodecFactory getCodecFactory(Optional<Integer> compressionLevel) {
|
192
|
+
return CodecFactory.deflateCodec(compressionLevel.or(CodecFactory.DEFAULT_DEFLATE_LEVEL));
|
193
|
+
}
|
194
|
+
},
|
195
|
+
XZ {
|
196
|
+
public CodecFactory getCodecFactory(Optional<Integer> compressionLevel) {
|
197
|
+
return CodecFactory.xzCodec(compressionLevel.or(CodecFactory.DEFAULT_XZ_LEVEL));
|
198
|
+
}
|
199
|
+
},
|
200
|
+
SNAPPY {
|
201
|
+
public CodecFactory getCodecFactory(Optional<Integer> compressionLevel) {
|
202
|
+
return CodecFactory.snappyCodec();
|
203
|
+
}
|
204
|
+
},
|
205
|
+
BZIP2 {
|
206
|
+
public CodecFactory getCodecFactory(Optional<Integer> compressionLevel) {
|
207
|
+
return CodecFactory.bzip2Codec();
|
208
|
+
}
|
209
|
+
};
|
210
|
+
|
211
|
+
@JsonValue
|
212
|
+
@Override
|
213
|
+
public String toString() {
|
214
|
+
return name().toLowerCase(Locale.ENGLISH);
|
215
|
+
}
|
216
|
+
|
217
|
+
abstract public CodecFactory getCodecFactory(Optional<Integer> compressionLevel);
|
218
|
+
|
219
|
+
@JsonCreator
|
220
|
+
public static Codec fromString(String name) {
|
221
|
+
switch (name) {
|
222
|
+
case "deflate":
|
223
|
+
return DEFLATE;
|
224
|
+
case "xz":
|
225
|
+
return XZ;
|
226
|
+
case "snappy":
|
227
|
+
return SNAPPY;
|
228
|
+
case "bzip2":
|
229
|
+
return BZIP2;
|
230
|
+
default:
|
231
|
+
throw new ConfigException(String.format("Unknown mode '%s'. Supported modes are single_column, multi_column", name));
|
232
|
+
}
|
233
|
+
}
|
234
|
+
}
|
169
235
|
}
|
@@ -1,6 +1,7 @@
|
|
1
1
|
package org.embulk.formatter.avro.converter;
|
2
2
|
|
3
3
|
import org.apache.avro.Schema;
|
4
|
+
import org.msgpack.value.Value;
|
4
5
|
|
5
6
|
public class AvroBooleanConverter extends AbstractAvroValueConverter {
|
6
7
|
public AvroBooleanConverter(Schema schema) {
|
@@ -8,7 +9,22 @@ public class AvroBooleanConverter extends AbstractAvroValueConverter {
|
|
8
9
|
}
|
9
10
|
|
10
11
|
@Override
|
11
|
-
public
|
12
|
-
|
12
|
+
public Boolean booleanColumn(boolean value) {
|
13
|
+
return value;
|
14
|
+
}
|
15
|
+
|
16
|
+
@Override
|
17
|
+
public Boolean longColumn(long value) {
|
18
|
+
return value != 0;
|
19
|
+
}
|
20
|
+
|
21
|
+
@Override
|
22
|
+
public Boolean doubleColumn(double value) {
|
23
|
+
return value != 0;
|
24
|
+
}
|
25
|
+
|
26
|
+
@Override
|
27
|
+
public Boolean stringColumn(String value) {
|
28
|
+
return Boolean.valueOf(value);
|
13
29
|
}
|
14
30
|
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-formatter-avro
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- joker1007
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-01-
|
11
|
+
date: 2017-01-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -76,14 +76,14 @@ files:
|
|
76
76
|
- src/main/java/org/embulk/formatter/avro/converter/AvroStringConverter.java
|
77
77
|
- src/main/java/org/embulk/formatter/avro/converter/AvroValueConverterFactory.java
|
78
78
|
- src/test/java/org/embulk/formatter/avro/TestAvroFormatterPlugin.java
|
79
|
-
- classpath/xz-1.5.jar
|
80
|
-
- classpath/jackson-mapper-asl-1.9.13.jar
|
81
79
|
- classpath/paranamer-2.7.jar
|
82
|
-
- classpath/
|
83
|
-
- classpath/commons-compress-1.8.1.jar
|
80
|
+
- classpath/xz-1.5.jar
|
84
81
|
- classpath/snappy-java-1.1.1.3.jar
|
85
|
-
- classpath/embulk-formatter-avro-0.1.1.jar
|
86
82
|
- classpath/jackson-core-asl-1.9.13.jar
|
83
|
+
- classpath/embulk-formatter-avro-0.2.1.jar
|
84
|
+
- classpath/avro-1.8.1.jar
|
85
|
+
- classpath/jackson-mapper-asl-1.9.13.jar
|
86
|
+
- classpath/commons-compress-1.8.1.jar
|
87
87
|
homepage: https://github.com/joker1007/embulk-formatter-avro
|
88
88
|
licenses:
|
89
89
|
- MIT
|