embulk-formatter-avro 0.1.1 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -0
- data/build.gradle +1 -1
- data/example/config.yml +1 -0
- data/example/sample_01.csv +7 -7
- data/example/schema.avsc +2 -1
- data/src/main/java/org/embulk/formatter/avro/AvroFormatterPlugin.java +66 -0
- data/src/main/java/org/embulk/formatter/avro/converter/AvroBooleanConverter.java +18 -2
- metadata +7 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 297627941b7e4e884febe21ab19701ee8d136d95
|
4
|
+
data.tar.gz: 9d6745e4e739064db4fe91e78c0cc39fe3948488
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: dd3fa9f5211bb629007fe942a68bfa99a66c31167864ef241258611c125bd77d232bbdcd97960369d3e04b56467c9dc9e11bf8cb8f955877fd9ea244f5242ffb
|
7
|
+
data.tar.gz: c8884bc84f00bf6720728db03033370ba5285d64431a12424f034a104488eb78aa4f9463e6c8e762d698d04992de40e13a1585c0da01479b10a6e3b4c85df1c2
|
data/README.md
CHANGED
@@ -28,6 +28,8 @@ See. [AvroValueConverters](https://github.com/joker1007/embulk-formatter-avro/tr
|
|
28
28
|
## Configuration
|
29
29
|
|
30
30
|
- **avsc**: avro schema (avsc) filepath (string, required)
|
31
|
+
- **codec**: avro codec type (enum: `deflate`, `bzip2`, `xz`, `snappy`, optional)
|
32
|
+
- **compression\_level**: avro codec compression level (integer, optional; applies only to the `deflate` and `xz` codecs)
|
31
33
|
- **skip\_error\_record**: Set to `true` to skip error records (boolean, default: `false`)
|
32
34
|
|
33
35
|
## Example
|
data/build.gradle
CHANGED
data/example/config.yml
CHANGED
data/example/sample_01.csv
CHANGED
@@ -1,7 +1,7 @@
|
|
1
|
-
id,account,time,purchase,comment,data,data2,type,md5
|
2
|
-
1,32864,2015-01-27 19:23:49,20150127,embulk,"{\"foo\": \"bar\", \"events\": [{\"id\": 1, \"name\": \"Name1\"}, {\"id\": 2, \"name\": \"Name2\"}]}","[{\"hoge\": 1}]","A",6c3983adda53b2136532dbd5b4d668b9
|
3
|
-
2,14824,2015-01-27 19:01:23,20150127,embulk jruby,"{\"events\": [{\"id\": 1, \"name\": \"Name1\"}, {\"id\": 2, \"name\": \"Name2\"}]}","[]","B",NULL
|
4
|
-
3,27559,2015-01-28 02:20:02,20150128,"Embulk ""csv"" parser plugin",NULL,"[]","C",8c3983ad4jsal42010325kld024954ll
|
5
|
-
4,11270,2015-01-29 11:54:36,20150129,NULL,NULL,"[]","A",474a3bdb966b40c80acb069f73441f29
|
6
|
-
4,11270,2015-01-29 11:54:36,20150129,NULL,NULL,"[]","D",NULL
|
7
|
-
4,11270,2015-01-29 11:54:36,20150129,NULL,NULL,NULL,"A",NULL
|
1
|
+
id,account,time,purchase,comment,data,data2,type,md5,flag
|
2
|
+
1,32864,2015-01-27 19:23:49,20150127,embulk,"{\"foo\": \"bar\", \"events\": [{\"id\": 1, \"name\": \"Name1\"}, {\"id\": 2, \"name\": \"Name2\"}]}","[{\"hoge\": 1}]","A",6c3983adda53b2136532dbd5b4d668b9,true
|
3
|
+
2,14824,2015-01-27 19:01:23,20150127,embulk jruby,"{\"events\": [{\"id\": 1, \"name\": \"Name1\"}, {\"id\": 2, \"name\": \"Name2\"}]}","[]","B",NULL,true
|
4
|
+
3,27559,2015-01-28 02:20:02,20150128,"Embulk ""csv"" parser plugin",NULL,"[]","C",8c3983ad4jsal42010325kld024954ll,true
|
5
|
+
4,11270,2015-01-29 11:54:36,20150129,NULL,NULL,"[]","A",474a3bdb966b40c80acb069f73441f29,true
|
6
|
+
4,11270,2015-01-29 11:54:36,20150129,NULL,NULL,"[]","D",NULL,false
|
7
|
+
4,11270,2015-01-29 11:54:36,20150129,NULL,NULL,NULL,"A",NULL,false
|
data/example/schema.avsc
CHANGED
@@ -28,6 +28,7 @@
|
|
28
28
|
}
|
29
29
|
}},
|
30
30
|
{"name": "type", "type": {"type": "enum", "name": "type_enum", "symbols": ["A", "B", "C"]}},
|
31
|
-
{"name": "md5", "type": ["null", {"type": "fixed", "name": "md5_value", "size": 32}]}
|
31
|
+
{"name": "md5", "type": ["null", {"type": "fixed", "name": "md5_value", "size": 32}]},
|
32
|
+
{"name": "flag", "type": "boolean"}
|
32
33
|
]
|
33
34
|
}
|
@@ -1,5 +1,9 @@
|
|
1
1
|
package org.embulk.formatter.avro;
|
2
2
|
|
3
|
+
import com.fasterxml.jackson.annotation.JsonCreator;
|
4
|
+
import com.fasterxml.jackson.annotation.JsonValue;
|
5
|
+
import com.google.common.base.Optional;
|
6
|
+
import org.apache.avro.file.CodecFactory;
|
3
7
|
import org.apache.avro.file.DataFileWriter;
|
4
8
|
import org.apache.avro.generic.GenericData;
|
5
9
|
import org.apache.avro.generic.GenericDatumWriter;
|
@@ -30,6 +34,7 @@ import org.slf4j.Logger;
|
|
30
34
|
|
31
35
|
import java.io.File;
|
32
36
|
import java.io.IOException;
|
37
|
+
import java.util.Locale;
|
33
38
|
import java.util.Map;
|
34
39
|
|
35
40
|
public class AvroFormatterPlugin
|
@@ -41,6 +46,14 @@ public class AvroFormatterPlugin
|
|
41
46
|
@Config("avsc")
|
42
47
|
LocalFile getAvsc();
|
43
48
|
|
49
|
+
@Config("codec")
|
50
|
+
@ConfigDefault("null")
|
51
|
+
Optional<Codec> getCodec();
|
52
|
+
|
53
|
+
@Config("compression_level")
|
54
|
+
@ConfigDefault("null")
|
55
|
+
Optional<Integer> getCompressionLevel();
|
56
|
+
|
44
57
|
@Config("column_options")
|
45
58
|
@ConfigDefault("{}")
|
46
59
|
Map<String, TimestampFormatter.TimestampColumnOption> getColumnOptions();
|
@@ -95,6 +108,7 @@ public class AvroFormatterPlugin
|
|
95
108
|
avroSchema = new org.apache.avro.Schema.Parser().parse(avsc);
|
96
109
|
GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(avroSchema);
|
97
110
|
writer = new DataFileWriter<>(datumWriter);
|
111
|
+
writer.setCodec(task.getCodec().or(Codec.NULL).getCodecFactory(task.getCompressionLevel()));
|
98
112
|
stream.nextFile();
|
99
113
|
writer.create(avroSchema, stream);
|
100
114
|
} catch (IOException e) {
|
@@ -166,4 +180,56 @@ public class AvroFormatterPlugin
|
|
166
180
|
}
|
167
181
|
};
|
168
182
|
}
|
183
|
+
|
184
|
+
public enum Codec {
|
185
|
+
NULL {
|
186
|
+
public CodecFactory getCodecFactory(Optional<Integer> compressionLevel) {
|
187
|
+
return CodecFactory.nullCodec();
|
188
|
+
}
|
189
|
+
},
|
190
|
+
DEFLATE {
|
191
|
+
public CodecFactory getCodecFactory(Optional<Integer> compressionLevel) {
|
192
|
+
return CodecFactory.deflateCodec(compressionLevel.or(CodecFactory.DEFAULT_DEFLATE_LEVEL));
|
193
|
+
}
|
194
|
+
},
|
195
|
+
XZ {
|
196
|
+
public CodecFactory getCodecFactory(Optional<Integer> compressionLevel) {
|
197
|
+
return CodecFactory.xzCodec(compressionLevel.or(CodecFactory.DEFAULT_XZ_LEVEL));
|
198
|
+
}
|
199
|
+
},
|
200
|
+
SNAPPY {
|
201
|
+
public CodecFactory getCodecFactory(Optional<Integer> compressionLevel) {
|
202
|
+
return CodecFactory.snappyCodec();
|
203
|
+
}
|
204
|
+
},
|
205
|
+
BZIP2 {
|
206
|
+
public CodecFactory getCodecFactory(Optional<Integer> compressionLevel) {
|
207
|
+
return CodecFactory.bzip2Codec();
|
208
|
+
}
|
209
|
+
};
|
210
|
+
|
211
|
+
@JsonValue
|
212
|
+
@Override
|
213
|
+
public String toString() {
|
214
|
+
return name().toLowerCase(Locale.ENGLISH);
|
215
|
+
}
|
216
|
+
|
217
|
+
abstract public CodecFactory getCodecFactory(Optional<Integer> compressionLevel);
|
218
|
+
|
219
|
+
@JsonCreator
|
220
|
+
public static Codec fromString(String name) {
|
221
|
+
switch (name) {
|
222
|
+
case "deflate":
|
223
|
+
return DEFLATE;
|
224
|
+
case "xz":
|
225
|
+
return XZ;
|
226
|
+
case "snappy":
|
227
|
+
return SNAPPY;
|
228
|
+
case "bzip2":
|
229
|
+
return BZIP2;
|
230
|
+
default:
|
231
|
+
throw new ConfigException(String.format("Unknown mode '%s'. Supported modes are single_column, multi_column", name));
|
232
|
+
}
|
233
|
+
}
|
234
|
+
}
|
169
235
|
}
|
@@ -1,6 +1,7 @@
|
|
1
1
|
package org.embulk.formatter.avro.converter;
|
2
2
|
|
3
3
|
import org.apache.avro.Schema;
|
4
|
+
import org.msgpack.value.Value;
|
4
5
|
|
5
6
|
public class AvroBooleanConverter extends AbstractAvroValueConverter {
|
6
7
|
public AvroBooleanConverter(Schema schema) {
|
@@ -8,7 +9,22 @@ public class AvroBooleanConverter extends AbstractAvroValueConverter {
|
|
8
9
|
}
|
9
10
|
|
10
11
|
@Override
|
11
|
-
public
|
12
|
-
|
12
|
+
public Boolean booleanColumn(boolean value) {
|
13
|
+
return value;
|
14
|
+
}
|
15
|
+
|
16
|
+
@Override
|
17
|
+
public Boolean longColumn(long value) {
|
18
|
+
return value != 0;
|
19
|
+
}
|
20
|
+
|
21
|
+
@Override
|
22
|
+
public Boolean doubleColumn(double value) {
|
23
|
+
return value != 0;
|
24
|
+
}
|
25
|
+
|
26
|
+
@Override
|
27
|
+
public Boolean stringColumn(String value) {
|
28
|
+
return Boolean.valueOf(value);
|
13
29
|
}
|
14
30
|
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-formatter-avro
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- joker1007
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-01-
|
11
|
+
date: 2017-01-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -76,14 +76,14 @@ files:
|
|
76
76
|
- src/main/java/org/embulk/formatter/avro/converter/AvroStringConverter.java
|
77
77
|
- src/main/java/org/embulk/formatter/avro/converter/AvroValueConverterFactory.java
|
78
78
|
- src/test/java/org/embulk/formatter/avro/TestAvroFormatterPlugin.java
|
79
|
-
- classpath/xz-1.5.jar
|
80
|
-
- classpath/jackson-mapper-asl-1.9.13.jar
|
81
79
|
- classpath/paranamer-2.7.jar
|
82
|
-
- classpath/
|
83
|
-
- classpath/commons-compress-1.8.1.jar
|
80
|
+
- classpath/xz-1.5.jar
|
84
81
|
- classpath/snappy-java-1.1.1.3.jar
|
85
|
-
- classpath/embulk-formatter-avro-0.1.1.jar
|
86
82
|
- classpath/jackson-core-asl-1.9.13.jar
|
83
|
+
- classpath/embulk-formatter-avro-0.2.1.jar
|
84
|
+
- classpath/avro-1.8.1.jar
|
85
|
+
- classpath/jackson-mapper-asl-1.9.13.jar
|
86
|
+
- classpath/commons-compress-1.8.1.jar
|
87
87
|
homepage: https://github.com/joker1007/embulk-formatter-avro
|
88
88
|
licenses:
|
89
89
|
- MIT
|