embulk-formatter-avro 0.1.1 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9fe7f1bec5a58a73ef465e0dbe1d905a11af5ff0
4
- data.tar.gz: 40f003f4911ab7c635efb83b8a67820baad5ffa4
3
+ metadata.gz: 297627941b7e4e884febe21ab19701ee8d136d95
4
+ data.tar.gz: 9d6745e4e739064db4fe91e78c0cc39fe3948488
5
5
  SHA512:
6
- metadata.gz: 96b5a25afb51f07f9e629e00fd7536140819eaeb2d70d7cb548decb9139de5919b33ca9ee9f72adfe9924a13e1c1822fa61dcabb286603ab596fd5ca40426b17
7
- data.tar.gz: c105fe45e42fffa3a6c45a74b12b4f5a242018fb2d262b0b623ee8e4356aec5f7aaece17b5e0e785f5295d0e324698e24c7011376e718ec8b2b765eae5ff2757
6
+ metadata.gz: dd3fa9f5211bb629007fe942a68bfa99a66c31167864ef241258611c125bd77d232bbdcd97960369d3e04b56467c9dc9e11bf8cb8f955877fd9ea244f5242ffb
7
+ data.tar.gz: c8884bc84f00bf6720728db03033370ba5285d64431a12424f034a104488eb78aa4f9463e6c8e762d698d04992de40e13a1585c0da01479b10a6e3b4c85df1c2
data/README.md CHANGED
@@ -28,6 +28,8 @@ See. [AvroValueConverters](https://github.com/joker1007/embulk-formatter-avro/tr
28
28
  ## Configuration
29
29
 
30
30
  - **avsc**: avro schema (avsc) filepath (string, required)
31
+ - **codec**: avro codec type (enum: `deflate`, `bzip2`, `xz`, `snappy`, optional)
32
+ - **compression\_level**: avro codec compression level (integer, optional, used only by the `deflate` and `xz` codecs)
31
33
  - **skip\_error\_record**: Set to `true` to skip error records (boolean, default: `false`)
32
34
 
33
35
  ## Example
@@ -13,7 +13,7 @@ configurations {
13
13
  provided
14
14
  }
15
15
 
16
- version = "0.1.1"
16
+ version = "0.2.1"
17
17
 
18
18
  sourceCompatibility = 1.7
19
19
  targetCompatibility = 1.7
@@ -23,6 +23,7 @@ in:
23
23
  - {name: data2, type: json}
24
24
  - {name: type, type: string}
25
25
  - {name: md5, type: string}
26
+ - {name: flag, type: boolean}
26
27
 
27
28
  out:
28
29
  type: file
@@ -1,7 +1,7 @@
1
- id,account,time,purchase,comment,data,data2,type,md5
2
- 1,32864,2015-01-27 19:23:49,20150127,embulk,"{\"foo\": \"bar\", \"events\": [{\"id\": 1, \"name\": \"Name1\"}, {\"id\": 2, \"name\": \"Name2\"}]}","[{\"hoge\": 1}]","A",6c3983adda53b2136532dbd5b4d668b9
3
- 2,14824,2015-01-27 19:01:23,20150127,embulk jruby,"{\"events\": [{\"id\": 1, \"name\": \"Name1\"}, {\"id\": 2, \"name\": \"Name2\"}]}","[]","B",NULL
4
- 3,27559,2015-01-28 02:20:02,20150128,"Embulk ""csv"" parser plugin",NULL,"[]","C",8c3983ad4jsal42010325kld024954ll
5
- 4,11270,2015-01-29 11:54:36,20150129,NULL,NULL,"[]","A",474a3bdb966b40c80acb069f73441f29
6
- 4,11270,2015-01-29 11:54:36,20150129,NULL,NULL,"[]","D",NULL
7
- 4,11270,2015-01-29 11:54:36,20150129,NULL,NULL,NULL,"A",NULL
1
+ id,account,time,purchase,comment,data,data2,type,md5,flag
2
+ 1,32864,2015-01-27 19:23:49,20150127,embulk,"{\"foo\": \"bar\", \"events\": [{\"id\": 1, \"name\": \"Name1\"}, {\"id\": 2, \"name\": \"Name2\"}]}","[{\"hoge\": 1}]","A",6c3983adda53b2136532dbd5b4d668b9,true
3
+ 2,14824,2015-01-27 19:01:23,20150127,embulk jruby,"{\"events\": [{\"id\": 1, \"name\": \"Name1\"}, {\"id\": 2, \"name\": \"Name2\"}]}","[]","B",NULL,true
4
+ 3,27559,2015-01-28 02:20:02,20150128,"Embulk ""csv"" parser plugin",NULL,"[]","C",8c3983ad4jsal42010325kld024954ll,true
5
+ 4,11270,2015-01-29 11:54:36,20150129,NULL,NULL,"[]","A",474a3bdb966b40c80acb069f73441f29,true
6
+ 4,11270,2015-01-29 11:54:36,20150129,NULL,NULL,"[]","D",NULL,false
7
+ 4,11270,2015-01-29 11:54:36,20150129,NULL,NULL,NULL,"A",NULL,false
@@ -28,6 +28,7 @@
28
28
  }
29
29
  }},
30
30
  {"name": "type", "type": {"type": "enum", "name": "type_enum", "symbols": ["A", "B", "C"]}},
31
- {"name": "md5", "type": ["null", {"type": "fixed", "name": "md5_value", "size": 32}]}
31
+ {"name": "md5", "type": ["null", {"type": "fixed", "name": "md5_value", "size": 32}]},
32
+ {"name": "flag", "type": "boolean"}
32
33
  ]
33
34
  }
@@ -1,5 +1,9 @@
1
1
  package org.embulk.formatter.avro;
2
2
 
3
+ import com.fasterxml.jackson.annotation.JsonCreator;
4
+ import com.fasterxml.jackson.annotation.JsonValue;
5
+ import com.google.common.base.Optional;
6
+ import org.apache.avro.file.CodecFactory;
3
7
  import org.apache.avro.file.DataFileWriter;
4
8
  import org.apache.avro.generic.GenericData;
5
9
  import org.apache.avro.generic.GenericDatumWriter;
@@ -30,6 +34,7 @@ import org.slf4j.Logger;
30
34
 
31
35
  import java.io.File;
32
36
  import java.io.IOException;
37
+ import java.util.Locale;
33
38
  import java.util.Map;
34
39
 
35
40
  public class AvroFormatterPlugin
@@ -41,6 +46,14 @@ public class AvroFormatterPlugin
41
46
  @Config("avsc")
42
47
  LocalFile getAvsc();
43
48
 
49
+ @Config("codec")
50
+ @ConfigDefault("null")
51
+ Optional<Codec> getCodec();
52
+
53
+ @Config("compression_level")
54
+ @ConfigDefault("null")
55
+ Optional<Integer> getCompressionLevel();
56
+
44
57
  @Config("column_options")
45
58
  @ConfigDefault("{}")
46
59
  Map<String, TimestampFormatter.TimestampColumnOption> getColumnOptions();
@@ -95,6 +108,7 @@ public class AvroFormatterPlugin
95
108
  avroSchema = new org.apache.avro.Schema.Parser().parse(avsc);
96
109
  GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(avroSchema);
97
110
  writer = new DataFileWriter<>(datumWriter);
111
+ writer.setCodec(task.getCodec().or(Codec.NULL).getCodecFactory(task.getCompressionLevel()));
98
112
  stream.nextFile();
99
113
  writer.create(avroSchema, stream);
100
114
  } catch (IOException e) {
@@ -166,4 +180,56 @@ public class AvroFormatterPlugin
166
180
  }
167
181
  };
168
182
  }
183
+
184
+ public enum Codec {
185
+ NULL {
186
+ public CodecFactory getCodecFactory(Optional<Integer> compressionLevel) {
187
+ return CodecFactory.nullCodec();
188
+ }
189
+ },
190
+ DEFLATE {
191
+ public CodecFactory getCodecFactory(Optional<Integer> compressionLevel) {
192
+ return CodecFactory.deflateCodec(compressionLevel.or(CodecFactory.DEFAULT_DEFLATE_LEVEL));
193
+ }
194
+ },
195
+ XZ {
196
+ public CodecFactory getCodecFactory(Optional<Integer> compressionLevel) {
197
+ return CodecFactory.xzCodec(compressionLevel.or(CodecFactory.DEFAULT_XZ_LEVEL));
198
+ }
199
+ },
200
+ SNAPPY {
201
+ public CodecFactory getCodecFactory(Optional<Integer> compressionLevel) {
202
+ return CodecFactory.snappyCodec();
203
+ }
204
+ },
205
+ BZIP2 {
206
+ public CodecFactory getCodecFactory(Optional<Integer> compressionLevel) {
207
+ return CodecFactory.bzip2Codec();
208
+ }
209
+ };
210
+
211
+ @JsonValue
212
+ @Override
213
+ public String toString() {
214
+ return name().toLowerCase(Locale.ENGLISH);
215
+ }
216
+
217
+ abstract public CodecFactory getCodecFactory(Optional<Integer> compressionLevel);
218
+
219
+ @JsonCreator
220
+ public static Codec fromString(String name) {
221
+ switch (name) {
222
+ case "deflate":
223
+ return DEFLATE;
224
+ case "xz":
225
+ return XZ;
226
+ case "snappy":
227
+ return SNAPPY;
228
+ case "bzip2":
229
+ return BZIP2;
230
+ default:
231
+ throw new ConfigException(String.format("Unknown mode '%s'. Supported modes are single_column, multi_column", name));
232
+ }
233
+ }
234
+ }
169
235
  }
@@ -1,6 +1,7 @@
1
1
  package org.embulk.formatter.avro.converter;
2
2
 
3
3
  import org.apache.avro.Schema;
4
+ import org.msgpack.value.Value;
4
5
 
5
6
  public class AvroBooleanConverter extends AbstractAvroValueConverter {
6
7
  public AvroBooleanConverter(Schema schema) {
@@ -8,7 +9,22 @@ public class AvroBooleanConverter extends AbstractAvroValueConverter {
8
9
  }
9
10
 
10
11
  @Override
11
- public Object booleanColumn(boolean value) {
12
- throw new RuntimeException("Unsupported typecasting");
12
+ public Boolean booleanColumn(boolean value) {
13
+ return value;
14
+ }
15
+
16
+ @Override
17
+ public Boolean longColumn(long value) {
18
+ return value != 0;
19
+ }
20
+
21
+ @Override
22
+ public Boolean doubleColumn(double value) {
23
+ return value != 0;
24
+ }
25
+
26
+ @Override
27
+ public Boolean stringColumn(String value) {
28
+ return Boolean.valueOf(value);
13
29
  }
14
30
  }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-formatter-avro
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - joker1007
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-01-04 00:00:00.000000000 Z
11
+ date: 2017-01-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -76,14 +76,14 @@ files:
76
76
  - src/main/java/org/embulk/formatter/avro/converter/AvroStringConverter.java
77
77
  - src/main/java/org/embulk/formatter/avro/converter/AvroValueConverterFactory.java
78
78
  - src/test/java/org/embulk/formatter/avro/TestAvroFormatterPlugin.java
79
- - classpath/xz-1.5.jar
80
- - classpath/jackson-mapper-asl-1.9.13.jar
81
79
  - classpath/paranamer-2.7.jar
82
- - classpath/avro-1.8.1.jar
83
- - classpath/commons-compress-1.8.1.jar
80
+ - classpath/xz-1.5.jar
84
81
  - classpath/snappy-java-1.1.1.3.jar
85
- - classpath/embulk-formatter-avro-0.1.1.jar
86
82
  - classpath/jackson-core-asl-1.9.13.jar
83
+ - classpath/embulk-formatter-avro-0.2.1.jar
84
+ - classpath/avro-1.8.1.jar
85
+ - classpath/jackson-mapper-asl-1.9.13.jar
86
+ - classpath/commons-compress-1.8.1.jar
87
87
  homepage: https://github.com/joker1007/embulk-formatter-avro
88
88
  licenses:
89
89
  - MIT