embulk-formatter-avro 0.1.1 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9fe7f1bec5a58a73ef465e0dbe1d905a11af5ff0
4
- data.tar.gz: 40f003f4911ab7c635efb83b8a67820baad5ffa4
3
+ metadata.gz: 297627941b7e4e884febe21ab19701ee8d136d95
4
+ data.tar.gz: 9d6745e4e739064db4fe91e78c0cc39fe3948488
5
5
  SHA512:
6
- metadata.gz: 96b5a25afb51f07f9e629e00fd7536140819eaeb2d70d7cb548decb9139de5919b33ca9ee9f72adfe9924a13e1c1822fa61dcabb286603ab596fd5ca40426b17
7
- data.tar.gz: c105fe45e42fffa3a6c45a74b12b4f5a242018fb2d262b0b623ee8e4356aec5f7aaece17b5e0e785f5295d0e324698e24c7011376e718ec8b2b765eae5ff2757
6
+ metadata.gz: dd3fa9f5211bb629007fe942a68bfa99a66c31167864ef241258611c125bd77d232bbdcd97960369d3e04b56467c9dc9e11bf8cb8f955877fd9ea244f5242ffb
7
+ data.tar.gz: c8884bc84f00bf6720728db03033370ba5285d64431a12424f034a104488eb78aa4f9463e6c8e762d698d04992de40e13a1585c0da01479b10a6e3b4c85df1c2
data/README.md CHANGED
@@ -28,6 +28,8 @@ See. [AvroValueConverters](https://github.com/joker1007/embulk-formatter-avro/tr
28
28
  ## Configuration
29
29
 
30
30
  - **avsc**: avro schema (avsc) filepath (string, required)
31
+ - **codec**: Avro codec type (enum: `deflate`, `bzip2`, `xz`, `snappy`; optional, no compression when omitted)
32
+ - **compression\_level**: Avro codec compression level (integer, optional; only used by the `deflate` and `xz` codecs)
31
33
  - **skip\_error\_record**: Set to `true` to skip error records (boolean, default: `false`)
32
34
 
33
35
  ## Example
@@ -13,7 +13,7 @@ configurations {
13
13
  provided
14
14
  }
15
15
 
16
- version = "0.1.1"
16
+ version = "0.2.1"
17
17
 
18
18
  sourceCompatibility = 1.7
19
19
  targetCompatibility = 1.7
@@ -23,6 +23,7 @@ in:
23
23
  - {name: data2, type: json}
24
24
  - {name: type, type: string}
25
25
  - {name: md5, type: string}
26
+ - {name: flag, type: boolean}
26
27
 
27
28
  out:
28
29
  type: file
@@ -1,7 +1,7 @@
1
- id,account,time,purchase,comment,data,data2,type,md5
2
- 1,32864,2015-01-27 19:23:49,20150127,embulk,"{\"foo\": \"bar\", \"events\": [{\"id\": 1, \"name\": \"Name1\"}, {\"id\": 2, \"name\": \"Name2\"}]}","[{\"hoge\": 1}]","A",6c3983adda53b2136532dbd5b4d668b9
3
- 2,14824,2015-01-27 19:01:23,20150127,embulk jruby,"{\"events\": [{\"id\": 1, \"name\": \"Name1\"}, {\"id\": 2, \"name\": \"Name2\"}]}","[]","B",NULL
4
- 3,27559,2015-01-28 02:20:02,20150128,"Embulk ""csv"" parser plugin",NULL,"[]","C",8c3983ad4jsal42010325kld024954ll
5
- 4,11270,2015-01-29 11:54:36,20150129,NULL,NULL,"[]","A",474a3bdb966b40c80acb069f73441f29
6
- 4,11270,2015-01-29 11:54:36,20150129,NULL,NULL,"[]","D",NULL
7
- 4,11270,2015-01-29 11:54:36,20150129,NULL,NULL,NULL,"A",NULL
1
+ id,account,time,purchase,comment,data,data2,type,md5,flag
2
+ 1,32864,2015-01-27 19:23:49,20150127,embulk,"{\"foo\": \"bar\", \"events\": [{\"id\": 1, \"name\": \"Name1\"}, {\"id\": 2, \"name\": \"Name2\"}]}","[{\"hoge\": 1}]","A",6c3983adda53b2136532dbd5b4d668b9,true
3
+ 2,14824,2015-01-27 19:01:23,20150127,embulk jruby,"{\"events\": [{\"id\": 1, \"name\": \"Name1\"}, {\"id\": 2, \"name\": \"Name2\"}]}","[]","B",NULL,true
4
+ 3,27559,2015-01-28 02:20:02,20150128,"Embulk ""csv"" parser plugin",NULL,"[]","C",8c3983ad4jsal42010325kld024954ll,true
5
+ 4,11270,2015-01-29 11:54:36,20150129,NULL,NULL,"[]","A",474a3bdb966b40c80acb069f73441f29,true
6
+ 4,11270,2015-01-29 11:54:36,20150129,NULL,NULL,"[]","D",NULL,false
7
+ 4,11270,2015-01-29 11:54:36,20150129,NULL,NULL,NULL,"A",NULL,false
@@ -28,6 +28,7 @@
28
28
  }
29
29
  }},
30
30
  {"name": "type", "type": {"type": "enum", "name": "type_enum", "symbols": ["A", "B", "C"]}},
31
- {"name": "md5", "type": ["null", {"type": "fixed", "name": "md5_value", "size": 32}]}
31
+ {"name": "md5", "type": ["null", {"type": "fixed", "name": "md5_value", "size": 32}]},
32
+ {"name": "flag", "type": "boolean"}
32
33
  ]
33
34
  }
@@ -1,5 +1,9 @@
1
1
  package org.embulk.formatter.avro;
2
2
 
3
+ import com.fasterxml.jackson.annotation.JsonCreator;
4
+ import com.fasterxml.jackson.annotation.JsonValue;
5
+ import com.google.common.base.Optional;
6
+ import org.apache.avro.file.CodecFactory;
3
7
  import org.apache.avro.file.DataFileWriter;
4
8
  import org.apache.avro.generic.GenericData;
5
9
  import org.apache.avro.generic.GenericDatumWriter;
@@ -30,6 +34,7 @@ import org.slf4j.Logger;
30
34
 
31
35
  import java.io.File;
32
36
  import java.io.IOException;
37
+ import java.util.Locale;
33
38
  import java.util.Map;
34
39
 
35
40
  public class AvroFormatterPlugin
@@ -41,6 +46,14 @@ public class AvroFormatterPlugin
41
46
  @Config("avsc")
42
47
  LocalFile getAvsc();
43
48
 
49
+ @Config("codec")
50
+ @ConfigDefault("null")
51
+ Optional<Codec> getCodec();
52
+
53
+ @Config("compression_level")
54
+ @ConfigDefault("null")
55
+ Optional<Integer> getCompressionLevel();
56
+
44
57
  @Config("column_options")
45
58
  @ConfigDefault("{}")
46
59
  Map<String, TimestampFormatter.TimestampColumnOption> getColumnOptions();
@@ -95,6 +108,7 @@ public class AvroFormatterPlugin
95
108
  avroSchema = new org.apache.avro.Schema.Parser().parse(avsc);
96
109
  GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(avroSchema);
97
110
  writer = new DataFileWriter<>(datumWriter);
111
+ writer.setCodec(task.getCodec().or(Codec.NULL).getCodecFactory(task.getCompressionLevel()));
98
112
  stream.nextFile();
99
113
  writer.create(avroSchema, stream);
100
114
  } catch (IOException e) {
@@ -166,4 +180,56 @@ public class AvroFormatterPlugin
166
180
  }
167
181
  };
168
182
  }
183
+
184
+ public enum Codec {
185
+ NULL {
186
+ public CodecFactory getCodecFactory(Optional<Integer> compressionLevel) {
187
+ return CodecFactory.nullCodec();
188
+ }
189
+ },
190
+ DEFLATE {
191
+ public CodecFactory getCodecFactory(Optional<Integer> compressionLevel) {
192
+ return CodecFactory.deflateCodec(compressionLevel.or(CodecFactory.DEFAULT_DEFLATE_LEVEL));
193
+ }
194
+ },
195
+ XZ {
196
+ public CodecFactory getCodecFactory(Optional<Integer> compressionLevel) {
197
+ return CodecFactory.xzCodec(compressionLevel.or(CodecFactory.DEFAULT_XZ_LEVEL));
198
+ }
199
+ },
200
+ SNAPPY {
201
+ public CodecFactory getCodecFactory(Optional<Integer> compressionLevel) {
202
+ return CodecFactory.snappyCodec();
203
+ }
204
+ },
205
+ BZIP2 {
206
+ public CodecFactory getCodecFactory(Optional<Integer> compressionLevel) {
207
+ return CodecFactory.bzip2Codec();
208
+ }
209
+ };
210
+
211
+ @JsonValue
212
+ @Override
213
+ public String toString() {
214
+ return name().toLowerCase(Locale.ENGLISH);
215
+ }
216
+
217
+ abstract public CodecFactory getCodecFactory(Optional<Integer> compressionLevel);
218
+
219
+ @JsonCreator
220
+ public static Codec fromString(String name) {
221
+ switch (name) {
222
+ case "deflate":
223
+ return DEFLATE;
224
+ case "xz":
225
+ return XZ;
226
+ case "snappy":
227
+ return SNAPPY;
228
+ case "bzip2":
229
+ return BZIP2;
230
+ default:
231
+ throw new ConfigException(String.format("Unknown codec '%s'. Supported codecs are deflate, xz, snappy, bzip2", name));
232
+ }
233
+ }
234
+ }
169
235
  }
@@ -1,6 +1,7 @@
1
1
  package org.embulk.formatter.avro.converter;
2
2
 
3
3
  import org.apache.avro.Schema;
4
+ import org.msgpack.value.Value;
4
5
 
5
6
  public class AvroBooleanConverter extends AbstractAvroValueConverter {
6
7
  public AvroBooleanConverter(Schema schema) {
@@ -8,7 +9,22 @@ public class AvroBooleanConverter extends AbstractAvroValueConverter {
8
9
  }
9
10
 
10
11
  @Override
11
- public Object booleanColumn(boolean value) {
12
- throw new RuntimeException("Unsupported typecasting");
12
+ public Boolean booleanColumn(boolean value) {
13
+ return value;
14
+ }
15
+
16
+ @Override
17
+ public Boolean longColumn(long value) {
18
+ return value != 0;
19
+ }
20
+
21
+ @Override
22
+ public Boolean doubleColumn(double value) {
23
+ return value != 0;
24
+ }
25
+
26
+ @Override
27
+ public Boolean stringColumn(String value) {
28
+ return Boolean.valueOf(value);
13
29
  }
14
30
  }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-formatter-avro
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - joker1007
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-01-04 00:00:00.000000000 Z
11
+ date: 2017-01-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -76,14 +76,14 @@ files:
76
76
  - src/main/java/org/embulk/formatter/avro/converter/AvroStringConverter.java
77
77
  - src/main/java/org/embulk/formatter/avro/converter/AvroValueConverterFactory.java
78
78
  - src/test/java/org/embulk/formatter/avro/TestAvroFormatterPlugin.java
79
- - classpath/xz-1.5.jar
80
- - classpath/jackson-mapper-asl-1.9.13.jar
81
79
  - classpath/paranamer-2.7.jar
82
- - classpath/avro-1.8.1.jar
83
- - classpath/commons-compress-1.8.1.jar
80
+ - classpath/xz-1.5.jar
84
81
  - classpath/snappy-java-1.1.1.3.jar
85
- - classpath/embulk-formatter-avro-0.1.1.jar
86
82
  - classpath/jackson-core-asl-1.9.13.jar
83
+ - classpath/embulk-formatter-avro-0.2.1.jar
84
+ - classpath/avro-1.8.1.jar
85
+ - classpath/jackson-mapper-asl-1.9.13.jar
86
+ - classpath/commons-compress-1.8.1.jar
87
87
  homepage: https://github.com/joker1007/embulk-formatter-avro
88
88
  licenses:
89
89
  - MIT