embulk-output-orc 0.2.4 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 018f6f65a5d6949886d5d0e3b5758befee6d40bf
4
- data.tar.gz: f5cf2c9745105300c5f15031f4eaa787ebf3b7b3
3
+ metadata.gz: 5bf0784f61bbc808d36ebce5e46aaab889b891a3
4
+ data.tar.gz: 8937c475721a4f2c347575580982ce2a772f8d63
5
5
  SHA512:
6
- metadata.gz: 7da18070bcf26399ea3835c0336f922518f15e9f1e4fe27f6da32e0a7254234e40f70fbce934b7f3f4dfcef06af08687e20a7ca58b82b1ed16edf1d762878c7d
7
- data.tar.gz: 93224c587d49b5d758d5d311c8ba88efd45340f0d4d5a0e7f9cdd895b39160358c339ae761e83f1fb7e5aa941b5369cc69e77c45688d92a52dc24a0d1dc9ffe1
6
+ metadata.gz: 23a1a87ca07df8ebc6d17575a3abcf58cf9c7eb5cd6569ba62cfd7fa3cb52c42cc27a00e65f99d821c396e897ebaac78c3dc5dfe9ee6a750049c2017f08d9fa5
7
+ data.tar.gz: 0c08613e8c5182987a4bbb03ae3a0ce9eddb474a1b8672aa5fbc25e69da4ea0784a9982c6bc3888263eddb68213ec885eb3bb2000aaf187cb94148fa593a780d
data/README.md CHANGED
@@ -16,8 +16,9 @@
16
16
  - support: `file`, `s3n` and `s3a`.
17
17
  - **file_ext**: An extension of output file. (string, default: `.orc`)
18
18
  - **sequence_format**: (string, default: `.%03d`)
19
- - **buffer_size**: Set the ORC buffer size (integer, default: `10000`)
20
- - **strip_size**: Set the ORC strip size (integer, default: `100000`)
19
+ - **buffer_size**: Set the ORC buffer size (integer, default: `262144`)
20
+ - **strip_size**: Set the ORC stripe size (integer, default: `67108864`)
21
+ - **block_size**: Set the ORC block size (integer, default: `268435456`)
21
22
  - **compression_kind**: Compression codec used for the ORC output file (string, default: `'ZLIB'`)
22
23
  - `NONE`, `ZLIB`, `SNAPPY`
23
24
  - **overwrite**: (LocalFileSystem only) Overwrite if output files already exist. (boolean, default: `false`)
@@ -18,7 +18,7 @@ configurations {
18
18
  runtime.exclude group: "org.slf4j", module: "slf4j-log4j12"
19
19
  }
20
20
 
21
- version = "0.2.4"
21
+ version = "0.3.0"
22
22
 
23
23
  sourceCompatibility = 1.8
24
24
  targetCompatibility = 1.8
@@ -33,7 +33,6 @@ import java.util.List;
33
33
  public class OrcOutputPlugin
34
34
  implements OutputPlugin
35
35
  {
36
-
37
36
  @Override
38
37
  public ConfigDiff transaction(ConfigSource config,
39
38
  Schema schema, int taskCount,
@@ -173,10 +172,12 @@ public class OrcOutputPlugin
173
172
  {
174
173
  final Integer bufferSize = task.getBufferSize();
175
174
  final Integer stripSize = task.getStripSize();
175
+ final Integer blockSize = task.getBlockSize();
176
176
  final String kindString = task.getCompressionKind();
177
177
  CompressionKind kind = CompressionKind.valueOf(kindString);
178
- return OrcFile.writerOptions(conf).
179
- bufferSize(bufferSize)
178
+ return OrcFile.writerOptions(conf)
179
+ .bufferSize(bufferSize)
180
+ .blockSize(blockSize)
180
181
  .stripeSize(stripSize)
181
182
  .compress(kind);
182
183
  }
@@ -28,15 +28,20 @@ public interface PluginTask
28
28
  @ConfigDefault("\".%03d\"")
29
29
  String getSequenceFormat();
30
30
 
31
+ // see: https://orc.apache.org/docs/hive-config.html
31
32
  // ORC File options
32
33
  @Config("strip_size")
33
- @ConfigDefault("100000")
34
+ @ConfigDefault("67108864") // 64MB
34
35
  Integer getStripSize();
35
36
 
36
37
  @Config("buffer_size")
37
- @ConfigDefault("10000")
38
+ @ConfigDefault("262144") // 256KB
38
39
  Integer getBufferSize();
39
40
 
41
+ @Config("block_size")
42
+ @ConfigDefault("268435456") // 256MB
43
+ Integer getBlockSize();
44
+
40
45
  @Config("compression_kind")
41
46
  @ConfigDefault("ZLIB")
42
47
  public String getCompressionKind();
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-output-orc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.4
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - yuokada
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-12-02 00:00:00.000000000 Z
11
+ date: 2017-12-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -94,7 +94,7 @@ files:
94
94
  - classpath/curator-client-2.7.1.jar
95
95
  - classpath/curator-framework-2.7.1.jar
96
96
  - classpath/curator-recipes-2.7.1.jar
97
- - classpath/embulk-output-orc-0.2.4.jar
97
+ - classpath/embulk-output-orc-0.3.0.jar
98
98
  - classpath/embulk-util-aws-credentials-0.2.8.jar
99
99
  - classpath/gson-2.2.4.jar
100
100
  - classpath/hadoop-annotations-2.7.3.jar