embulk-output-orc 0.2.4 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 018f6f65a5d6949886d5d0e3b5758befee6d40bf
4
- data.tar.gz: f5cf2c9745105300c5f15031f4eaa787ebf3b7b3
3
+ metadata.gz: 5bf0784f61bbc808d36ebce5e46aaab889b891a3
4
+ data.tar.gz: 8937c475721a4f2c347575580982ce2a772f8d63
5
5
  SHA512:
6
- metadata.gz: 7da18070bcf26399ea3835c0336f922518f15e9f1e4fe27f6da32e0a7254234e40f70fbce934b7f3f4dfcef06af08687e20a7ca58b82b1ed16edf1d762878c7d
7
- data.tar.gz: 93224c587d49b5d758d5d311c8ba88efd45340f0d4d5a0e7f9cdd895b39160358c339ae761e83f1fb7e5aa941b5369cc69e77c45688d92a52dc24a0d1dc9ffe1
6
+ metadata.gz: 23a1a87ca07df8ebc6d17575a3abcf58cf9c7eb5cd6569ba62cfd7fa3cb52c42cc27a00e65f99d821c396e897ebaac78c3dc5dfe9ee6a750049c2017f08d9fa5
7
+ data.tar.gz: 0c08613e8c5182987a4bbb03ae3a0ce9eddb474a1b8672aa5fbc25e69da4ea0784a9982c6bc3888263eddb68213ec885eb3bb2000aaf187cb94148fa593a780d
data/README.md CHANGED
@@ -16,8 +16,9 @@
16
16
  - support: `file`, `s3n` and `s3a`.
17
17
  - **file_ext**: An extension of output file. (string, default: `.orc`)
18
18
  - **sequence_format**: (string, default: `.%03d`)
19
- - **buffer_size**: Set the ORC buffer size (integer, default: `10000`)
20
- - **strip_size**: Set the ORC strip size (integer, default: `100000`)
19
+ - **buffer_size**: Set the ORC buffer size in bytes (integer, default: `262144`, i.e. 256 KB)
20
+ - **strip_size**: Set the ORC stripe size in bytes (integer, default: `67108864`, i.e. 64 MB)
21
+ - **block_size**: Set the ORC block size in bytes (integer, default: `268435456`, i.e. 256 MB)
21
22
  - **compression_kind**: Compression kind applied by the ORC writer (string, default: `'ZLIB'`)
22
23
  - `NONE`, `ZLIB`, `SNAPPY`
23
24
  - **overwrite**: (LocalFileSystem only) Overwrite if output files already exist. (boolean, default: `false`)
@@ -18,7 +18,7 @@ configurations {
18
18
  runtime.exclude group: "org.slf4j", module: "slf4j-log4j12"
19
19
  }
20
20
 
21
- version = "0.2.4"
21
+ version = "0.3.0"
22
22
 
23
23
  sourceCompatibility = 1.8
24
24
  targetCompatibility = 1.8
@@ -33,7 +33,6 @@ import java.util.List;
33
33
  public class OrcOutputPlugin
34
34
  implements OutputPlugin
35
35
  {
36
-
37
36
  @Override
38
37
  public ConfigDiff transaction(ConfigSource config,
39
38
  Schema schema, int taskCount,
@@ -173,10 +172,12 @@ public class OrcOutputPlugin
173
172
  {
174
173
  final Integer bufferSize = task.getBufferSize();
175
174
  final Integer stripSize = task.getStripSize();
175
+ final Integer blockSize = task.getBlockSize();
176
176
  final String kindString = task.getCompressionKind();
177
177
  CompressionKind kind = CompressionKind.valueOf(kindString);
178
- return OrcFile.writerOptions(conf).
179
- bufferSize(bufferSize)
178
+ return OrcFile.writerOptions(conf)
179
+ .bufferSize(bufferSize)
180
+ .blockSize(blockSize)
180
181
  .stripeSize(stripSize)
181
182
  .compress(kind);
182
183
  }
@@ -28,15 +28,20 @@ public interface PluginTask
28
28
  @ConfigDefault("\".%03d\"")
29
29
  String getSequenceFormat();
30
30
 
31
+ // see: https://orc.apache.org/docs/hive-config.html
31
32
  // ORC File options
32
33
  @Config("strip_size")
33
- @ConfigDefault("100000")
34
+ @ConfigDefault("67108864") // 64MB
34
35
  Integer getStripSize();
35
36
 
36
37
  @Config("buffer_size")
37
- @ConfigDefault("10000")
38
+ @ConfigDefault("262144") // 256KB
38
39
  Integer getBufferSize();
39
40
 
41
+ @Config("block_size")
42
+ @ConfigDefault("268435456") // 256MB
43
+ Integer getBlockSize();
44
+
40
45
  @Config("compression_kind")
41
46
  @ConfigDefault("ZLIB")
42
47
  public String getCompressionKind();
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-output-orc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.4
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - yuokada
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-12-02 00:00:00.000000000 Z
11
+ date: 2017-12-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -94,7 +94,7 @@ files:
94
94
  - classpath/curator-client-2.7.1.jar
95
95
  - classpath/curator-framework-2.7.1.jar
96
96
  - classpath/curator-recipes-2.7.1.jar
97
- - classpath/embulk-output-orc-0.2.4.jar
97
+ - classpath/embulk-output-orc-0.3.0.jar
98
98
  - classpath/embulk-util-aws-credentials-0.2.8.jar
99
99
  - classpath/gson-2.2.4.jar
100
100
  - classpath/hadoop-annotations-2.7.3.jar