embulk-output-orc 0.2.2 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 041e79d159d0ffe346b0c28f17c7009438bc65e5
4
- data.tar.gz: 13d96f705101ce32a4d389c1539ea23c50db3872
3
+ metadata.gz: 018f6f65a5d6949886d5d0e3b5758befee6d40bf
4
+ data.tar.gz: f5cf2c9745105300c5f15031f4eaa787ebf3b7b3
5
5
  SHA512:
6
- metadata.gz: fdd305bfb25dfd998f0c49e55ef4ffb916f268fa5189678102b3f6e7228532da4d3ea141afe03491a2cf11bbe394975f73868a7620ab0d410306ab998d1f0d95
7
- data.tar.gz: 88574a4b9e2982b80c93307c361ec1de26c0878d8f08acfc0333eb770df74a7e76ef38747b94a57c351d4cd42e6fce015d141ef8d94e5f47bc6ecab1eeca085e
6
+ metadata.gz: 7da18070bcf26399ea3835c0336f922518f15e9f1e4fe27f6da32e0a7254234e40f70fbce934b7f3f4dfcef06af08687e20a7ca58b82b1ed16edf1d762878c7d
7
+ data.tar.gz: 93224c587d49b5d758d5d311c8ba88efd45340f0d4d5a0e7f9cdd895b39160358c339ae761e83f1fb7e5aa941b5369cc69e77c45688d92a52dc24a0d1dc9ffe1
data/build.gradle CHANGED
@@ -18,7 +18,7 @@ configurations {
18
18
  runtime.exclude group: "org.slf4j", module: "slf4j-log4j12"
19
19
  }
20
20
 
21
- version = "0.2.2"
21
+ version = "0.2.4"
22
22
 
23
23
  sourceCompatibility = 1.8
24
24
  targetCompatibility = 1.8
@@ -1,6 +1,5 @@
1
1
  package org.embulk.output.orc;
2
2
 
3
- import com.google.common.base.Optional;
4
3
  import com.google.common.base.Throwables;
5
4
  import org.apache.hadoop.conf.Configuration;
6
5
  import org.apache.hadoop.fs.LocalFileSystem;
@@ -12,11 +11,8 @@ import org.apache.orc.CompressionKind;
12
11
  import org.apache.orc.OrcFile;
13
12
  import org.apache.orc.TypeDescription;
14
13
  import org.apache.orc.Writer;
15
- import org.embulk.config.Config;
16
- import org.embulk.config.ConfigDefault;
17
14
  import org.embulk.config.ConfigDiff;
18
15
  import org.embulk.config.ConfigSource;
19
- import org.embulk.config.Task;
20
16
  import org.embulk.config.TaskReport;
21
17
  import org.embulk.config.TaskSource;
22
18
  import org.embulk.spi.Column;
@@ -30,72 +26,13 @@ import org.embulk.spi.time.TimestampFormatter;
30
26
  import org.embulk.spi.type.Type;
31
27
  import org.embulk.spi.util.Timestamps;
32
28
  import org.embulk.util.aws.credentials.AwsCredentials;
33
- import org.embulk.util.aws.credentials.AwsCredentialsTask;
34
- import org.joda.time.DateTimeZone;
35
29
 
36
30
  import java.io.IOException;
37
- import java.util.ArrayList;
38
31
  import java.util.List;
39
- import java.util.Map;
40
32
 
41
33
  public class OrcOutputPlugin
42
34
  implements OutputPlugin
43
35
  {
44
- public interface PluginTask
45
- extends Task, TimestampFormatter.Task, AwsCredentialsTask
46
- {
47
- @Config("path_prefix")
48
- String getPathPrefix();
49
-
50
- @Config("file_ext")
51
- @ConfigDefault("\".orc\"")
52
- String getFileNameExtension();
53
-
54
- @Config("column_options")
55
- @ConfigDefault("{}")
56
- Map<String, TimestampColumnOption> getColumnOptions();
57
-
58
- @Config("sequence_format")
59
- @ConfigDefault("\".%03d\"")
60
- String getSequenceFormat();
61
-
62
- // ORC File options
63
- @Config("strip_size")
64
- @ConfigDefault("100000")
65
- Integer getStripSize();
66
-
67
- @Config("buffer_size")
68
- @ConfigDefault("10000")
69
- Integer getBufferSize();
70
-
71
- @Config("compression_kind")
72
- @ConfigDefault("ZLIB")
73
- public String getCompressionKind();
74
-
75
- @Config("overwrite")
76
- @ConfigDefault("false")
77
- boolean getOverwrite();
78
-
79
- @Config("default_from_timezone")
80
- @ConfigDefault("\"UTC\"")
81
- DateTimeZone getDefaultFromTimeZone();
82
-
83
- @Config("endpoint")
84
- @ConfigDefault("null")
85
- Optional<String> getEndpoint();
86
- }
87
-
88
- public interface TimestampColumnOption
89
- extends Task, TimestampFormatter.TimestampColumnOption
90
- {
91
- @Config("from_timezone")
92
- @ConfigDefault("null")
93
- Optional<DateTimeZone> getFromTimeZone();
94
-
95
- @Config("from_format")
96
- @ConfigDefault("null")
97
- Optional<List<String>> getFromFormat();
98
- }
99
36
 
100
37
  @Override
101
38
  public ConfigDiff transaction(ConfigSource config,
@@ -237,24 +174,7 @@ public class OrcOutputPlugin
237
174
  final Integer bufferSize = task.getBufferSize();
238
175
  final Integer stripSize = task.getStripSize();
239
176
  final String kindString = task.getCompressionKind();
240
- CompressionKind kind;
241
- switch (kindString) {
242
- case "ZLIB":
243
- kind = CompressionKind.ZLIB;
244
- break;
245
- case "SNAPPY":
246
- kind = CompressionKind.SNAPPY;
247
- break;
248
- case "LZO":
249
- kind = CompressionKind.LZO;
250
- break;
251
- case "LZ4":
252
- kind = CompressionKind.LZ4;
253
- break;
254
- default:
255
- kind = CompressionKind.NONE;
256
- break;
257
- }
177
+ CompressionKind kind = CompressionKind.valueOf(kindString);
258
178
  return OrcFile.writerOptions(conf).
259
179
  bufferSize(bufferSize)
260
180
  .stripeSize(stripSize)
@@ -266,7 +186,6 @@ public class OrcOutputPlugin
266
186
  {
267
187
  private final PageReader reader;
268
188
  private final Writer writer;
269
- private final ArrayList<VectorizedRowBatch> rowBatches = new ArrayList<>();
270
189
 
271
190
  public OrcTransactionalPageOutput(PageReader reader, Writer writer, PluginTask task)
272
191
  {
@@ -290,8 +209,12 @@ public class OrcOutputPlugin
290
209
  );
291
210
  i++;
292
211
  }
293
- synchronized (this) {
294
- rowBatches.add(batch);
212
+ try {
213
+ writer.addRowBatch(batch);
214
+ batch.reset();
215
+ }
216
+ catch (IOException e) {
217
+ e.printStackTrace();
295
218
  }
296
219
  }
297
220
 
@@ -299,9 +222,6 @@ public class OrcOutputPlugin
299
222
  public void finish()
300
223
  {
301
224
  try {
302
- for (VectorizedRowBatch batch : rowBatches) {
303
- writer.addRowBatch(batch);
304
- }
305
225
  writer.close();
306
226
  }
307
227
  catch (IOException e) {
@@ -0,0 +1,55 @@
1
+ package org.embulk.output.orc;
2
+
3
+ import com.google.common.base.Optional;
4
+ import org.embulk.config.Config;
5
+ import org.embulk.config.ConfigDefault;
6
+ import org.embulk.config.Task;
7
+ import org.embulk.spi.time.TimestampFormatter;
8
+ import org.embulk.util.aws.credentials.AwsCredentialsTask;
9
+ import org.joda.time.DateTimeZone;
10
+
11
+ import java.util.Map;
12
+
13
+ public interface PluginTask
14
+ extends Task, TimestampFormatter.Task, AwsCredentialsTask
15
+ {
16
+ @Config("path_prefix")
17
+ String getPathPrefix();
18
+
19
+ @Config("file_ext")
20
+ @ConfigDefault("\".orc\"")
21
+ String getFileNameExtension();
22
+
23
+ @Config("column_options")
24
+ @ConfigDefault("{}")
25
+ Map<String, TimestampColumnOption> getColumnOptions();
26
+
27
+ @Config("sequence_format")
28
+ @ConfigDefault("\".%03d\"")
29
+ String getSequenceFormat();
30
+
31
+ // ORC File options
32
+ @Config("strip_size")
33
+ @ConfigDefault("100000")
34
+ Integer getStripSize();
35
+
36
+ @Config("buffer_size")
37
+ @ConfigDefault("10000")
38
+ Integer getBufferSize();
39
+
40
+ @Config("compression_kind")
41
+ @ConfigDefault("ZLIB")
42
+ public String getCompressionKind();
43
+
44
+ @Config("overwrite")
45
+ @ConfigDefault("false")
46
+ boolean getOverwrite();
47
+
48
+ @Config("default_from_timezone")
49
+ @ConfigDefault("\"UTC\"")
50
+ DateTimeZone getDefaultFromTimeZone();
51
+
52
+ @Config("endpoint")
53
+ @ConfigDefault("null")
54
+ Optional<String> getEndpoint();
55
+ }
@@ -0,0 +1,22 @@
1
+ package org.embulk.output.orc;
2
+
3
+ import com.google.common.base.Optional;
4
+ import org.embulk.config.Config;
5
+ import org.embulk.config.ConfigDefault;
6
+ import org.embulk.config.Task;
7
+ import org.embulk.spi.time.TimestampFormatter;
8
+ import org.joda.time.DateTimeZone;
9
+
10
+ import java.util.List;
11
+
12
+ public interface TimestampColumnOption
13
+ extends Task, TimestampFormatter.TimestampColumnOption
14
+ {
15
+ @Config("from_timezone")
16
+ @ConfigDefault("null")
17
+ Optional<DateTimeZone> getFromTimeZone();
18
+
19
+ @Config("from_format")
20
+ @ConfigDefault("null")
21
+ Optional<List<String>> getFromFormat();
22
+ }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-output-orc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.2.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - yuokada
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-10-17 00:00:00.000000000 Z
11
+ date: 2017-12-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -58,10 +58,11 @@ files:
58
58
  - gradlew
59
59
  - gradlew.bat
60
60
  - lib/embulk/output/orc.rb
61
- - src/main/java/org/embulk/output/orc/OrcCodec.java
62
61
  - src/main/java/org/embulk/output/orc/OrcColumnVisitor.java
63
62
  - src/main/java/org/embulk/output/orc/OrcOutputPlugin.java
64
63
  - src/main/java/org/embulk/output/orc/OrcOutputPluginHelper.java
64
+ - src/main/java/org/embulk/output/orc/PluginTask.java
65
+ - src/main/java/org/embulk/output/orc/TimestampColumnOption.java
65
66
  - src/test/java/org/embulk/output/orc/TestOrcOutputPlugin.java
66
67
  - classpath/activation-1.1.jar
67
68
  - classpath/aircompressor-0.3.jar
@@ -93,7 +94,7 @@ files:
93
94
  - classpath/curator-client-2.7.1.jar
94
95
  - classpath/curator-framework-2.7.1.jar
95
96
  - classpath/curator-recipes-2.7.1.jar
96
- - classpath/embulk-output-orc-0.2.2.jar
97
+ - classpath/embulk-output-orc-0.2.4.jar
97
98
  - classpath/embulk-util-aws-credentials-0.2.8.jar
98
99
  - classpath/gson-2.2.4.jar
99
100
  - classpath/hadoop-annotations-2.7.3.jar
Binary file
@@ -1,21 +0,0 @@
1
- package org.embulk.output.orc;
2
-
3
- public enum OrcCodec
4
- {
5
- ZLIB("zlib"),
6
- SNAPPY("snappy"),
7
- LZO("lzo"),
8
- LZ4("lz4"),
9
- NONE("none"),;
10
- String kind;
11
-
12
- OrcCodec(String kind)
13
- {
14
- this.kind = kind;
15
- }
16
-
17
- public String getKind()
18
- {
19
- return kind;
20
- }
21
- }