embulk-output-orc 0.2.2 → 0.2.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 041e79d159d0ffe346b0c28f17c7009438bc65e5
4
- data.tar.gz: 13d96f705101ce32a4d389c1539ea23c50db3872
3
+ metadata.gz: 018f6f65a5d6949886d5d0e3b5758befee6d40bf
4
+ data.tar.gz: f5cf2c9745105300c5f15031f4eaa787ebf3b7b3
5
5
  SHA512:
6
- metadata.gz: fdd305bfb25dfd998f0c49e55ef4ffb916f268fa5189678102b3f6e7228532da4d3ea141afe03491a2cf11bbe394975f73868a7620ab0d410306ab998d1f0d95
7
- data.tar.gz: 88574a4b9e2982b80c93307c361ec1de26c0878d8f08acfc0333eb770df74a7e76ef38747b94a57c351d4cd42e6fce015d141ef8d94e5f47bc6ecab1eeca085e
6
+ metadata.gz: 7da18070bcf26399ea3835c0336f922518f15e9f1e4fe27f6da32e0a7254234e40f70fbce934b7f3f4dfcef06af08687e20a7ca58b82b1ed16edf1d762878c7d
7
+ data.tar.gz: 93224c587d49b5d758d5d311c8ba88efd45340f0d4d5a0e7f9cdd895b39160358c339ae761e83f1fb7e5aa941b5369cc69e77c45688d92a52dc24a0d1dc9ffe1
data/build.gradle CHANGED
@@ -18,7 +18,7 @@ configurations {
18
18
  runtime.exclude group: "org.slf4j", module: "slf4j-log4j12"
19
19
  }
20
20
 
21
- version = "0.2.2"
21
+ version = "0.2.4"
22
22
 
23
23
  sourceCompatibility = 1.8
24
24
  targetCompatibility = 1.8
@@ -1,6 +1,5 @@
1
1
  package org.embulk.output.orc;
2
2
 
3
- import com.google.common.base.Optional;
4
3
  import com.google.common.base.Throwables;
5
4
  import org.apache.hadoop.conf.Configuration;
6
5
  import org.apache.hadoop.fs.LocalFileSystem;
@@ -12,11 +11,8 @@ import org.apache.orc.CompressionKind;
12
11
  import org.apache.orc.OrcFile;
13
12
  import org.apache.orc.TypeDescription;
14
13
  import org.apache.orc.Writer;
15
- import org.embulk.config.Config;
16
- import org.embulk.config.ConfigDefault;
17
14
  import org.embulk.config.ConfigDiff;
18
15
  import org.embulk.config.ConfigSource;
19
- import org.embulk.config.Task;
20
16
  import org.embulk.config.TaskReport;
21
17
  import org.embulk.config.TaskSource;
22
18
  import org.embulk.spi.Column;
@@ -30,72 +26,13 @@ import org.embulk.spi.time.TimestampFormatter;
30
26
  import org.embulk.spi.type.Type;
31
27
  import org.embulk.spi.util.Timestamps;
32
28
  import org.embulk.util.aws.credentials.AwsCredentials;
33
- import org.embulk.util.aws.credentials.AwsCredentialsTask;
34
- import org.joda.time.DateTimeZone;
35
29
 
36
30
  import java.io.IOException;
37
- import java.util.ArrayList;
38
31
  import java.util.List;
39
- import java.util.Map;
40
32
 
41
33
  public class OrcOutputPlugin
42
34
  implements OutputPlugin
43
35
  {
44
- public interface PluginTask
45
- extends Task, TimestampFormatter.Task, AwsCredentialsTask
46
- {
47
- @Config("path_prefix")
48
- String getPathPrefix();
49
-
50
- @Config("file_ext")
51
- @ConfigDefault("\".orc\"")
52
- String getFileNameExtension();
53
-
54
- @Config("column_options")
55
- @ConfigDefault("{}")
56
- Map<String, TimestampColumnOption> getColumnOptions();
57
-
58
- @Config("sequence_format")
59
- @ConfigDefault("\".%03d\"")
60
- String getSequenceFormat();
61
-
62
- // ORC File options
63
- @Config("strip_size")
64
- @ConfigDefault("100000")
65
- Integer getStripSize();
66
-
67
- @Config("buffer_size")
68
- @ConfigDefault("10000")
69
- Integer getBufferSize();
70
-
71
- @Config("compression_kind")
72
- @ConfigDefault("ZLIB")
73
- public String getCompressionKind();
74
-
75
- @Config("overwrite")
76
- @ConfigDefault("false")
77
- boolean getOverwrite();
78
-
79
- @Config("default_from_timezone")
80
- @ConfigDefault("\"UTC\"")
81
- DateTimeZone getDefaultFromTimeZone();
82
-
83
- @Config("endpoint")
84
- @ConfigDefault("null")
85
- Optional<String> getEndpoint();
86
- }
87
-
88
- public interface TimestampColumnOption
89
- extends Task, TimestampFormatter.TimestampColumnOption
90
- {
91
- @Config("from_timezone")
92
- @ConfigDefault("null")
93
- Optional<DateTimeZone> getFromTimeZone();
94
-
95
- @Config("from_format")
96
- @ConfigDefault("null")
97
- Optional<List<String>> getFromFormat();
98
- }
99
36
 
100
37
  @Override
101
38
  public ConfigDiff transaction(ConfigSource config,
@@ -237,24 +174,7 @@ public class OrcOutputPlugin
237
174
  final Integer bufferSize = task.getBufferSize();
238
175
  final Integer stripSize = task.getStripSize();
239
176
  final String kindString = task.getCompressionKind();
240
- CompressionKind kind;
241
- switch (kindString) {
242
- case "ZLIB":
243
- kind = CompressionKind.ZLIB;
244
- break;
245
- case "SNAPPY":
246
- kind = CompressionKind.SNAPPY;
247
- break;
248
- case "LZO":
249
- kind = CompressionKind.LZO;
250
- break;
251
- case "LZ4":
252
- kind = CompressionKind.LZ4;
253
- break;
254
- default:
255
- kind = CompressionKind.NONE;
256
- break;
257
- }
177
+ CompressionKind kind = CompressionKind.valueOf(kindString);
258
178
  return OrcFile.writerOptions(conf).
259
179
  bufferSize(bufferSize)
260
180
  .stripeSize(stripSize)
@@ -266,7 +186,6 @@ public class OrcOutputPlugin
266
186
  {
267
187
  private final PageReader reader;
268
188
  private final Writer writer;
269
- private final ArrayList<VectorizedRowBatch> rowBatches = new ArrayList<>();
270
189
 
271
190
  public OrcTransactionalPageOutput(PageReader reader, Writer writer, PluginTask task)
272
191
  {
@@ -290,8 +209,12 @@ public class OrcOutputPlugin
290
209
  );
291
210
  i++;
292
211
  }
293
- synchronized (this) {
294
- rowBatches.add(batch);
212
+ try {
213
+ writer.addRowBatch(batch);
214
+ batch.reset();
215
+ }
216
+ catch (IOException e) {
217
+ e.printStackTrace();
295
218
  }
296
219
  }
297
220
 
@@ -299,9 +222,6 @@ public class OrcOutputPlugin
299
222
  public void finish()
300
223
  {
301
224
  try {
302
- for (VectorizedRowBatch batch : rowBatches) {
303
- writer.addRowBatch(batch);
304
- }
305
225
  writer.close();
306
226
  }
307
227
  catch (IOException e) {
@@ -0,0 +1,55 @@
1
+ package org.embulk.output.orc;
2
+
3
+ import com.google.common.base.Optional;
4
+ import org.embulk.config.Config;
5
+ import org.embulk.config.ConfigDefault;
6
+ import org.embulk.config.Task;
7
+ import org.embulk.spi.time.TimestampFormatter;
8
+ import org.embulk.util.aws.credentials.AwsCredentialsTask;
9
+ import org.joda.time.DateTimeZone;
10
+
11
+ import java.util.Map;
12
+
13
+ public interface PluginTask
14
+ extends Task, TimestampFormatter.Task, AwsCredentialsTask
15
+ {
16
+ @Config("path_prefix")
17
+ String getPathPrefix();
18
+
19
+ @Config("file_ext")
20
+ @ConfigDefault("\".orc\"")
21
+ String getFileNameExtension();
22
+
23
+ @Config("column_options")
24
+ @ConfigDefault("{}")
25
+ Map<String, TimestampColumnOption> getColumnOptions();
26
+
27
+ @Config("sequence_format")
28
+ @ConfigDefault("\".%03d\"")
29
+ String getSequenceFormat();
30
+
31
+ // ORC File options
32
+ @Config("strip_size")
33
+ @ConfigDefault("100000")
34
+ Integer getStripSize();
35
+
36
+ @Config("buffer_size")
37
+ @ConfigDefault("10000")
38
+ Integer getBufferSize();
39
+
40
+ @Config("compression_kind")
41
+ @ConfigDefault("ZLIB")
42
+ public String getCompressionKind();
43
+
44
+ @Config("overwrite")
45
+ @ConfigDefault("false")
46
+ boolean getOverwrite();
47
+
48
+ @Config("default_from_timezone")
49
+ @ConfigDefault("\"UTC\"")
50
+ DateTimeZone getDefaultFromTimeZone();
51
+
52
+ @Config("endpoint")
53
+ @ConfigDefault("null")
54
+ Optional<String> getEndpoint();
55
+ }
@@ -0,0 +1,22 @@
1
+ package org.embulk.output.orc;
2
+
3
+ import com.google.common.base.Optional;
4
+ import org.embulk.config.Config;
5
+ import org.embulk.config.ConfigDefault;
6
+ import org.embulk.config.Task;
7
+ import org.embulk.spi.time.TimestampFormatter;
8
+ import org.joda.time.DateTimeZone;
9
+
10
+ import java.util.List;
11
+
12
+ public interface TimestampColumnOption
13
+ extends Task, TimestampFormatter.TimestampColumnOption
14
+ {
15
+ @Config("from_timezone")
16
+ @ConfigDefault("null")
17
+ Optional<DateTimeZone> getFromTimeZone();
18
+
19
+ @Config("from_format")
20
+ @ConfigDefault("null")
21
+ Optional<List<String>> getFromFormat();
22
+ }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-output-orc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.2.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - yuokada
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-10-17 00:00:00.000000000 Z
11
+ date: 2017-12-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -58,10 +58,11 @@ files:
58
58
  - gradlew
59
59
  - gradlew.bat
60
60
  - lib/embulk/output/orc.rb
61
- - src/main/java/org/embulk/output/orc/OrcCodec.java
62
61
  - src/main/java/org/embulk/output/orc/OrcColumnVisitor.java
63
62
  - src/main/java/org/embulk/output/orc/OrcOutputPlugin.java
64
63
  - src/main/java/org/embulk/output/orc/OrcOutputPluginHelper.java
64
+ - src/main/java/org/embulk/output/orc/PluginTask.java
65
+ - src/main/java/org/embulk/output/orc/TimestampColumnOption.java
65
66
  - src/test/java/org/embulk/output/orc/TestOrcOutputPlugin.java
66
67
  - classpath/activation-1.1.jar
67
68
  - classpath/aircompressor-0.3.jar
@@ -93,7 +94,7 @@ files:
93
94
  - classpath/curator-client-2.7.1.jar
94
95
  - classpath/curator-framework-2.7.1.jar
95
96
  - classpath/curator-recipes-2.7.1.jar
96
- - classpath/embulk-output-orc-0.2.2.jar
97
+ - classpath/embulk-output-orc-0.2.4.jar
97
98
  - classpath/embulk-util-aws-credentials-0.2.8.jar
98
99
  - classpath/gson-2.2.4.jar
99
100
  - classpath/hadoop-annotations-2.7.3.jar
Binary file
@@ -1,21 +0,0 @@
1
- package org.embulk.output.orc;
2
-
3
- public enum OrcCodec
4
- {
5
- ZLIB("zlib"),
6
- SNAPPY("snappy"),
7
- LZO("lzo"),
8
- LZ4("lz4"),
9
- NONE("none"),;
10
- String kind;
11
-
12
- OrcCodec(String kind)
13
- {
14
- this.kind = kind;
15
- }
16
-
17
- public String getKind()
18
- {
19
- return kind;
20
- }
21
- }