embulk-output-orc 0.2.2 → 0.2.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/build.gradle +1 -1
- data/classpath/embulk-output-orc-0.2.4.jar +0 -0
- data/src/main/java/org/embulk/output/orc/OrcOutputPlugin.java +7 -87
- data/src/main/java/org/embulk/output/orc/PluginTask.java +55 -0
- data/src/main/java/org/embulk/output/orc/TimestampColumnOption.java +22 -0
- metadata +5 -4
- data/classpath/embulk-output-orc-0.2.2.jar +0 -0
- data/src/main/java/org/embulk/output/orc/OrcCodec.java +0 -21
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 018f6f65a5d6949886d5d0e3b5758befee6d40bf
|
4
|
+
data.tar.gz: f5cf2c9745105300c5f15031f4eaa787ebf3b7b3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7da18070bcf26399ea3835c0336f922518f15e9f1e4fe27f6da32e0a7254234e40f70fbce934b7f3f4dfcef06af08687e20a7ca58b82b1ed16edf1d762878c7d
|
7
|
+
data.tar.gz: 93224c587d49b5d758d5d311c8ba88efd45340f0d4d5a0e7f9cdd895b39160358c339ae761e83f1fb7e5aa941b5369cc69e77c45688d92a52dc24a0d1dc9ffe1
|
data/build.gradle
CHANGED
Binary file
|
@@ -1,6 +1,5 @@
|
|
1
1
|
package org.embulk.output.orc;
|
2
2
|
|
3
|
-
import com.google.common.base.Optional;
|
4
3
|
import com.google.common.base.Throwables;
|
5
4
|
import org.apache.hadoop.conf.Configuration;
|
6
5
|
import org.apache.hadoop.fs.LocalFileSystem;
|
@@ -12,11 +11,8 @@ import org.apache.orc.CompressionKind;
|
|
12
11
|
import org.apache.orc.OrcFile;
|
13
12
|
import org.apache.orc.TypeDescription;
|
14
13
|
import org.apache.orc.Writer;
|
15
|
-
import org.embulk.config.Config;
|
16
|
-
import org.embulk.config.ConfigDefault;
|
17
14
|
import org.embulk.config.ConfigDiff;
|
18
15
|
import org.embulk.config.ConfigSource;
|
19
|
-
import org.embulk.config.Task;
|
20
16
|
import org.embulk.config.TaskReport;
|
21
17
|
import org.embulk.config.TaskSource;
|
22
18
|
import org.embulk.spi.Column;
|
@@ -30,72 +26,13 @@ import org.embulk.spi.time.TimestampFormatter;
|
|
30
26
|
import org.embulk.spi.type.Type;
|
31
27
|
import org.embulk.spi.util.Timestamps;
|
32
28
|
import org.embulk.util.aws.credentials.AwsCredentials;
|
33
|
-
import org.embulk.util.aws.credentials.AwsCredentialsTask;
|
34
|
-
import org.joda.time.DateTimeZone;
|
35
29
|
|
36
30
|
import java.io.IOException;
|
37
|
-
import java.util.ArrayList;
|
38
31
|
import java.util.List;
|
39
|
-
import java.util.Map;
|
40
32
|
|
41
33
|
public class OrcOutputPlugin
|
42
34
|
implements OutputPlugin
|
43
35
|
{
|
44
|
-
public interface PluginTask
|
45
|
-
extends Task, TimestampFormatter.Task, AwsCredentialsTask
|
46
|
-
{
|
47
|
-
@Config("path_prefix")
|
48
|
-
String getPathPrefix();
|
49
|
-
|
50
|
-
@Config("file_ext")
|
51
|
-
@ConfigDefault("\".orc\"")
|
52
|
-
String getFileNameExtension();
|
53
|
-
|
54
|
-
@Config("column_options")
|
55
|
-
@ConfigDefault("{}")
|
56
|
-
Map<String, TimestampColumnOption> getColumnOptions();
|
57
|
-
|
58
|
-
@Config("sequence_format")
|
59
|
-
@ConfigDefault("\".%03d\"")
|
60
|
-
String getSequenceFormat();
|
61
|
-
|
62
|
-
// ORC File options
|
63
|
-
@Config("strip_size")
|
64
|
-
@ConfigDefault("100000")
|
65
|
-
Integer getStripSize();
|
66
|
-
|
67
|
-
@Config("buffer_size")
|
68
|
-
@ConfigDefault("10000")
|
69
|
-
Integer getBufferSize();
|
70
|
-
|
71
|
-
@Config("compression_kind")
|
72
|
-
@ConfigDefault("ZLIB")
|
73
|
-
public String getCompressionKind();
|
74
|
-
|
75
|
-
@Config("overwrite")
|
76
|
-
@ConfigDefault("false")
|
77
|
-
boolean getOverwrite();
|
78
|
-
|
79
|
-
@Config("default_from_timezone")
|
80
|
-
@ConfigDefault("\"UTC\"")
|
81
|
-
DateTimeZone getDefaultFromTimeZone();
|
82
|
-
|
83
|
-
@Config("endpoint")
|
84
|
-
@ConfigDefault("null")
|
85
|
-
Optional<String> getEndpoint();
|
86
|
-
}
|
87
|
-
|
88
|
-
public interface TimestampColumnOption
|
89
|
-
extends Task, TimestampFormatter.TimestampColumnOption
|
90
|
-
{
|
91
|
-
@Config("from_timezone")
|
92
|
-
@ConfigDefault("null")
|
93
|
-
Optional<DateTimeZone> getFromTimeZone();
|
94
|
-
|
95
|
-
@Config("from_format")
|
96
|
-
@ConfigDefault("null")
|
97
|
-
Optional<List<String>> getFromFormat();
|
98
|
-
}
|
99
36
|
|
100
37
|
@Override
|
101
38
|
public ConfigDiff transaction(ConfigSource config,
|
@@ -237,24 +174,7 @@ public class OrcOutputPlugin
|
|
237
174
|
final Integer bufferSize = task.getBufferSize();
|
238
175
|
final Integer stripSize = task.getStripSize();
|
239
176
|
final String kindString = task.getCompressionKind();
|
240
|
-
CompressionKind kind;
|
241
|
-
switch (kindString) {
|
242
|
-
case "ZLIB":
|
243
|
-
kind = CompressionKind.ZLIB;
|
244
|
-
break;
|
245
|
-
case "SNAPPY":
|
246
|
-
kind = CompressionKind.SNAPPY;
|
247
|
-
break;
|
248
|
-
case "LZO":
|
249
|
-
kind = CompressionKind.LZO;
|
250
|
-
break;
|
251
|
-
case "LZ4":
|
252
|
-
kind = CompressionKind.LZ4;
|
253
|
-
break;
|
254
|
-
default:
|
255
|
-
kind = CompressionKind.NONE;
|
256
|
-
break;
|
257
|
-
}
|
177
|
+
CompressionKind kind = CompressionKind.valueOf(kindString);
|
258
178
|
return OrcFile.writerOptions(conf).
|
259
179
|
bufferSize(bufferSize)
|
260
180
|
.stripeSize(stripSize)
|
@@ -266,7 +186,6 @@ public class OrcOutputPlugin
|
|
266
186
|
{
|
267
187
|
private final PageReader reader;
|
268
188
|
private final Writer writer;
|
269
|
-
private final ArrayList<VectorizedRowBatch> rowBatches = new ArrayList<>();
|
270
189
|
|
271
190
|
public OrcTransactionalPageOutput(PageReader reader, Writer writer, PluginTask task)
|
272
191
|
{
|
@@ -290,8 +209,12 @@ public class OrcOutputPlugin
|
|
290
209
|
);
|
291
210
|
i++;
|
292
211
|
}
|
293
|
-
|
294
|
-
|
212
|
+
try {
|
213
|
+
writer.addRowBatch(batch);
|
214
|
+
batch.reset();
|
215
|
+
}
|
216
|
+
catch (IOException e) {
|
217
|
+
e.printStackTrace();
|
295
218
|
}
|
296
219
|
}
|
297
220
|
|
@@ -299,9 +222,6 @@ public class OrcOutputPlugin
|
|
299
222
|
public void finish()
|
300
223
|
{
|
301
224
|
try {
|
302
|
-
for (VectorizedRowBatch batch : rowBatches) {
|
303
|
-
writer.addRowBatch(batch);
|
304
|
-
}
|
305
225
|
writer.close();
|
306
226
|
}
|
307
227
|
catch (IOException e) {
|
@@ -0,0 +1,55 @@
|
|
1
|
+
package org.embulk.output.orc;
|
2
|
+
|
3
|
+
import com.google.common.base.Optional;
|
4
|
+
import org.embulk.config.Config;
|
5
|
+
import org.embulk.config.ConfigDefault;
|
6
|
+
import org.embulk.config.Task;
|
7
|
+
import org.embulk.spi.time.TimestampFormatter;
|
8
|
+
import org.embulk.util.aws.credentials.AwsCredentialsTask;
|
9
|
+
import org.joda.time.DateTimeZone;
|
10
|
+
|
11
|
+
import java.util.Map;
|
12
|
+
|
13
|
+
public interface PluginTask
|
14
|
+
extends Task, TimestampFormatter.Task, AwsCredentialsTask
|
15
|
+
{
|
16
|
+
@Config("path_prefix")
|
17
|
+
String getPathPrefix();
|
18
|
+
|
19
|
+
@Config("file_ext")
|
20
|
+
@ConfigDefault("\".orc\"")
|
21
|
+
String getFileNameExtension();
|
22
|
+
|
23
|
+
@Config("column_options")
|
24
|
+
@ConfigDefault("{}")
|
25
|
+
Map<String, TimestampColumnOption> getColumnOptions();
|
26
|
+
|
27
|
+
@Config("sequence_format")
|
28
|
+
@ConfigDefault("\".%03d\"")
|
29
|
+
String getSequenceFormat();
|
30
|
+
|
31
|
+
// ORC File options
|
32
|
+
@Config("strip_size")
|
33
|
+
@ConfigDefault("100000")
|
34
|
+
Integer getStripSize();
|
35
|
+
|
36
|
+
@Config("buffer_size")
|
37
|
+
@ConfigDefault("10000")
|
38
|
+
Integer getBufferSize();
|
39
|
+
|
40
|
+
@Config("compression_kind")
|
41
|
+
@ConfigDefault("ZLIB")
|
42
|
+
public String getCompressionKind();
|
43
|
+
|
44
|
+
@Config("overwrite")
|
45
|
+
@ConfigDefault("false")
|
46
|
+
boolean getOverwrite();
|
47
|
+
|
48
|
+
@Config("default_from_timezone")
|
49
|
+
@ConfigDefault("\"UTC\"")
|
50
|
+
DateTimeZone getDefaultFromTimeZone();
|
51
|
+
|
52
|
+
@Config("endpoint")
|
53
|
+
@ConfigDefault("null")
|
54
|
+
Optional<String> getEndpoint();
|
55
|
+
}
|
@@ -0,0 +1,22 @@
|
|
1
|
+
package org.embulk.output.orc;
|
2
|
+
|
3
|
+
import com.google.common.base.Optional;
|
4
|
+
import org.embulk.config.Config;
|
5
|
+
import org.embulk.config.ConfigDefault;
|
6
|
+
import org.embulk.config.Task;
|
7
|
+
import org.embulk.spi.time.TimestampFormatter;
|
8
|
+
import org.joda.time.DateTimeZone;
|
9
|
+
|
10
|
+
import java.util.List;
|
11
|
+
|
12
|
+
public interface TimestampColumnOption
|
13
|
+
extends Task, TimestampFormatter.TimestampColumnOption
|
14
|
+
{
|
15
|
+
@Config("from_timezone")
|
16
|
+
@ConfigDefault("null")
|
17
|
+
Optional<DateTimeZone> getFromTimeZone();
|
18
|
+
|
19
|
+
@Config("from_format")
|
20
|
+
@ConfigDefault("null")
|
21
|
+
Optional<List<String>> getFromFormat();
|
22
|
+
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-output-orc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.2
|
4
|
+
version: 0.2.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yuokada
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-12-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -58,10 +58,11 @@ files:
|
|
58
58
|
- gradlew
|
59
59
|
- gradlew.bat
|
60
60
|
- lib/embulk/output/orc.rb
|
61
|
-
- src/main/java/org/embulk/output/orc/OrcCodec.java
|
62
61
|
- src/main/java/org/embulk/output/orc/OrcColumnVisitor.java
|
63
62
|
- src/main/java/org/embulk/output/orc/OrcOutputPlugin.java
|
64
63
|
- src/main/java/org/embulk/output/orc/OrcOutputPluginHelper.java
|
64
|
+
- src/main/java/org/embulk/output/orc/PluginTask.java
|
65
|
+
- src/main/java/org/embulk/output/orc/TimestampColumnOption.java
|
65
66
|
- src/test/java/org/embulk/output/orc/TestOrcOutputPlugin.java
|
66
67
|
- classpath/activation-1.1.jar
|
67
68
|
- classpath/aircompressor-0.3.jar
|
@@ -93,7 +94,7 @@ files:
|
|
93
94
|
- classpath/curator-client-2.7.1.jar
|
94
95
|
- classpath/curator-framework-2.7.1.jar
|
95
96
|
- classpath/curator-recipes-2.7.1.jar
|
96
|
-
- classpath/embulk-output-orc-0.2.2.jar
|
97
|
+
- classpath/embulk-output-orc-0.2.4.jar
|
97
98
|
- classpath/embulk-util-aws-credentials-0.2.8.jar
|
98
99
|
- classpath/gson-2.2.4.jar
|
99
100
|
- classpath/hadoop-annotations-2.7.3.jar
|
Binary file
|
@@ -1,21 +0,0 @@
|
|
1
|
-
package org.embulk.output.orc;
|
2
|
-
|
3
|
-
public enum OrcCodec
|
4
|
-
{
|
5
|
-
ZLIB("zlib"),
|
6
|
-
SNAPPY("snappy"),
|
7
|
-
LZO("lzo"),
|
8
|
-
LZ4("lz4"),
|
9
|
-
NONE("none"),;
|
10
|
-
String kind;
|
11
|
-
|
12
|
-
OrcCodec(String kind)
|
13
|
-
{
|
14
|
-
this.kind = kind;
|
15
|
-
}
|
16
|
-
|
17
|
-
public String getKind()
|
18
|
-
{
|
19
|
-
return kind;
|
20
|
-
}
|
21
|
-
}
|