embulk-output-orc 0.2.2 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/build.gradle +1 -1
- data/classpath/embulk-output-orc-0.2.4.jar +0 -0
- data/src/main/java/org/embulk/output/orc/OrcOutputPlugin.java +7 -87
- data/src/main/java/org/embulk/output/orc/PluginTask.java +55 -0
- data/src/main/java/org/embulk/output/orc/TimestampColumnOption.java +22 -0
- metadata +5 -4
- data/classpath/embulk-output-orc-0.2.2.jar +0 -0
- data/src/main/java/org/embulk/output/orc/OrcCodec.java +0 -21
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 018f6f65a5d6949886d5d0e3b5758befee6d40bf
|
4
|
+
data.tar.gz: f5cf2c9745105300c5f15031f4eaa787ebf3b7b3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7da18070bcf26399ea3835c0336f922518f15e9f1e4fe27f6da32e0a7254234e40f70fbce934b7f3f4dfcef06af08687e20a7ca58b82b1ed16edf1d762878c7d
|
7
|
+
data.tar.gz: 93224c587d49b5d758d5d311c8ba88efd45340f0d4d5a0e7f9cdd895b39160358c339ae761e83f1fb7e5aa941b5369cc69e77c45688d92a52dc24a0d1dc9ffe1
|
data/build.gradle
CHANGED
Binary file
|
@@ -1,6 +1,5 @@
|
|
1
1
|
package org.embulk.output.orc;
|
2
2
|
|
3
|
-
import com.google.common.base.Optional;
|
4
3
|
import com.google.common.base.Throwables;
|
5
4
|
import org.apache.hadoop.conf.Configuration;
|
6
5
|
import org.apache.hadoop.fs.LocalFileSystem;
|
@@ -12,11 +11,8 @@ import org.apache.orc.CompressionKind;
|
|
12
11
|
import org.apache.orc.OrcFile;
|
13
12
|
import org.apache.orc.TypeDescription;
|
14
13
|
import org.apache.orc.Writer;
|
15
|
-
import org.embulk.config.Config;
|
16
|
-
import org.embulk.config.ConfigDefault;
|
17
14
|
import org.embulk.config.ConfigDiff;
|
18
15
|
import org.embulk.config.ConfigSource;
|
19
|
-
import org.embulk.config.Task;
|
20
16
|
import org.embulk.config.TaskReport;
|
21
17
|
import org.embulk.config.TaskSource;
|
22
18
|
import org.embulk.spi.Column;
|
@@ -30,72 +26,13 @@ import org.embulk.spi.time.TimestampFormatter;
|
|
30
26
|
import org.embulk.spi.type.Type;
|
31
27
|
import org.embulk.spi.util.Timestamps;
|
32
28
|
import org.embulk.util.aws.credentials.AwsCredentials;
|
33
|
-
import org.embulk.util.aws.credentials.AwsCredentialsTask;
|
34
|
-
import org.joda.time.DateTimeZone;
|
35
29
|
|
36
30
|
import java.io.IOException;
|
37
|
-
import java.util.ArrayList;
|
38
31
|
import java.util.List;
|
39
|
-
import java.util.Map;
|
40
32
|
|
41
33
|
public class OrcOutputPlugin
|
42
34
|
implements OutputPlugin
|
43
35
|
{
|
44
|
-
public interface PluginTask
|
45
|
-
extends Task, TimestampFormatter.Task, AwsCredentialsTask
|
46
|
-
{
|
47
|
-
@Config("path_prefix")
|
48
|
-
String getPathPrefix();
|
49
|
-
|
50
|
-
@Config("file_ext")
|
51
|
-
@ConfigDefault("\".orc\"")
|
52
|
-
String getFileNameExtension();
|
53
|
-
|
54
|
-
@Config("column_options")
|
55
|
-
@ConfigDefault("{}")
|
56
|
-
Map<String, TimestampColumnOption> getColumnOptions();
|
57
|
-
|
58
|
-
@Config("sequence_format")
|
59
|
-
@ConfigDefault("\".%03d\"")
|
60
|
-
String getSequenceFormat();
|
61
|
-
|
62
|
-
// ORC File options
|
63
|
-
@Config("strip_size")
|
64
|
-
@ConfigDefault("100000")
|
65
|
-
Integer getStripSize();
|
66
|
-
|
67
|
-
@Config("buffer_size")
|
68
|
-
@ConfigDefault("10000")
|
69
|
-
Integer getBufferSize();
|
70
|
-
|
71
|
-
@Config("compression_kind")
|
72
|
-
@ConfigDefault("ZLIB")
|
73
|
-
public String getCompressionKind();
|
74
|
-
|
75
|
-
@Config("overwrite")
|
76
|
-
@ConfigDefault("false")
|
77
|
-
boolean getOverwrite();
|
78
|
-
|
79
|
-
@Config("default_from_timezone")
|
80
|
-
@ConfigDefault("\"UTC\"")
|
81
|
-
DateTimeZone getDefaultFromTimeZone();
|
82
|
-
|
83
|
-
@Config("endpoint")
|
84
|
-
@ConfigDefault("null")
|
85
|
-
Optional<String> getEndpoint();
|
86
|
-
}
|
87
|
-
|
88
|
-
public interface TimestampColumnOption
|
89
|
-
extends Task, TimestampFormatter.TimestampColumnOption
|
90
|
-
{
|
91
|
-
@Config("from_timezone")
|
92
|
-
@ConfigDefault("null")
|
93
|
-
Optional<DateTimeZone> getFromTimeZone();
|
94
|
-
|
95
|
-
@Config("from_format")
|
96
|
-
@ConfigDefault("null")
|
97
|
-
Optional<List<String>> getFromFormat();
|
98
|
-
}
|
99
36
|
|
100
37
|
@Override
|
101
38
|
public ConfigDiff transaction(ConfigSource config,
|
@@ -237,24 +174,7 @@ public class OrcOutputPlugin
|
|
237
174
|
final Integer bufferSize = task.getBufferSize();
|
238
175
|
final Integer stripSize = task.getStripSize();
|
239
176
|
final String kindString = task.getCompressionKind();
|
240
|
-
CompressionKind kind;
|
241
|
-
switch (kindString) {
|
242
|
-
case "ZLIB":
|
243
|
-
kind = CompressionKind.ZLIB;
|
244
|
-
break;
|
245
|
-
case "SNAPPY":
|
246
|
-
kind = CompressionKind.SNAPPY;
|
247
|
-
break;
|
248
|
-
case "LZO":
|
249
|
-
kind = CompressionKind.LZO;
|
250
|
-
break;
|
251
|
-
case "LZ4":
|
252
|
-
kind = CompressionKind.LZ4;
|
253
|
-
break;
|
254
|
-
default:
|
255
|
-
kind = CompressionKind.NONE;
|
256
|
-
break;
|
257
|
-
}
|
177
|
+
CompressionKind kind = CompressionKind.valueOf(kindString);
|
258
178
|
return OrcFile.writerOptions(conf).
|
259
179
|
bufferSize(bufferSize)
|
260
180
|
.stripeSize(stripSize)
|
@@ -266,7 +186,6 @@ public class OrcOutputPlugin
|
|
266
186
|
{
|
267
187
|
private final PageReader reader;
|
268
188
|
private final Writer writer;
|
269
|
-
private final ArrayList<VectorizedRowBatch> rowBatches = new ArrayList<>();
|
270
189
|
|
271
190
|
public OrcTransactionalPageOutput(PageReader reader, Writer writer, PluginTask task)
|
272
191
|
{
|
@@ -290,8 +209,12 @@ public class OrcOutputPlugin
|
|
290
209
|
);
|
291
210
|
i++;
|
292
211
|
}
|
293
|
-
|
294
|
-
|
212
|
+
try {
|
213
|
+
writer.addRowBatch(batch);
|
214
|
+
batch.reset();
|
215
|
+
}
|
216
|
+
catch (IOException e) {
|
217
|
+
e.printStackTrace();
|
295
218
|
}
|
296
219
|
}
|
297
220
|
|
@@ -299,9 +222,6 @@ public class OrcOutputPlugin
|
|
299
222
|
public void finish()
|
300
223
|
{
|
301
224
|
try {
|
302
|
-
for (VectorizedRowBatch batch : rowBatches) {
|
303
|
-
writer.addRowBatch(batch);
|
304
|
-
}
|
305
225
|
writer.close();
|
306
226
|
}
|
307
227
|
catch (IOException e) {
|
@@ -0,0 +1,55 @@
|
|
1
|
+
package org.embulk.output.orc;
|
2
|
+
|
3
|
+
import com.google.common.base.Optional;
|
4
|
+
import org.embulk.config.Config;
|
5
|
+
import org.embulk.config.ConfigDefault;
|
6
|
+
import org.embulk.config.Task;
|
7
|
+
import org.embulk.spi.time.TimestampFormatter;
|
8
|
+
import org.embulk.util.aws.credentials.AwsCredentialsTask;
|
9
|
+
import org.joda.time.DateTimeZone;
|
10
|
+
|
11
|
+
import java.util.Map;
|
12
|
+
|
13
|
+
public interface PluginTask
|
14
|
+
extends Task, TimestampFormatter.Task, AwsCredentialsTask
|
15
|
+
{
|
16
|
+
@Config("path_prefix")
|
17
|
+
String getPathPrefix();
|
18
|
+
|
19
|
+
@Config("file_ext")
|
20
|
+
@ConfigDefault("\".orc\"")
|
21
|
+
String getFileNameExtension();
|
22
|
+
|
23
|
+
@Config("column_options")
|
24
|
+
@ConfigDefault("{}")
|
25
|
+
Map<String, TimestampColumnOption> getColumnOptions();
|
26
|
+
|
27
|
+
@Config("sequence_format")
|
28
|
+
@ConfigDefault("\".%03d\"")
|
29
|
+
String getSequenceFormat();
|
30
|
+
|
31
|
+
// ORC File options
|
32
|
+
@Config("strip_size")
|
33
|
+
@ConfigDefault("100000")
|
34
|
+
Integer getStripSize();
|
35
|
+
|
36
|
+
@Config("buffer_size")
|
37
|
+
@ConfigDefault("10000")
|
38
|
+
Integer getBufferSize();
|
39
|
+
|
40
|
+
@Config("compression_kind")
|
41
|
+
@ConfigDefault("ZLIB")
|
42
|
+
public String getCompressionKind();
|
43
|
+
|
44
|
+
@Config("overwrite")
|
45
|
+
@ConfigDefault("false")
|
46
|
+
boolean getOverwrite();
|
47
|
+
|
48
|
+
@Config("default_from_timezone")
|
49
|
+
@ConfigDefault("\"UTC\"")
|
50
|
+
DateTimeZone getDefaultFromTimeZone();
|
51
|
+
|
52
|
+
@Config("endpoint")
|
53
|
+
@ConfigDefault("null")
|
54
|
+
Optional<String> getEndpoint();
|
55
|
+
}
|
@@ -0,0 +1,22 @@
|
|
1
|
+
package org.embulk.output.orc;
|
2
|
+
|
3
|
+
import com.google.common.base.Optional;
|
4
|
+
import org.embulk.config.Config;
|
5
|
+
import org.embulk.config.ConfigDefault;
|
6
|
+
import org.embulk.config.Task;
|
7
|
+
import org.embulk.spi.time.TimestampFormatter;
|
8
|
+
import org.joda.time.DateTimeZone;
|
9
|
+
|
10
|
+
import java.util.List;
|
11
|
+
|
12
|
+
public interface TimestampColumnOption
|
13
|
+
extends Task, TimestampFormatter.TimestampColumnOption
|
14
|
+
{
|
15
|
+
@Config("from_timezone")
|
16
|
+
@ConfigDefault("null")
|
17
|
+
Optional<DateTimeZone> getFromTimeZone();
|
18
|
+
|
19
|
+
@Config("from_format")
|
20
|
+
@ConfigDefault("null")
|
21
|
+
Optional<List<String>> getFromFormat();
|
22
|
+
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-output-orc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.2
|
4
|
+
version: 0.2.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yuokada
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-12-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -58,10 +58,11 @@ files:
|
|
58
58
|
- gradlew
|
59
59
|
- gradlew.bat
|
60
60
|
- lib/embulk/output/orc.rb
|
61
|
-
- src/main/java/org/embulk/output/orc/OrcCodec.java
|
62
61
|
- src/main/java/org/embulk/output/orc/OrcColumnVisitor.java
|
63
62
|
- src/main/java/org/embulk/output/orc/OrcOutputPlugin.java
|
64
63
|
- src/main/java/org/embulk/output/orc/OrcOutputPluginHelper.java
|
64
|
+
- src/main/java/org/embulk/output/orc/PluginTask.java
|
65
|
+
- src/main/java/org/embulk/output/orc/TimestampColumnOption.java
|
65
66
|
- src/test/java/org/embulk/output/orc/TestOrcOutputPlugin.java
|
66
67
|
- classpath/activation-1.1.jar
|
67
68
|
- classpath/aircompressor-0.3.jar
|
@@ -93,7 +94,7 @@ files:
|
|
93
94
|
- classpath/curator-client-2.7.1.jar
|
94
95
|
- classpath/curator-framework-2.7.1.jar
|
95
96
|
- classpath/curator-recipes-2.7.1.jar
|
96
|
-
- classpath/embulk-output-orc-0.2.2.jar
|
97
|
+
- classpath/embulk-output-orc-0.2.4.jar
|
97
98
|
- classpath/embulk-util-aws-credentials-0.2.8.jar
|
98
99
|
- classpath/gson-2.2.4.jar
|
99
100
|
- classpath/hadoop-annotations-2.7.3.jar
|
Binary file
|
@@ -1,21 +0,0 @@
|
|
1
|
-
package org.embulk.output.orc;
|
2
|
-
|
3
|
-
public enum OrcCodec
|
4
|
-
{
|
5
|
-
ZLIB("zlib"),
|
6
|
-
SNAPPY("snappy"),
|
7
|
-
LZO("lzo"),
|
8
|
-
LZ4("lz4"),
|
9
|
-
NONE("none"),;
|
10
|
-
String kind;
|
11
|
-
|
12
|
-
OrcCodec(String kind)
|
13
|
-
{
|
14
|
-
this.kind = kind;
|
15
|
-
}
|
16
|
-
|
17
|
-
public String getKind()
|
18
|
-
{
|
19
|
-
return kind;
|
20
|
-
}
|
21
|
-
}
|