embulk-output-orc 0.0.2 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +12 -0
- data/README.md +14 -6
- data/build.gradle +4 -9
- data/example/example.yml +1 -0
- data/src/main/java/org/embulk/output/orc/OrcOutputPlugin.java +4 -2
- data/src/main/java/org/embulk/output/orc/OrcOutputPluginHelper.java +26 -0
- metadata +8 -42
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 376187534c3d236c20656a656ed04d5c858e9a83
|
4
|
+
data.tar.gz: 4d4b74153e1e2fe0248b4ba5d097723dbc42022e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 888dab123d9cea81defbd51c0d5b812bd6c90e84c578927a72a491a8fffb4fac61fcced52ba7d759fdd1c36d9dba08a424c2d704b6f3f1ae51bbfdb3fac50728
|
7
|
+
data.tar.gz: bb7c0de447e7bf98b2ad7e7ba2b309854b0beaf37b6c10fd2a82be3736f6e39c8a576c985be0c62e9223ab24cb1a9bd4662e64d12974be490cd042002c933699
|
data/.travis.yml
ADDED
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# Orc output plugin for Embulk
|
2
2
|
|
3
|
-
|
3
|
+
[![Build Status](https://travis-ci.org/yuokada/embulk-output-orc.svg?branch=master)](https://travis-ci.org/yuokada/embulk-output-orc)
|
4
4
|
|
5
5
|
## Overview
|
6
6
|
|
@@ -11,17 +11,25 @@ TODO: Write short description here and build.gradle file.
|
|
11
11
|
|
12
12
|
## Configuration
|
13
13
|
|
14
|
-
- **
|
15
|
-
- **
|
16
|
-
- **
|
14
|
+
- **path_prefix**: A prefix of output path. (string, required)
|
15
|
+
- **file_ext**: An extension of output file. (string, default: `.orc`)
|
16
|
+
- **sequence_format**: Format for the sequence part of output file names. (string, default: `.%03d`)
|
17
|
+
- **buffer_size**: Set the ORC buffer size (integer, default: `10000`)
|
18
|
+
- **strip_size**: Set the ORC stripe size (integer, default: `100000`)
|
19
|
+
- **compression_kind**: Compression codec used for the ORC file. (string, default: `'ZLIB'`)
|
20
|
+
- **overwrite**: (LocalFileSystem only) Overwrite if output files already exist. (boolean, default: `false`)
|
21
|
+
- **default_from_timezone**: Time zone of timestamp columns. This can be overridden for each column using column_options (DateTimeZone, default: `UTC`)
|
17
22
|
|
18
23
|
## Example
|
19
24
|
|
20
25
|
```yaml
|
21
26
|
out:
|
22
27
|
type: orc
|
23
|
-
|
24
|
-
|
28
|
+
path_prefix: "/tmp/output"
|
29
|
+
buffer_size: 8000
|
30
|
+
strip_size: 90000
|
31
|
+
compression_kind: ZLIB
|
32
|
+
overwrite: true
|
25
33
|
```
|
26
34
|
|
27
35
|
|
data/build.gradle
CHANGED
@@ -14,7 +14,7 @@ configurations {
|
|
14
14
|
runtime.exclude group: "org.slf4j", module: "slf4j-log4j12"
|
15
15
|
}
|
16
16
|
|
17
|
-
version = "0.0
|
17
|
+
version = "0.1.0"
|
18
18
|
|
19
19
|
sourceCompatibility = 1.8
|
20
20
|
targetCompatibility = 1.8
|
@@ -22,15 +22,10 @@ targetCompatibility = 1.8
|
|
22
22
|
dependencies {
|
23
23
|
compile "org.embulk:embulk-core:0.8.29"
|
24
24
|
provided "org.embulk:embulk-core:0.8.29"
|
25
|
-
// compile "YOUR_JAR_DEPENDENCY_GROUP:YOUR_JAR_DEPENDENCY_MODULE:YOUR_JAR_DEPENDENCY_VERSION"
|
26
25
|
|
27
26
|
compile "org.apache.orc:orc:1.4.0"
|
28
27
|
compile "org.apache.orc:orc-core:1.4.0"
|
29
|
-
compile "org.apache.
|
30
|
-
|
31
|
-
compile "org.apache.hadoop:hadoop-common:2.6.4"
|
32
|
-
compile "org.apache.hadoop:hadoop-client:2.6.4"
|
33
|
-
// compile "org.apache.hadoop:hadoop-hdfs:2.7.4"
|
28
|
+
compile "org.apache.hadoop:hadoop-hdfs:2.6.4"
|
34
29
|
|
35
30
|
testCompile "junit:junit:4.+"
|
36
31
|
testCompile "org.embulk:embulk-core:0.8.29:tests"
|
@@ -89,10 +84,10 @@ Gem::Specification.new do |spec|
|
|
89
84
|
spec.version = "${project.version}"
|
90
85
|
spec.authors = ["yuokada"]
|
91
86
|
spec.summary = %[Orc output plugin for Embulk]
|
92
|
-
spec.description = %[Dumps records to Orc.]
|
87
|
+
spec.description = %[Dumps records to Orc format file.]
|
93
88
|
spec.email = ["callistoiv+git@gmail.com"]
|
94
89
|
spec.licenses = ["MIT"]
|
95
|
-
|
90
|
+
spec.homepage = "https://github.com/yuokada/embulk-output-orc"
|
96
91
|
|
97
92
|
spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
|
98
93
|
spec.test_files = spec.files.grep(%r"^(test|spec)/")
|
data/example/example.yml
CHANGED
@@ -128,12 +128,14 @@ public class OrcOutputPlugin
|
|
128
128
|
{
|
129
129
|
PluginTask task = taskSource.loadTask(PluginTask.class);
|
130
130
|
|
131
|
+
if (task.getOverwrite()) {
|
132
|
+
OrcOutputPluginHelper.removeOldFile(buildPath(task, taskIndex));
|
133
|
+
}
|
134
|
+
|
131
135
|
final PageReader reader = new PageReader(schema);
|
132
136
|
Writer writer = createWriter(task, schema, taskIndex);
|
133
137
|
|
134
138
|
return new OrcTransactionalPageOutput(reader, writer, task);
|
135
|
-
// Write your code here :)
|
136
|
-
// throw new UnsupportedOperationException("OrcOutputPlugin.run method is not implemented yet");
|
137
139
|
}
|
138
140
|
|
139
141
|
private String buildPath(PluginTask task, int processorIndex)
|
@@ -0,0 +1,26 @@
|
|
1
|
+
package org.embulk.output.orc;
|
2
|
+
|
3
|
+
import java.io.IOException;
|
4
|
+
import java.nio.file.Files;
|
5
|
+
import java.nio.file.Path;
|
6
|
+
import java.nio.file.Paths;
|
7
|
+
|
8
|
+
class OrcOutputPluginHelper
|
9
|
+
{
|
10
|
+
protected OrcOutputPluginHelper()
|
11
|
+
{
|
12
|
+
throw new UnsupportedOperationException();
|
13
|
+
}
|
14
|
+
|
15
|
+
static void removeOldFile(String fpath)
|
16
|
+
{
|
17
|
+
Path path = Paths.get(fpath);
|
18
|
+
// TODO: Check local file. not HDFS or S3.
|
19
|
+
try {
|
20
|
+
Files.deleteIfExists(path);
|
21
|
+
}
|
22
|
+
catch (IOException e) {
|
23
|
+
e.printStackTrace();
|
24
|
+
}
|
25
|
+
}
|
26
|
+
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-output-orc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yuokada
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-08-
|
11
|
+
date: 2017-08-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -38,7 +38,7 @@ dependencies:
|
|
38
38
|
- - '>='
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '10.0'
|
41
|
-
description: Dumps records to Orc.
|
41
|
+
description: Dumps records to Orc format file.
|
42
42
|
email:
|
43
43
|
- callistoiv+git@gmail.com
|
44
44
|
executables: []
|
@@ -46,6 +46,7 @@ extensions: []
|
|
46
46
|
extra_rdoc_files: []
|
47
47
|
files:
|
48
48
|
- .gitignore
|
49
|
+
- .travis.yml
|
49
50
|
- LICENSE.txt
|
50
51
|
- README.md
|
51
52
|
- build.gradle
|
@@ -59,22 +60,20 @@ files:
|
|
59
60
|
- lib/embulk/output/orc.rb
|
60
61
|
- src/main/java/org/embulk/output/orc/OrcColumnVisitor.java
|
61
62
|
- src/main/java/org/embulk/output/orc/OrcOutputPlugin.java
|
63
|
+
- src/main/java/org/embulk/output/orc/OrcOutputPluginHelper.java
|
62
64
|
- src/test/java/org/embulk/output/orc/TestOrcOutputPlugin.java
|
63
|
-
- classpath/activation-1.1.jar
|
64
65
|
- classpath/aircompressor-0.3.jar
|
65
66
|
- classpath/apacheds-i18n-2.0.0-M15.jar
|
66
67
|
- classpath/apacheds-kerberos-codec-2.0.0-M15.jar
|
67
68
|
- classpath/api-asn1-api-1.0.0-M20.jar
|
68
69
|
- classpath/api-util-1.0.0-M20.jar
|
69
70
|
- classpath/asm-3.1.jar
|
70
|
-
- classpath/avro-1.7.4.jar
|
71
|
-
- classpath/commons-beanutils-1.7.0.jar
|
72
71
|
- classpath/commons-cli-1.2.jar
|
73
72
|
- classpath/commons-codec-1.6.jar
|
74
73
|
- classpath/commons-collections-3.2.2.jar
|
75
74
|
- classpath/commons-compress-1.4.1.jar
|
76
75
|
- classpath/commons-configuration-1.6.jar
|
77
|
-
- classpath/commons-
|
76
|
+
- classpath/commons-daemon-1.0.13.jar
|
78
77
|
- classpath/commons-el-1.0.jar
|
79
78
|
- classpath/commons-httpclient-3.1.jar
|
80
79
|
- classpath/commons-io-2.4.jar
|
@@ -84,70 +83,37 @@ files:
|
|
84
83
|
- classpath/commons-net-3.1.jar
|
85
84
|
- classpath/curator-client-2.6.0.jar
|
86
85
|
- classpath/curator-framework-2.6.0.jar
|
87
|
-
- classpath/
|
88
|
-
- classpath/embulk-output-orc-0.0.2.jar
|
86
|
+
- classpath/embulk-output-orc-0.1.0.jar
|
89
87
|
- classpath/gson-2.2.4.jar
|
90
|
-
- classpath/guice-servlet-3.0.jar
|
91
88
|
- classpath/hadoop-annotations-2.6.4.jar
|
92
89
|
- classpath/hadoop-auth-2.6.4.jar
|
93
|
-
- classpath/hadoop-client-2.6.4.jar
|
94
90
|
- classpath/hadoop-common-2.6.4.jar
|
95
91
|
- classpath/hadoop-hdfs-2.6.4.jar
|
96
|
-
- classpath/hadoop-mapreduce-client-app-2.6.4.jar
|
97
|
-
- classpath/hadoop-mapreduce-client-common-2.6.4.jar
|
98
|
-
- classpath/hadoop-mapreduce-client-core-2.6.4.jar
|
99
|
-
- classpath/hadoop-mapreduce-client-jobclient-2.6.4.jar
|
100
|
-
- classpath/hadoop-mapreduce-client-shuffle-2.6.4.jar
|
101
|
-
- classpath/hadoop-yarn-api-2.6.4.jar
|
102
|
-
- classpath/hadoop-yarn-client-2.6.4.jar
|
103
|
-
- classpath/hadoop-yarn-common-2.6.4.jar
|
104
|
-
- classpath/hadoop-yarn-server-common-2.6.4.jar
|
105
|
-
- classpath/hadoop-yarn-server-nodemanager-2.6.4.jar
|
106
92
|
- classpath/hive-storage-api-2.2.1.jar
|
107
93
|
- classpath/htrace-core-3.0.4.jar
|
108
94
|
- classpath/httpclient-4.2.5.jar
|
109
95
|
- classpath/httpcore-4.2.4.jar
|
110
96
|
- classpath/jackson-core-asl-1.9.13.jar
|
111
|
-
- classpath/jackson-jaxrs-1.9.13.jar
|
112
97
|
- classpath/jackson-mapper-asl-1.9.13.jar
|
113
|
-
- classpath/jackson-xc-1.9.13.jar
|
114
|
-
- classpath/jasper-compiler-5.5.23.jar
|
115
98
|
- classpath/jasper-runtime-5.5.23.jar
|
116
|
-
- classpath/java-xmlbuilder-0.4.jar
|
117
|
-
- classpath/jaxb-api-2.2.2.jar
|
118
|
-
- classpath/jaxb-impl-2.2.3-1.jar
|
119
|
-
- classpath/jersey-client-1.9.jar
|
120
99
|
- classpath/jersey-core-1.9.jar
|
121
|
-
- classpath/jersey-guice-1.9.jar
|
122
|
-
- classpath/jersey-json-1.9.jar
|
123
100
|
- classpath/jersey-server-1.9.jar
|
124
|
-
- classpath/jets3t-0.9.0.jar
|
125
|
-
- classpath/jettison-1.1.jar
|
126
101
|
- classpath/jetty-6.1.26.jar
|
127
102
|
- classpath/jetty-util-6.1.26.jar
|
128
103
|
- classpath/jline-0.9.94.jar
|
129
104
|
- classpath/jsch-0.1.42.jar
|
130
105
|
- classpath/jsp-api-2.1.jar
|
131
|
-
- classpath/jsr305-1.3.9.jar
|
132
|
-
- classpath/kryo-shaded-3.0.3.jar
|
133
|
-
- classpath/leveldbjni-all-1.8.jar
|
134
106
|
- classpath/log4j-1.2.17.jar
|
135
|
-
- classpath/minlog-1.3.0.jar
|
136
107
|
- classpath/netty-3.7.0.Final.jar
|
137
|
-
- classpath/objenesis-2.1.jar
|
138
108
|
- classpath/orc-core-1.4.0.jar
|
139
|
-
- classpath/orc-mapreduce-1.4.0.jar
|
140
|
-
- classpath/paranamer-2.3.jar
|
141
109
|
- classpath/protobuf-java-2.5.0.jar
|
142
110
|
- classpath/servlet-api-2.5.jar
|
143
|
-
- classpath/snappy-java-1.0.4.1.jar
|
144
|
-
- classpath/stax-api-1.0-2.jar
|
145
111
|
- classpath/xercesImpl-2.9.1.jar
|
146
112
|
- classpath/xml-apis-1.3.04.jar
|
147
113
|
- classpath/xmlenc-0.52.jar
|
148
114
|
- classpath/xz-1.0.jar
|
149
115
|
- classpath/zookeeper-3.4.6.jar
|
150
|
-
homepage:
|
116
|
+
homepage: https://github.com/yuokada/embulk-output-orc
|
151
117
|
licenses:
|
152
118
|
- MIT
|
153
119
|
metadata: {}
|