embulk-output-orc 0.0.2 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +12 -0
- data/README.md +14 -6
- data/build.gradle +4 -9
- data/example/example.yml +1 -0
- data/src/main/java/org/embulk/output/orc/OrcOutputPlugin.java +4 -2
- data/src/main/java/org/embulk/output/orc/OrcOutputPluginHelper.java +26 -0
- metadata +8 -42
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 376187534c3d236c20656a656ed04d5c858e9a83
|
4
|
+
data.tar.gz: 4d4b74153e1e2fe0248b4ba5d097723dbc42022e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 888dab123d9cea81defbd51c0d5b812bd6c90e84c578927a72a491a8fffb4fac61fcced52ba7d759fdd1c36d9dba08a424c2d704b6f3f1ae51bbfdb3fac50728
|
7
|
+
data.tar.gz: bb7c0de447e7bf98b2ad7e7ba2b309854b0beaf37b6c10fd2a82be3736f6e39c8a576c985be0c62e9223ab24cb1a9bd4662e64d12974be490cd042002c933699
|
data/.travis.yml
ADDED
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# Orc output plugin for Embulk
|
2
2
|
|
3
|
-
|
3
|
+
[![Build Status](https://travis-ci.org/yuokada/embulk-output-orc.svg?branch=master)](https://travis-ci.org/yuokada/embulk-output-orc)
|
4
4
|
|
5
5
|
## Overview
|
6
6
|
|
@@ -11,17 +11,25 @@ TODO: Write short description here and build.gradle file.
|
|
11
11
|
|
12
12
|
## Configuration
|
13
13
|
|
14
|
-
- **
|
15
|
-
- **
|
16
|
-
- **
|
14
|
+
- **path_prefix**: A prefix of output path. (string, required)
|
15
|
+
- **file_ext**: An extension of output file. (string, default: `.orc`)
|
16
|
+
- **sequence_format**: (string, default: `.%03d`)
|
17
|
+
- **buffer_size**: Set the ORC buffer size (integer, default: `10000`)
|
18
|
+
- **strip_size**: Set the ORC stripe size (integer, default: `100000`)
|
19
|
+
- **compression_kind**: description (string, default: `'ZLIB'`)
|
20
|
+
- **overwrite**: (LocalFileSystem only) Overwrite if output files already exist. (boolean, default: `false`)
|
21
|
+
- **default_from_timezone**: Time zone of timestamp columns. This can be overwritten for each column using column_options. (DateTimeZone, default: `UTC`)
|
17
22
|
|
18
23
|
## Example
|
19
24
|
|
20
25
|
```yaml
|
21
26
|
out:
|
22
27
|
type: orc
|
23
|
-
|
24
|
-
|
28
|
+
path_prefix: "/tmp/output"
|
29
|
+
buffer_size: 8000
|
30
|
+
strip_size: 90000
|
31
|
+
compression_kind: ZLIB
|
32
|
+
overwrite: true
|
25
33
|
```
|
26
34
|
|
27
35
|
|
data/build.gradle
CHANGED
@@ -14,7 +14,7 @@ configurations {
|
|
14
14
|
runtime.exclude group: "org.slf4j", module: "slf4j-log4j12"
|
15
15
|
}
|
16
16
|
|
17
|
-
version = "0.0.2"
|
17
|
+
version = "0.1.0"
|
18
18
|
|
19
19
|
sourceCompatibility = 1.8
|
20
20
|
targetCompatibility = 1.8
|
@@ -22,15 +22,10 @@ targetCompatibility = 1.8
|
|
22
22
|
dependencies {
|
23
23
|
compile "org.embulk:embulk-core:0.8.29"
|
24
24
|
provided "org.embulk:embulk-core:0.8.29"
|
25
|
-
// compile "YOUR_JAR_DEPENDENCY_GROUP:YOUR_JAR_DEPENDENCY_MODULE:YOUR_JAR_DEPENDENCY_VERSION"
|
26
25
|
|
27
26
|
compile "org.apache.orc:orc:1.4.0"
|
28
27
|
compile "org.apache.orc:orc-core:1.4.0"
|
29
|
-
compile "org.apache.orc:orc-mapreduce:1.4.0"
|
30
|
-
|
31
|
-
compile "org.apache.hadoop:hadoop-common:2.6.4"
|
32
|
-
compile "org.apache.hadoop:hadoop-client:2.6.4"
|
33
|
-
// compile "org.apache.hadoop:hadoop-hdfs:2.7.4"
|
28
|
+
compile "org.apache.hadoop:hadoop-hdfs:2.6.4"
|
34
29
|
|
35
30
|
testCompile "junit:junit:4.+"
|
36
31
|
testCompile "org.embulk:embulk-core:0.8.29:tests"
|
@@ -89,10 +84,10 @@ Gem::Specification.new do |spec|
|
|
89
84
|
spec.version = "${project.version}"
|
90
85
|
spec.authors = ["yuokada"]
|
91
86
|
spec.summary = %[Orc output plugin for Embulk]
|
92
|
-
spec.description = %[Dumps records to Orc.]
|
87
|
+
spec.description = %[Dumps records to Orc format file.]
|
93
88
|
spec.email = ["callistoiv+git@gmail.com"]
|
94
89
|
spec.licenses = ["MIT"]
|
95
|
-
|
90
|
+
spec.homepage = "https://github.com/yuokada/embulk-output-orc"
|
96
91
|
|
97
92
|
spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
|
98
93
|
spec.test_files = spec.files.grep(%r"^(test|spec)/")
|
data/example/example.yml
CHANGED
@@ -128,12 +128,14 @@ public class OrcOutputPlugin
|
|
128
128
|
{
|
129
129
|
PluginTask task = taskSource.loadTask(PluginTask.class);
|
130
130
|
|
131
|
+
if (task.getOverwrite()) {
|
132
|
+
OrcOutputPluginHelper.removeOldFile(buildPath(task, taskIndex));
|
133
|
+
}
|
134
|
+
|
131
135
|
final PageReader reader = new PageReader(schema);
|
132
136
|
Writer writer = createWriter(task, schema, taskIndex);
|
133
137
|
|
134
138
|
return new OrcTransactionalPageOutput(reader, writer, task);
|
135
|
-
// Write your code here :)
|
136
|
-
// throw new UnsupportedOperationException("OrcOutputPlugin.run method is not implemented yet");
|
137
139
|
}
|
138
140
|
|
139
141
|
private String buildPath(PluginTask task, int processorIndex)
|
@@ -0,0 +1,26 @@
|
|
1
|
+
package org.embulk.output.orc;
|
2
|
+
|
3
|
+
import java.io.IOException;
|
4
|
+
import java.nio.file.Files;
|
5
|
+
import java.nio.file.Path;
|
6
|
+
import java.nio.file.Paths;
|
7
|
+
|
8
|
+
/**
 * Static helper routines for the ORC output plugin.
 *
 * <p>This class only hosts static methods and is never instantiated.
 */
final class OrcOutputPluginHelper
{
    /**
     * Not instantiable: the class only exposes static helpers.
     *
     * @throws UnsupportedOperationException always
     */
    private OrcOutputPluginHelper()
    {
        throw new UnsupportedOperationException();
    }

    /**
     * Deletes a previously written output file so that a new one can be
     * created at the same location. A path that does not exist is a no-op.
     *
     * @param fpath path of the file to remove, resolved with {@code Paths.get}
     * @throws java.io.UncheckedIOException if the file exists but cannot be
     *         deleted; the original {@code IOException} is kept as the cause
     */
    static void removeOldFile(String fpath)
    {
        Path path = Paths.get(fpath);
        // TODO: Check local file. not HDFS or S3.
        try {
            Files.deleteIfExists(path);
        }
        catch (IOException e) {
            // Surface the failure instead of printing and continuing: the caller
            // asked for the old file to be replaced, and silently leaving it in
            // place hides why the overwrite did not happen.
            throw new java.io.UncheckedIOException("Failed to remove old output file: " + path, e);
        }
    }
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-output-orc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yuokada
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-08-
|
11
|
+
date: 2017-08-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -38,7 +38,7 @@ dependencies:
|
|
38
38
|
- - '>='
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '10.0'
|
41
|
-
description: Dumps records to Orc.
|
41
|
+
description: Dumps records to Orc format file.
|
42
42
|
email:
|
43
43
|
- callistoiv+git@gmail.com
|
44
44
|
executables: []
|
@@ -46,6 +46,7 @@ extensions: []
|
|
46
46
|
extra_rdoc_files: []
|
47
47
|
files:
|
48
48
|
- .gitignore
|
49
|
+
- .travis.yml
|
49
50
|
- LICENSE.txt
|
50
51
|
- README.md
|
51
52
|
- build.gradle
|
@@ -59,22 +60,20 @@ files:
|
|
59
60
|
- lib/embulk/output/orc.rb
|
60
61
|
- src/main/java/org/embulk/output/orc/OrcColumnVisitor.java
|
61
62
|
- src/main/java/org/embulk/output/orc/OrcOutputPlugin.java
|
63
|
+
- src/main/java/org/embulk/output/orc/OrcOutputPluginHelper.java
|
62
64
|
- src/test/java/org/embulk/output/orc/TestOrcOutputPlugin.java
|
63
|
-
- classpath/activation-1.1.jar
|
64
65
|
- classpath/aircompressor-0.3.jar
|
65
66
|
- classpath/apacheds-i18n-2.0.0-M15.jar
|
66
67
|
- classpath/apacheds-kerberos-codec-2.0.0-M15.jar
|
67
68
|
- classpath/api-asn1-api-1.0.0-M20.jar
|
68
69
|
- classpath/api-util-1.0.0-M20.jar
|
69
70
|
- classpath/asm-3.1.jar
|
70
|
-
- classpath/avro-1.7.4.jar
|
71
|
-
- classpath/commons-beanutils-1.7.0.jar
|
72
71
|
- classpath/commons-cli-1.2.jar
|
73
72
|
- classpath/commons-codec-1.6.jar
|
74
73
|
- classpath/commons-collections-3.2.2.jar
|
75
74
|
- classpath/commons-compress-1.4.1.jar
|
76
75
|
- classpath/commons-configuration-1.6.jar
|
77
|
-
- classpath/commons-
|
76
|
+
- classpath/commons-daemon-1.0.13.jar
|
78
77
|
- classpath/commons-el-1.0.jar
|
79
78
|
- classpath/commons-httpclient-3.1.jar
|
80
79
|
- classpath/commons-io-2.4.jar
|
@@ -84,70 +83,37 @@ files:
|
|
84
83
|
- classpath/commons-net-3.1.jar
|
85
84
|
- classpath/curator-client-2.6.0.jar
|
86
85
|
- classpath/curator-framework-2.6.0.jar
|
87
|
-
- classpath/
|
88
|
-
- classpath/embulk-output-orc-0.0.2.jar
|
86
|
+
- classpath/embulk-output-orc-0.1.0.jar
|
89
87
|
- classpath/gson-2.2.4.jar
|
90
|
-
- classpath/guice-servlet-3.0.jar
|
91
88
|
- classpath/hadoop-annotations-2.6.4.jar
|
92
89
|
- classpath/hadoop-auth-2.6.4.jar
|
93
|
-
- classpath/hadoop-client-2.6.4.jar
|
94
90
|
- classpath/hadoop-common-2.6.4.jar
|
95
91
|
- classpath/hadoop-hdfs-2.6.4.jar
|
96
|
-
- classpath/hadoop-mapreduce-client-app-2.6.4.jar
|
97
|
-
- classpath/hadoop-mapreduce-client-common-2.6.4.jar
|
98
|
-
- classpath/hadoop-mapreduce-client-core-2.6.4.jar
|
99
|
-
- classpath/hadoop-mapreduce-client-jobclient-2.6.4.jar
|
100
|
-
- classpath/hadoop-mapreduce-client-shuffle-2.6.4.jar
|
101
|
-
- classpath/hadoop-yarn-api-2.6.4.jar
|
102
|
-
- classpath/hadoop-yarn-client-2.6.4.jar
|
103
|
-
- classpath/hadoop-yarn-common-2.6.4.jar
|
104
|
-
- classpath/hadoop-yarn-server-common-2.6.4.jar
|
105
|
-
- classpath/hadoop-yarn-server-nodemanager-2.6.4.jar
|
106
92
|
- classpath/hive-storage-api-2.2.1.jar
|
107
93
|
- classpath/htrace-core-3.0.4.jar
|
108
94
|
- classpath/httpclient-4.2.5.jar
|
109
95
|
- classpath/httpcore-4.2.4.jar
|
110
96
|
- classpath/jackson-core-asl-1.9.13.jar
|
111
|
-
- classpath/jackson-jaxrs-1.9.13.jar
|
112
97
|
- classpath/jackson-mapper-asl-1.9.13.jar
|
113
|
-
- classpath/jackson-xc-1.9.13.jar
|
114
|
-
- classpath/jasper-compiler-5.5.23.jar
|
115
98
|
- classpath/jasper-runtime-5.5.23.jar
|
116
|
-
- classpath/java-xmlbuilder-0.4.jar
|
117
|
-
- classpath/jaxb-api-2.2.2.jar
|
118
|
-
- classpath/jaxb-impl-2.2.3-1.jar
|
119
|
-
- classpath/jersey-client-1.9.jar
|
120
99
|
- classpath/jersey-core-1.9.jar
|
121
|
-
- classpath/jersey-guice-1.9.jar
|
122
|
-
- classpath/jersey-json-1.9.jar
|
123
100
|
- classpath/jersey-server-1.9.jar
|
124
|
-
- classpath/jets3t-0.9.0.jar
|
125
|
-
- classpath/jettison-1.1.jar
|
126
101
|
- classpath/jetty-6.1.26.jar
|
127
102
|
- classpath/jetty-util-6.1.26.jar
|
128
103
|
- classpath/jline-0.9.94.jar
|
129
104
|
- classpath/jsch-0.1.42.jar
|
130
105
|
- classpath/jsp-api-2.1.jar
|
131
|
-
- classpath/jsr305-1.3.9.jar
|
132
|
-
- classpath/kryo-shaded-3.0.3.jar
|
133
|
-
- classpath/leveldbjni-all-1.8.jar
|
134
106
|
- classpath/log4j-1.2.17.jar
|
135
|
-
- classpath/minlog-1.3.0.jar
|
136
107
|
- classpath/netty-3.7.0.Final.jar
|
137
|
-
- classpath/objenesis-2.1.jar
|
138
108
|
- classpath/orc-core-1.4.0.jar
|
139
|
-
- classpath/orc-mapreduce-1.4.0.jar
|
140
|
-
- classpath/paranamer-2.3.jar
|
141
109
|
- classpath/protobuf-java-2.5.0.jar
|
142
110
|
- classpath/servlet-api-2.5.jar
|
143
|
-
- classpath/snappy-java-1.0.4.1.jar
|
144
|
-
- classpath/stax-api-1.0-2.jar
|
145
111
|
- classpath/xercesImpl-2.9.1.jar
|
146
112
|
- classpath/xml-apis-1.3.04.jar
|
147
113
|
- classpath/xmlenc-0.52.jar
|
148
114
|
- classpath/xz-1.0.jar
|
149
115
|
- classpath/zookeeper-3.4.6.jar
|
150
|
-
homepage:
|
116
|
+
homepage: https://github.com/yuokada/embulk-output-orc
|
151
117
|
licenses:
|
152
118
|
- MIT
|
153
119
|
metadata: {}
|