embulk-output-orc 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +19 -0
- data/build.gradle +8 -1
- data/src/main/java/org/embulk/output/orc/OrcOutputPlugin.java +19 -5
- metadata +36 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9a4d872e06384fd47cf8c1707398234974e0ecb3
|
4
|
+
data.tar.gz: bb3855c44c55f16bc5e1707f09e3a1b6c683aee6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a16e0c62d57089d5ff431021b97586a8203a423ab3f8797a4e2274f26cb168a91a59ae72d6d42b4f2540209565d05d9e3f1ae66af242e52c90ab1f239afe90cd
|
7
|
+
data.tar.gz: 21f77bd871793f06b014ce9df5c08ab7e0f0d610b7cc69b749e415a1f181891288ba06211dbe3d769bfd756bfaaf344d1eeecd42db9f085a7feba28560921c96
|
data/README.md
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# Orc output plugin for Embulk
|
2
2
|
|
3
3
|
[![Build Status](https://travis-ci.org/yuokada/embulk-output-orc.svg?branch=master)](https://travis-ci.org/yuokada/embulk-output-orc)
|
4
|
+
[![Gem Version](https://badge.fury.io/rb/embulk-output-orc.svg)](https://badge.fury.io/rb/embulk-output-orc)
|
4
5
|
|
5
6
|
## Overview
|
6
7
|
|
@@ -12,6 +13,7 @@
|
|
12
13
|
## Configuration
|
13
14
|
|
14
15
|
- **path_prefix**: A prefix of output path. (string, required)
|
16
|
+
- Supported schemes: `file`, `s3n`, and `s3a`.
|
15
17
|
- **file_ext**: An extension of output file. (string, default: `.orc`)
|
16
18
|
- **sequence_format**: (string, default: `.%03d`)
|
17
19
|
- **buffer_size**: Set the ORC buffer size (integer, default: `10000`)
|
@@ -20,6 +22,12 @@
|
|
20
22
|
- **overwrite**: (LocalFileSystem only) Overwrite if output files already exist. (boolean, default: `false`)
|
21
23
|
- **default_from_timezone**: Time zone of timestamp columns. This can be overwritten for each column using column_options (DateTimeZone, default: `UTC`)
|
22
24
|
|
25
|
+
- **auth_method**: Name of the mechanism used to authenticate requests (`basic`, `env`, `instance`, `profile`, `properties`, `anonymous`, or `session`; default: `basic`)
|
26
|
+
see: https://github.com/embulk/embulk-input-s3#configuration
|
27
|
+
|
28
|
+
- `env`, `basic`, `profile`, `default`, `session`, `anonymous`, `properties`
|
29
|
+
|
30
|
+
|
23
31
|
## Example
|
24
32
|
|
25
33
|
```yaml
|
@@ -32,6 +40,17 @@ out:
|
|
32
40
|
overwrite: true
|
33
41
|
```
|
34
42
|
|
43
|
+
## ChangeLog
|
44
|
+
|
45
|
+
### ver 0.2.0
|
46
|
+
|
47
|
+
- Support output to S3
|
48
|
+
|
49
|
+
- `s3n`, `s3a` protocol
|
50
|
+
|
51
|
+
### ver 0.1.0
|
52
|
+
|
53
|
+
- initial release
|
35
54
|
|
36
55
|
## Build
|
37
56
|
|
data/build.gradle
CHANGED
@@ -8,13 +8,16 @@ import com.github.jrubygradle.JRubyExec
|
|
8
8
|
repositories {
|
9
9
|
mavenCentral()
|
10
10
|
jcenter()
|
11
|
+
maven {
|
12
|
+
url "http://dl.bintray.com/embulk-input-s3/maven"
|
13
|
+
}
|
11
14
|
}
|
12
15
|
configurations {
|
13
16
|
provided
|
14
17
|
runtime.exclude group: "org.slf4j", module: "slf4j-log4j12"
|
15
18
|
}
|
16
19
|
|
17
|
-
version = "0.1.0"
|
20
|
+
version = "0.2.0"
|
18
21
|
|
19
22
|
sourceCompatibility = 1.8
|
20
23
|
targetCompatibility = 1.8
|
@@ -27,6 +30,10 @@ dependencies {
|
|
27
30
|
compile "org.apache.orc:orc-core:1.4.0"
|
28
31
|
compile "org.apache.hadoop:hadoop-hdfs:2.6.4"
|
29
32
|
|
33
|
+
compile 'org.embulk.input.s3:embulk-util-aws-credentials:0.2.8'
|
34
|
+
compile "com.amazonaws:aws-java-sdk-s3:1.10.33"
|
35
|
+
compile "org.apache.hadoop:hadoop-aws:2.7.3"
|
36
|
+
|
30
37
|
testCompile "junit:junit:4.+"
|
31
38
|
testCompile "org.embulk:embulk-core:0.8.29:tests"
|
32
39
|
testCompile "org.embulk:embulk-standards:0.8.29"
|
@@ -29,6 +29,8 @@ import org.embulk.spi.TransactionalPageOutput;
|
|
29
29
|
import org.embulk.spi.time.TimestampFormatter;
|
30
30
|
import org.embulk.spi.type.Type;
|
31
31
|
import org.embulk.spi.util.Timestamps;
|
32
|
+
import org.embulk.util.aws.credentials.AwsCredentials;
|
33
|
+
import org.embulk.util.aws.credentials.AwsCredentialsTask;
|
32
34
|
import org.joda.time.DateTimeZone;
|
33
35
|
import org.joda.time.format.DateTimeFormat;
|
34
36
|
import org.joda.time.format.DateTimeFormatter;
|
@@ -41,7 +43,7 @@ public class OrcOutputPlugin
|
|
41
43
|
implements OutputPlugin
|
42
44
|
{
|
43
45
|
public interface PluginTask
|
44
|
-
extends Task, TimestampFormatter.Task
|
46
|
+
extends Task, TimestampFormatter.Task, AwsCredentialsTask
|
45
47
|
{
|
46
48
|
@Config("path_prefix")
|
47
49
|
String getPathPrefix();
|
@@ -176,7 +178,7 @@ public class OrcOutputPlugin
|
|
176
178
|
return oschema;
|
177
179
|
}
|
178
180
|
|
179
|
-
private Configuration getHadoopConfiguration()
|
181
|
+
private Configuration getHadoopConfiguration(PluginTask task)
|
180
182
|
{
|
181
183
|
Configuration conf = new Configuration();
|
182
184
|
|
@@ -185,14 +187,25 @@ public class OrcOutputPlugin
|
|
185
187
|
conf.set("fs.file.impl", LocalFileSystem.class.getName());
|
186
188
|
// see: https://stackoverflow.com/questions/20833444/how-to-set-objects-in-hadoop-configuration
|
187
189
|
|
190
|
+
AwsCredentials.getAWSCredentialsProvider(task);
|
191
|
+
if (task.getAccessKeyId().isPresent()) {
|
192
|
+
conf.set("fs.s3a.access.key", task.getAccessKeyId().get());
|
193
|
+
conf.set("fs.s3n.awsAccessKeyId", task.getAccessKeyId().get());
|
194
|
+
}
|
195
|
+
if (task.getSecretAccessKey().isPresent()) {
|
196
|
+
conf.set("fs.s3a.secret.key", task.getSecretAccessKey().get());
|
197
|
+
conf.set("fs.s3n.awsSecretAccessKey", task.getSecretAccessKey().get());
|
198
|
+
}
|
199
|
+
|
188
200
|
return conf;
|
189
201
|
}
|
190
202
|
|
191
203
|
private Writer createWriter(PluginTask task, Schema schema, int processorIndex)
|
192
204
|
{
|
193
|
-
final TimestampFormatter[] timestampFormatters = Timestamps
|
205
|
+
final TimestampFormatter[] timestampFormatters = Timestamps
|
206
|
+
.newTimestampColumnFormatters(task, schema, task.getColumnOptions());
|
194
207
|
|
195
|
-
Configuration conf = getHadoopConfiguration();
|
208
|
+
Configuration conf = getHadoopConfiguration(task);
|
196
209
|
TypeDescription oschema = getSchema(schema);
|
197
210
|
|
198
211
|
// see: https://groups.google.com/forum/#!topic/vertx/lLb-slzpWVg
|
@@ -256,7 +269,8 @@ public class OrcOutputPlugin
|
|
256
269
|
this.writer = writer;
|
257
270
|
|
258
271
|
// formatter
|
259
|
-
DateTimeZone defaultTimeZone = DateTimeZone
|
272
|
+
DateTimeZone defaultTimeZone = DateTimeZone
|
273
|
+
.forTimeZone(task.getDefaultFromTimeZone().toTimeZone());
|
260
274
|
formatter = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss").withZone(defaultTimeZone);
|
261
275
|
}
|
262
276
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-output-orc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yuokada
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-08-
|
11
|
+
date: 2017-08-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -62,18 +62,26 @@ files:
|
|
62
62
|
- src/main/java/org/embulk/output/orc/OrcOutputPlugin.java
|
63
63
|
- src/main/java/org/embulk/output/orc/OrcOutputPluginHelper.java
|
64
64
|
- src/test/java/org/embulk/output/orc/TestOrcOutputPlugin.java
|
65
|
+
- classpath/activation-1.1.jar
|
65
66
|
- classpath/aircompressor-0.3.jar
|
66
67
|
- classpath/apacheds-i18n-2.0.0-M15.jar
|
67
68
|
- classpath/apacheds-kerberos-codec-2.0.0-M15.jar
|
68
69
|
- classpath/api-asn1-api-1.0.0-M20.jar
|
69
70
|
- classpath/api-util-1.0.0-M20.jar
|
70
71
|
- classpath/asm-3.1.jar
|
72
|
+
- classpath/avro-1.7.4.jar
|
73
|
+
- classpath/aws-java-sdk-1.7.4.jar
|
74
|
+
- classpath/aws-java-sdk-core-1.10.33.jar
|
75
|
+
- classpath/aws-java-sdk-kms-1.10.33.jar
|
76
|
+
- classpath/aws-java-sdk-s3-1.10.33.jar
|
77
|
+
- classpath/commons-beanutils-1.7.0.jar
|
71
78
|
- classpath/commons-cli-1.2.jar
|
72
79
|
- classpath/commons-codec-1.6.jar
|
73
80
|
- classpath/commons-collections-3.2.2.jar
|
74
81
|
- classpath/commons-compress-1.4.1.jar
|
75
82
|
- classpath/commons-configuration-1.6.jar
|
76
83
|
- classpath/commons-daemon-1.0.13.jar
|
84
|
+
- classpath/commons-digester-1.8.jar
|
77
85
|
- classpath/commons-el-1.0.jar
|
78
86
|
- classpath/commons-httpclient-3.1.jar
|
79
87
|
- classpath/commons-io-2.4.jar
|
@@ -81,33 +89,51 @@ files:
|
|
81
89
|
- classpath/commons-logging-1.1.3.jar
|
82
90
|
- classpath/commons-math3-3.1.1.jar
|
83
91
|
- classpath/commons-net-3.1.jar
|
84
|
-
- classpath/curator-client-2.
|
85
|
-
- classpath/curator-framework-2.
|
86
|
-
- classpath/
|
92
|
+
- classpath/curator-client-2.7.1.jar
|
93
|
+
- classpath/curator-framework-2.7.1.jar
|
94
|
+
- classpath/curator-recipes-2.7.1.jar
|
95
|
+
- classpath/embulk-output-orc-0.2.0.jar
|
96
|
+
- classpath/embulk-util-aws-credentials-0.2.8.jar
|
87
97
|
- classpath/gson-2.2.4.jar
|
88
|
-
- classpath/hadoop-annotations-2.
|
89
|
-
- classpath/hadoop-auth-2.
|
90
|
-
- classpath/hadoop-
|
98
|
+
- classpath/hadoop-annotations-2.7.3.jar
|
99
|
+
- classpath/hadoop-auth-2.7.3.jar
|
100
|
+
- classpath/hadoop-aws-2.7.3.jar
|
101
|
+
- classpath/hadoop-common-2.7.3.jar
|
91
102
|
- classpath/hadoop-hdfs-2.6.4.jar
|
92
103
|
- classpath/hive-storage-api-2.2.1.jar
|
93
104
|
- classpath/htrace-core-3.0.4.jar
|
94
|
-
- classpath/
|
95
|
-
- classpath/
|
105
|
+
- classpath/htrace-core-3.1.0-incubating.jar
|
106
|
+
- classpath/httpclient-4.3.6.jar
|
107
|
+
- classpath/httpcore-4.3.3.jar
|
96
108
|
- classpath/jackson-core-asl-1.9.13.jar
|
109
|
+
- classpath/jackson-jaxrs-1.8.3.jar
|
97
110
|
- classpath/jackson-mapper-asl-1.9.13.jar
|
111
|
+
- classpath/jackson-xc-1.8.3.jar
|
98
112
|
- classpath/jasper-runtime-5.5.23.jar
|
113
|
+
- classpath/java-xmlbuilder-0.4.jar
|
114
|
+
- classpath/jaxb-api-2.2.2.jar
|
115
|
+
- classpath/jaxb-impl-2.2.3-1.jar
|
116
|
+
- classpath/jcl-over-slf4j-1.7.12.jar
|
99
117
|
- classpath/jersey-core-1.9.jar
|
118
|
+
- classpath/jersey-json-1.9.jar
|
100
119
|
- classpath/jersey-server-1.9.jar
|
120
|
+
- classpath/jets3t-0.9.0.jar
|
121
|
+
- classpath/jettison-1.1.jar
|
101
122
|
- classpath/jetty-6.1.26.jar
|
102
123
|
- classpath/jetty-util-6.1.26.jar
|
103
124
|
- classpath/jline-0.9.94.jar
|
125
|
+
- classpath/joda-time-2.9.9.jar
|
104
126
|
- classpath/jsch-0.1.42.jar
|
105
127
|
- classpath/jsp-api-2.1.jar
|
128
|
+
- classpath/jsr305-3.0.0.jar
|
106
129
|
- classpath/log4j-1.2.17.jar
|
107
130
|
- classpath/netty-3.7.0.Final.jar
|
108
131
|
- classpath/orc-core-1.4.0.jar
|
132
|
+
- classpath/paranamer-2.3.jar
|
109
133
|
- classpath/protobuf-java-2.5.0.jar
|
110
134
|
- classpath/servlet-api-2.5.jar
|
135
|
+
- classpath/snappy-java-1.0.4.1.jar
|
136
|
+
- classpath/stax-api-1.0-2.jar
|
111
137
|
- classpath/xercesImpl-2.9.1.jar
|
112
138
|
- classpath/xml-apis-1.3.04.jar
|
113
139
|
- classpath/xmlenc-0.52.jar
|