embulk-output-orc 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +19 -0
- data/build.gradle +8 -1
- data/src/main/java/org/embulk/output/orc/OrcOutputPlugin.java +19 -5
- metadata +36 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9a4d872e06384fd47cf8c1707398234974e0ecb3
|
4
|
+
data.tar.gz: bb3855c44c55f16bc5e1707f09e3a1b6c683aee6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a16e0c62d57089d5ff431021b97586a8203a423ab3f8797a4e2274f26cb168a91a59ae72d6d42b4f2540209565d05d9e3f1ae66af242e52c90ab1f239afe90cd
|
7
|
+
data.tar.gz: 21f77bd871793f06b014ce9df5c08ab7e0f0d610b7cc69b749e415a1f181891288ba06211dbe3d769bfd756bfaaf344d1eeecd42db9f085a7feba28560921c96
|
data/README.md
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# Orc output plugin for Embulk
|
2
2
|
|
3
3
|
[![Build Status](https://travis-ci.org/yuokada/embulk-output-orc.svg?branch=master)](https://travis-ci.org/yuokada/embulk-output-orc)
|
4
|
+
[![Gem Version](https://badge.fury.io/rb/embulk-output-orc.svg)](https://badge.fury.io/rb/embulk-output-orc)
|
4
5
|
|
5
6
|
## Overview
|
6
7
|
|
@@ -12,6 +13,7 @@
|
|
12
13
|
## Configuration
|
13
14
|
|
14
15
|
- **path_prefix**: A prefix of output path. (string, required)
|
16
|
+
- Supported schemes: `file`, `s3n`, and `s3a`.
|
15
17
|
- **file_ext**: An extension of output file. (string, default: `.orc`)
|
16
18
|
- **sequence_format**: (string, default: `.%03d`)
|
17
19
|
- **buffer_size**: Set the ORC buffer size (integer, default: `10000`)
|
@@ -20,6 +22,12 @@
|
|
20
22
|
- **overwrite**: (LocalFileSystem only) Overwrite if output files already exist. (boolean, default: `false`)
|
21
23
|
- **default_from_timezone**: Time zone of timestamp columns. This can be overridden for each column using column_options. (DateTimeZone, default: `UTC`)
|
22
24
|
|
25
|
+
- **auth_method**: Name of the mechanism used to authenticate requests (`basic`, `env`, `instance`, `profile`, `properties`, `anonymous`, or `session`; default: `basic`).
|
26
|
+
see: https://github.com/embulk/embulk-input-s3#configuration
|
27
|
+
|
28
|
+
- `env`, `basic`, `profile`, `default`, `session`, `anonymous`, `properties`
|
29
|
+
|
30
|
+
|
23
31
|
## Example
|
24
32
|
|
25
33
|
```yaml
|
@@ -32,6 +40,17 @@ out:
|
|
32
40
|
overwrite: true
|
33
41
|
```
|
34
42
|
|
43
|
+
## ChangeLog
|
44
|
+
|
45
|
+
### ver 0.2.0
|
46
|
+
|
47
|
+
- Support output to S3
|
48
|
+
|
49
|
+
- `s3n` and `s3a` protocols
|
50
|
+
|
51
|
+
### ver 0.1.0
|
52
|
+
|
53
|
+
- initial release
|
35
54
|
|
36
55
|
## Build
|
37
56
|
|
data/build.gradle
CHANGED
@@ -8,13 +8,16 @@ import com.github.jrubygradle.JRubyExec
|
|
8
8
|
repositories {
|
9
9
|
mavenCentral()
|
10
10
|
jcenter()
|
11
|
+
maven {
|
12
|
+
url "http://dl.bintray.com/embulk-input-s3/maven"
|
13
|
+
}
|
11
14
|
}
|
12
15
|
configurations {
|
13
16
|
provided
|
14
17
|
runtime.exclude group: "org.slf4j", module: "slf4j-log4j12"
|
15
18
|
}
|
16
19
|
|
17
|
-
version = "0.1.0"
|
20
|
+
version = "0.2.0"
|
18
21
|
|
19
22
|
sourceCompatibility = 1.8
|
20
23
|
targetCompatibility = 1.8
|
@@ -27,6 +30,10 @@ dependencies {
|
|
27
30
|
compile "org.apache.orc:orc-core:1.4.0"
|
28
31
|
compile "org.apache.hadoop:hadoop-hdfs:2.6.4"
|
29
32
|
|
33
|
+
compile 'org.embulk.input.s3:embulk-util-aws-credentials:0.2.8'
|
34
|
+
compile "com.amazonaws:aws-java-sdk-s3:1.10.33"
|
35
|
+
compile "org.apache.hadoop:hadoop-aws:2.7.3"
|
36
|
+
|
30
37
|
testCompile "junit:junit:4.+"
|
31
38
|
testCompile "org.embulk:embulk-core:0.8.29:tests"
|
32
39
|
testCompile "org.embulk:embulk-standards:0.8.29"
|
data/src/main/java/org/embulk/output/orc/OrcOutputPlugin.java
CHANGED
@@ -29,6 +29,8 @@ import org.embulk.spi.TransactionalPageOutput;
|
|
29
29
|
import org.embulk.spi.time.TimestampFormatter;
|
30
30
|
import org.embulk.spi.type.Type;
|
31
31
|
import org.embulk.spi.util.Timestamps;
|
32
|
+
import org.embulk.util.aws.credentials.AwsCredentials;
|
33
|
+
import org.embulk.util.aws.credentials.AwsCredentialsTask;
|
32
34
|
import org.joda.time.DateTimeZone;
|
33
35
|
import org.joda.time.format.DateTimeFormat;
|
34
36
|
import org.joda.time.format.DateTimeFormatter;
|
@@ -41,7 +43,7 @@ public class OrcOutputPlugin
|
|
41
43
|
implements OutputPlugin
|
42
44
|
{
|
43
45
|
public interface PluginTask
|
44
|
-
extends Task, TimestampFormatter.Task
|
46
|
+
extends Task, TimestampFormatter.Task, AwsCredentialsTask
|
45
47
|
{
|
46
48
|
@Config("path_prefix")
|
47
49
|
String getPathPrefix();
|
@@ -176,7 +178,7 @@ public class OrcOutputPlugin
|
|
176
178
|
return oschema;
|
177
179
|
}
|
178
180
|
|
179
|
-
private Configuration getHadoopConfiguration()
|
181
|
+
private Configuration getHadoopConfiguration(PluginTask task)
|
180
182
|
{
|
181
183
|
Configuration conf = new Configuration();
|
182
184
|
|
@@ -185,14 +187,25 @@ public class OrcOutputPlugin
|
|
185
187
|
conf.set("fs.file.impl", LocalFileSystem.class.getName());
|
186
188
|
// see: https://stackoverflow.com/questions/20833444/how-to-set-objects-in-hadoop-configuration
|
187
189
|
|
190
|
+
AwsCredentials.getAWSCredentialsProvider(task);
|
191
|
+
if (task.getAccessKeyId().isPresent()) {
|
192
|
+
conf.set("fs.s3a.access.key", task.getAccessKeyId().get());
|
193
|
+
conf.set("fs.s3n.awsAccessKeyId", task.getAccessKeyId().get());
|
194
|
+
}
|
195
|
+
if (task.getSecretAccessKey().isPresent()) {
|
196
|
+
conf.set("fs.s3a.secret.key", task.getSecretAccessKey().get());
|
197
|
+
conf.set("fs.s3n.awsSecretAccessKey", task.getSecretAccessKey().get());
|
198
|
+
}
|
199
|
+
|
188
200
|
return conf;
|
189
201
|
}
|
190
202
|
|
191
203
|
private Writer createWriter(PluginTask task, Schema schema, int processorIndex)
|
192
204
|
{
|
193
|
-
final TimestampFormatter[] timestampFormatters = Timestamps
|
205
|
+
final TimestampFormatter[] timestampFormatters = Timestamps
|
206
|
+
.newTimestampColumnFormatters(task, schema, task.getColumnOptions());
|
194
207
|
|
195
|
-
Configuration conf = getHadoopConfiguration();
|
208
|
+
Configuration conf = getHadoopConfiguration(task);
|
196
209
|
TypeDescription oschema = getSchema(schema);
|
197
210
|
|
198
211
|
// see: https://groups.google.com/forum/#!topic/vertx/lLb-slzpWVg
|
@@ -256,7 +269,8 @@ public class OrcOutputPlugin
|
|
256
269
|
this.writer = writer;
|
257
270
|
|
258
271
|
// formatter
|
259
|
-
DateTimeZone defaultTimeZone = DateTimeZone
|
272
|
+
DateTimeZone defaultTimeZone = DateTimeZone
|
273
|
+
.forTimeZone(task.getDefaultFromTimeZone().toTimeZone());
|
260
274
|
formatter = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss").withZone(defaultTimeZone);
|
261
275
|
}
|
262
276
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-output-orc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yuokada
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-08-
|
11
|
+
date: 2017-08-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -62,18 +62,26 @@ files:
|
|
62
62
|
- src/main/java/org/embulk/output/orc/OrcOutputPlugin.java
|
63
63
|
- src/main/java/org/embulk/output/orc/OrcOutputPluginHelper.java
|
64
64
|
- src/test/java/org/embulk/output/orc/TestOrcOutputPlugin.java
|
65
|
+
- classpath/activation-1.1.jar
|
65
66
|
- classpath/aircompressor-0.3.jar
|
66
67
|
- classpath/apacheds-i18n-2.0.0-M15.jar
|
67
68
|
- classpath/apacheds-kerberos-codec-2.0.0-M15.jar
|
68
69
|
- classpath/api-asn1-api-1.0.0-M20.jar
|
69
70
|
- classpath/api-util-1.0.0-M20.jar
|
70
71
|
- classpath/asm-3.1.jar
|
72
|
+
- classpath/avro-1.7.4.jar
|
73
|
+
- classpath/aws-java-sdk-1.7.4.jar
|
74
|
+
- classpath/aws-java-sdk-core-1.10.33.jar
|
75
|
+
- classpath/aws-java-sdk-kms-1.10.33.jar
|
76
|
+
- classpath/aws-java-sdk-s3-1.10.33.jar
|
77
|
+
- classpath/commons-beanutils-1.7.0.jar
|
71
78
|
- classpath/commons-cli-1.2.jar
|
72
79
|
- classpath/commons-codec-1.6.jar
|
73
80
|
- classpath/commons-collections-3.2.2.jar
|
74
81
|
- classpath/commons-compress-1.4.1.jar
|
75
82
|
- classpath/commons-configuration-1.6.jar
|
76
83
|
- classpath/commons-daemon-1.0.13.jar
|
84
|
+
- classpath/commons-digester-1.8.jar
|
77
85
|
- classpath/commons-el-1.0.jar
|
78
86
|
- classpath/commons-httpclient-3.1.jar
|
79
87
|
- classpath/commons-io-2.4.jar
|
@@ -81,33 +89,51 @@ files:
|
|
81
89
|
- classpath/commons-logging-1.1.3.jar
|
82
90
|
- classpath/commons-math3-3.1.1.jar
|
83
91
|
- classpath/commons-net-3.1.jar
|
84
|
-
- classpath/curator-client-2.
|
85
|
-
- classpath/curator-framework-2.
|
86
|
-
- classpath/
|
92
|
+
- classpath/curator-client-2.7.1.jar
|
93
|
+
- classpath/curator-framework-2.7.1.jar
|
94
|
+
- classpath/curator-recipes-2.7.1.jar
|
95
|
+
- classpath/embulk-output-orc-0.2.0.jar
|
96
|
+
- classpath/embulk-util-aws-credentials-0.2.8.jar
|
87
97
|
- classpath/gson-2.2.4.jar
|
88
|
-
- classpath/hadoop-annotations-2.
|
89
|
-
- classpath/hadoop-auth-2.
|
90
|
-
- classpath/hadoop-
|
98
|
+
- classpath/hadoop-annotations-2.7.3.jar
|
99
|
+
- classpath/hadoop-auth-2.7.3.jar
|
100
|
+
- classpath/hadoop-aws-2.7.3.jar
|
101
|
+
- classpath/hadoop-common-2.7.3.jar
|
91
102
|
- classpath/hadoop-hdfs-2.6.4.jar
|
92
103
|
- classpath/hive-storage-api-2.2.1.jar
|
93
104
|
- classpath/htrace-core-3.0.4.jar
|
94
|
-
- classpath/
|
95
|
-
- classpath/
|
105
|
+
- classpath/htrace-core-3.1.0-incubating.jar
|
106
|
+
- classpath/httpclient-4.3.6.jar
|
107
|
+
- classpath/httpcore-4.3.3.jar
|
96
108
|
- classpath/jackson-core-asl-1.9.13.jar
|
109
|
+
- classpath/jackson-jaxrs-1.8.3.jar
|
97
110
|
- classpath/jackson-mapper-asl-1.9.13.jar
|
111
|
+
- classpath/jackson-xc-1.8.3.jar
|
98
112
|
- classpath/jasper-runtime-5.5.23.jar
|
113
|
+
- classpath/java-xmlbuilder-0.4.jar
|
114
|
+
- classpath/jaxb-api-2.2.2.jar
|
115
|
+
- classpath/jaxb-impl-2.2.3-1.jar
|
116
|
+
- classpath/jcl-over-slf4j-1.7.12.jar
|
99
117
|
- classpath/jersey-core-1.9.jar
|
118
|
+
- classpath/jersey-json-1.9.jar
|
100
119
|
- classpath/jersey-server-1.9.jar
|
120
|
+
- classpath/jets3t-0.9.0.jar
|
121
|
+
- classpath/jettison-1.1.jar
|
101
122
|
- classpath/jetty-6.1.26.jar
|
102
123
|
- classpath/jetty-util-6.1.26.jar
|
103
124
|
- classpath/jline-0.9.94.jar
|
125
|
+
- classpath/joda-time-2.9.9.jar
|
104
126
|
- classpath/jsch-0.1.42.jar
|
105
127
|
- classpath/jsp-api-2.1.jar
|
128
|
+
- classpath/jsr305-3.0.0.jar
|
106
129
|
- classpath/log4j-1.2.17.jar
|
107
130
|
- classpath/netty-3.7.0.Final.jar
|
108
131
|
- classpath/orc-core-1.4.0.jar
|
132
|
+
- classpath/paranamer-2.3.jar
|
109
133
|
- classpath/protobuf-java-2.5.0.jar
|
110
134
|
- classpath/servlet-api-2.5.jar
|
135
|
+
- classpath/snappy-java-1.0.4.1.jar
|
136
|
+
- classpath/stax-api-1.0-2.jar
|
111
137
|
- classpath/xercesImpl-2.9.1.jar
|
112
138
|
- classpath/xml-apis-1.3.04.jar
|
113
139
|
- classpath/xmlenc-0.52.jar
|