embulk-output-orc 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 376187534c3d236c20656a656ed04d5c858e9a83
4
- data.tar.gz: 4d4b74153e1e2fe0248b4ba5d097723dbc42022e
3
+ metadata.gz: 9a4d872e06384fd47cf8c1707398234974e0ecb3
4
+ data.tar.gz: bb3855c44c55f16bc5e1707f09e3a1b6c683aee6
5
5
  SHA512:
6
- metadata.gz: 888dab123d9cea81defbd51c0d5b812bd6c90e84c578927a72a491a8fffb4fac61fcced52ba7d759fdd1c36d9dba08a424c2d704b6f3f1ae51bbfdb3fac50728
7
- data.tar.gz: bb7c0de447e7bf98b2ad7e7ba2b309854b0beaf37b6c10fd2a82be3736f6e39c8a576c985be0c62e9223ab24cb1a9bd4662e64d12974be490cd042002c933699
6
+ metadata.gz: a16e0c62d57089d5ff431021b97586a8203a423ab3f8797a4e2274f26cb168a91a59ae72d6d42b4f2540209565d05d9e3f1ae66af242e52c90ab1f239afe90cd
7
+ data.tar.gz: 21f77bd871793f06b014ce9df5c08ab7e0f0d610b7cc69b749e415a1f181891288ba06211dbe3d769bfd756bfaaf344d1eeecd42db9f085a7feba28560921c96
data/README.md CHANGED
@@ -1,6 +1,7 @@
1
1
  # Orc output plugin for Embulk
2
2
 
3
3
  [![Build Status](https://travis-ci.org/yuokada/embulk-output-orc.svg?branch=master)](https://travis-ci.org/yuokada/embulk-output-orc)
4
+ [![Gem Version](https://badge.fury.io/rb/embulk-output-orc.svg)](https://badge.fury.io/rb/embulk-output-orc)
4
5
 
5
6
  ## Overview
6
7
 
@@ -12,6 +13,7 @@
12
13
  ## Configuration
13
14
 
14
15
  - **path_prefix**: A prefix of output path. (string, required)
16
+ - Supported schemes: `file`, `s3n`, and `s3a`.
15
17
  - **file_ext**: An extension of output file. (string, default: `.orc`)
16
18
  - **sequence_format**: (string, default: `.%03d`)
17
19
  - **buffer_size**: Set the ORC buffer size (integer, default: `10000`)
@@ -20,6 +22,12 @@
20
22
  - **overwrite**: (LocalFileSystem only) Overwrite if output files already exist. (boolean, default: `false`)
21
23
  - **default_from_timezone**: Time zone of timestamp columns. This can be overridden for each column using `column_options`. (DateTimeZone, default: `UTC`)
22
24
 
25
+ - **auth_method**: Name of the mechanism used to authenticate requests. (`basic`, `env`, `instance`, `profile`, `properties`, `anonymous`, or `session`; default: `basic`)
26
+ see: https://github.com/embulk/embulk-input-s3#configuration
27
+
28
+ - `env`, `basic`, `profile`, `default`, `session`, `anonymous`, `properties`
29
+
30
+
23
31
  ## Example
24
32
 
25
33
  ```yaml
@@ -32,6 +40,17 @@ out:
32
40
  overwrite: true
33
41
  ```
34
42
 
43
+ ## ChangeLog
44
+
45
+ ### ver 0.2.0
46
+
47
+ - Support output to S3
48
+
49
+ - `s3n` and `s3a` protocols
50
+
51
+ ### ver 0.1.0
52
+
53
+ - initial release
35
54
 
36
55
  ## Build
37
56
 
@@ -8,13 +8,16 @@ import com.github.jrubygradle.JRubyExec
8
8
  repositories {
9
9
  mavenCentral()
10
10
  jcenter()
11
+ maven {
12
+ url "http://dl.bintray.com/embulk-input-s3/maven"
13
+ }
11
14
  }
12
15
  configurations {
13
16
  provided
14
17
  runtime.exclude group: "org.slf4j", module: "slf4j-log4j12"
15
18
  }
16
19
 
17
- version = "0.1.0"
20
+ version = "0.2.0"
18
21
 
19
22
  sourceCompatibility = 1.8
20
23
  targetCompatibility = 1.8
@@ -27,6 +30,10 @@ dependencies {
27
30
  compile "org.apache.orc:orc-core:1.4.0"
28
31
  compile "org.apache.hadoop:hadoop-hdfs:2.6.4"
29
32
 
33
+ compile 'org.embulk.input.s3:embulk-util-aws-credentials:0.2.8'
34
+ compile "com.amazonaws:aws-java-sdk-s3:1.10.33"
35
+ compile "org.apache.hadoop:hadoop-aws:2.7.3"
36
+
30
37
  testCompile "junit:junit:4.+"
31
38
  testCompile "org.embulk:embulk-core:0.8.29:tests"
32
39
  testCompile "org.embulk:embulk-standards:0.8.29"
@@ -29,6 +29,8 @@ import org.embulk.spi.TransactionalPageOutput;
29
29
  import org.embulk.spi.time.TimestampFormatter;
30
30
  import org.embulk.spi.type.Type;
31
31
  import org.embulk.spi.util.Timestamps;
32
+ import org.embulk.util.aws.credentials.AwsCredentials;
33
+ import org.embulk.util.aws.credentials.AwsCredentialsTask;
32
34
  import org.joda.time.DateTimeZone;
33
35
  import org.joda.time.format.DateTimeFormat;
34
36
  import org.joda.time.format.DateTimeFormatter;
@@ -41,7 +43,7 @@ public class OrcOutputPlugin
41
43
  implements OutputPlugin
42
44
  {
43
45
  public interface PluginTask
44
- extends Task, TimestampFormatter.Task
46
+ extends Task, TimestampFormatter.Task, AwsCredentialsTask
45
47
  {
46
48
  @Config("path_prefix")
47
49
  String getPathPrefix();
@@ -176,7 +178,7 @@ public class OrcOutputPlugin
176
178
  return oschema;
177
179
  }
178
180
 
179
- private Configuration getHadoopConfiguration()
181
+ private Configuration getHadoopConfiguration(PluginTask task)
180
182
  {
181
183
  Configuration conf = new Configuration();
182
184
 
@@ -185,14 +187,25 @@ public class OrcOutputPlugin
185
187
  conf.set("fs.file.impl", LocalFileSystem.class.getName());
186
188
  // see: https://stackoverflow.com/questions/20833444/how-to-set-objects-in-hadoop-configuration
187
189
 
190
+ AwsCredentials.getAWSCredentialsProvider(task);
191
+ if (task.getAccessKeyId().isPresent()) {
192
+ conf.set("fs.s3a.access.key", task.getAccessKeyId().get());
193
+ conf.set("fs.s3n.awsAccessKeyId", task.getAccessKeyId().get());
194
+ }
195
+ if (task.getSecretAccessKey().isPresent()) {
196
+ conf.set("fs.s3a.secret.key", task.getSecretAccessKey().get());
197
+ conf.set("fs.s3n.awsSecretAccessKey", task.getSecretAccessKey().get());
198
+ }
199
+
188
200
  return conf;
189
201
  }
190
202
 
191
203
  private Writer createWriter(PluginTask task, Schema schema, int processorIndex)
192
204
  {
193
- final TimestampFormatter[] timestampFormatters = Timestamps.newTimestampColumnFormatters(task, schema, task.getColumnOptions());
205
+ final TimestampFormatter[] timestampFormatters = Timestamps
206
+ .newTimestampColumnFormatters(task, schema, task.getColumnOptions());
194
207
 
195
- Configuration conf = getHadoopConfiguration();
208
+ Configuration conf = getHadoopConfiguration(task);
196
209
  TypeDescription oschema = getSchema(schema);
197
210
 
198
211
  // see: https://groups.google.com/forum/#!topic/vertx/lLb-slzpWVg
@@ -256,7 +269,8 @@ public class OrcOutputPlugin
256
269
  this.writer = writer;
257
270
 
258
271
  // formatter
259
- DateTimeZone defaultTimeZone = DateTimeZone.forTimeZone(task.getDefaultFromTimeZone().toTimeZone());
272
+ DateTimeZone defaultTimeZone = DateTimeZone
273
+ .forTimeZone(task.getDefaultFromTimeZone().toTimeZone());
260
274
  formatter = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss").withZone(defaultTimeZone);
261
275
  }
262
276
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-output-orc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - yuokada
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-08-23 00:00:00.000000000 Z
11
+ date: 2017-08-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -62,18 +62,26 @@ files:
62
62
  - src/main/java/org/embulk/output/orc/OrcOutputPlugin.java
63
63
  - src/main/java/org/embulk/output/orc/OrcOutputPluginHelper.java
64
64
  - src/test/java/org/embulk/output/orc/TestOrcOutputPlugin.java
65
+ - classpath/activation-1.1.jar
65
66
  - classpath/aircompressor-0.3.jar
66
67
  - classpath/apacheds-i18n-2.0.0-M15.jar
67
68
  - classpath/apacheds-kerberos-codec-2.0.0-M15.jar
68
69
  - classpath/api-asn1-api-1.0.0-M20.jar
69
70
  - classpath/api-util-1.0.0-M20.jar
70
71
  - classpath/asm-3.1.jar
72
+ - classpath/avro-1.7.4.jar
73
+ - classpath/aws-java-sdk-1.7.4.jar
74
+ - classpath/aws-java-sdk-core-1.10.33.jar
75
+ - classpath/aws-java-sdk-kms-1.10.33.jar
76
+ - classpath/aws-java-sdk-s3-1.10.33.jar
77
+ - classpath/commons-beanutils-1.7.0.jar
71
78
  - classpath/commons-cli-1.2.jar
72
79
  - classpath/commons-codec-1.6.jar
73
80
  - classpath/commons-collections-3.2.2.jar
74
81
  - classpath/commons-compress-1.4.1.jar
75
82
  - classpath/commons-configuration-1.6.jar
76
83
  - classpath/commons-daemon-1.0.13.jar
84
+ - classpath/commons-digester-1.8.jar
77
85
  - classpath/commons-el-1.0.jar
78
86
  - classpath/commons-httpclient-3.1.jar
79
87
  - classpath/commons-io-2.4.jar
@@ -81,33 +89,51 @@ files:
81
89
  - classpath/commons-logging-1.1.3.jar
82
90
  - classpath/commons-math3-3.1.1.jar
83
91
  - classpath/commons-net-3.1.jar
84
- - classpath/curator-client-2.6.0.jar
85
- - classpath/curator-framework-2.6.0.jar
86
- - classpath/embulk-output-orc-0.1.0.jar
92
+ - classpath/curator-client-2.7.1.jar
93
+ - classpath/curator-framework-2.7.1.jar
94
+ - classpath/curator-recipes-2.7.1.jar
95
+ - classpath/embulk-output-orc-0.2.0.jar
96
+ - classpath/embulk-util-aws-credentials-0.2.8.jar
87
97
  - classpath/gson-2.2.4.jar
88
- - classpath/hadoop-annotations-2.6.4.jar
89
- - classpath/hadoop-auth-2.6.4.jar
90
- - classpath/hadoop-common-2.6.4.jar
98
+ - classpath/hadoop-annotations-2.7.3.jar
99
+ - classpath/hadoop-auth-2.7.3.jar
100
+ - classpath/hadoop-aws-2.7.3.jar
101
+ - classpath/hadoop-common-2.7.3.jar
91
102
  - classpath/hadoop-hdfs-2.6.4.jar
92
103
  - classpath/hive-storage-api-2.2.1.jar
93
104
  - classpath/htrace-core-3.0.4.jar
94
- - classpath/httpclient-4.2.5.jar
95
- - classpath/httpcore-4.2.4.jar
105
+ - classpath/htrace-core-3.1.0-incubating.jar
106
+ - classpath/httpclient-4.3.6.jar
107
+ - classpath/httpcore-4.3.3.jar
96
108
  - classpath/jackson-core-asl-1.9.13.jar
109
+ - classpath/jackson-jaxrs-1.8.3.jar
97
110
  - classpath/jackson-mapper-asl-1.9.13.jar
111
+ - classpath/jackson-xc-1.8.3.jar
98
112
  - classpath/jasper-runtime-5.5.23.jar
113
+ - classpath/java-xmlbuilder-0.4.jar
114
+ - classpath/jaxb-api-2.2.2.jar
115
+ - classpath/jaxb-impl-2.2.3-1.jar
116
+ - classpath/jcl-over-slf4j-1.7.12.jar
99
117
  - classpath/jersey-core-1.9.jar
118
+ - classpath/jersey-json-1.9.jar
100
119
  - classpath/jersey-server-1.9.jar
120
+ - classpath/jets3t-0.9.0.jar
121
+ - classpath/jettison-1.1.jar
101
122
  - classpath/jetty-6.1.26.jar
102
123
  - classpath/jetty-util-6.1.26.jar
103
124
  - classpath/jline-0.9.94.jar
125
+ - classpath/joda-time-2.9.9.jar
104
126
  - classpath/jsch-0.1.42.jar
105
127
  - classpath/jsp-api-2.1.jar
128
+ - classpath/jsr305-3.0.0.jar
106
129
  - classpath/log4j-1.2.17.jar
107
130
  - classpath/netty-3.7.0.Final.jar
108
131
  - classpath/orc-core-1.4.0.jar
132
+ - classpath/paranamer-2.3.jar
109
133
  - classpath/protobuf-java-2.5.0.jar
110
134
  - classpath/servlet-api-2.5.jar
135
+ - classpath/snappy-java-1.0.4.1.jar
136
+ - classpath/stax-api-1.0-2.jar
111
137
  - classpath/xercesImpl-2.9.1.jar
112
138
  - classpath/xml-apis-1.3.04.jar
113
139
  - classpath/xmlenc-0.52.jar