embulk-output-orc 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 376187534c3d236c20656a656ed04d5c858e9a83
4
- data.tar.gz: 4d4b74153e1e2fe0248b4ba5d097723dbc42022e
3
+ metadata.gz: 9a4d872e06384fd47cf8c1707398234974e0ecb3
4
+ data.tar.gz: bb3855c44c55f16bc5e1707f09e3a1b6c683aee6
5
5
  SHA512:
6
- metadata.gz: 888dab123d9cea81defbd51c0d5b812bd6c90e84c578927a72a491a8fffb4fac61fcced52ba7d759fdd1c36d9dba08a424c2d704b6f3f1ae51bbfdb3fac50728
7
- data.tar.gz: bb7c0de447e7bf98b2ad7e7ba2b309854b0beaf37b6c10fd2a82be3736f6e39c8a576c985be0c62e9223ab24cb1a9bd4662e64d12974be490cd042002c933699
6
+ metadata.gz: a16e0c62d57089d5ff431021b97586a8203a423ab3f8797a4e2274f26cb168a91a59ae72d6d42b4f2540209565d05d9e3f1ae66af242e52c90ab1f239afe90cd
7
+ data.tar.gz: 21f77bd871793f06b014ce9df5c08ab7e0f0d610b7cc69b749e415a1f181891288ba06211dbe3d769bfd756bfaaf344d1eeecd42db9f085a7feba28560921c96
data/README.md CHANGED
@@ -1,6 +1,7 @@
1
1
  # Orc output plugin for Embulk
2
2
 
3
3
  [![Build Status](https://travis-ci.org/yuokada/embulk-output-orc.svg?branch=master)](https://travis-ci.org/yuokada/embulk-output-orc)
4
+ [![Gem Version](https://badge.fury.io/rb/embulk-output-orc.svg)](https://badge.fury.io/rb/embulk-output-orc)
4
5
 
5
6
  ## Overview
6
7
 
@@ -12,6 +13,7 @@
12
13
  ## Configuration
13
14
 
14
15
  - **path_prefix**: A prefix of output path. (string, required)
16
+ - Supported schemes: `file`, `s3n` and `s3a`.
15
17
  - **file_ext**: An extension of output file. (string, default: `.orc`)
16
18
  - **sequence_format**: (string, default: `.%03d`)
17
19
  - **buffer_size**: Set the ORC buffer size (integer, default: `10000`)
@@ -20,6 +22,12 @@
20
22
  - **overwrite**: (LocalFileSystem only) Overwrite if output files already exist. (boolean, default: `false`)
21
23
  - **default_from_timezone**: Time zone of timestamp columns. This can be overridden for each column using column_options (DateTimeZone, default: `UTC`)
22
24
 
25
+ - **auth_method**: Name of the mechanism used to authenticate requests (`basic`, `env`, `instance`, `profile`, `properties`, `anonymous`, or `session`; default: `basic`)
26
+ see: https://github.com/embulk/embulk-input-s3#configuration
27
+
28
+ - `env`, `basic`, `profile`, `default`, `session`, `anonymous`, `properties`
29
+
30
+
23
31
  ## Example
24
32
 
25
33
  ```yaml
@@ -32,6 +40,17 @@ out:
32
40
  overwrite: true
33
41
  ```
34
42
 
43
+ ## ChangeLog
44
+
45
+ ### ver 0.2.0
46
+
47
+ - support: output to s3
48
+
49
+ - `s3n`, `s3a` protocol
50
+
51
+ ### ver 0.1.0
52
+
53
+ - initial release
35
54
 
36
55
  ## Build
37
56
 
@@ -8,13 +8,16 @@ import com.github.jrubygradle.JRubyExec
8
8
  repositories {
9
9
  mavenCentral()
10
10
  jcenter()
11
+ maven {
12
+ url "http://dl.bintray.com/embulk-input-s3/maven"
13
+ }
11
14
  }
12
15
  configurations {
13
16
  provided
14
17
  runtime.exclude group: "org.slf4j", module: "slf4j-log4j12"
15
18
  }
16
19
 
17
- version = "0.1.0"
20
+ version = "0.2.0"
18
21
 
19
22
  sourceCompatibility = 1.8
20
23
  targetCompatibility = 1.8
@@ -27,6 +30,10 @@ dependencies {
27
30
  compile "org.apache.orc:orc-core:1.4.0"
28
31
  compile "org.apache.hadoop:hadoop-hdfs:2.6.4"
29
32
 
33
+ compile 'org.embulk.input.s3:embulk-util-aws-credentials:0.2.8'
34
+ compile "com.amazonaws:aws-java-sdk-s3:1.10.33"
35
+ compile "org.apache.hadoop:hadoop-aws:2.7.3"
36
+
30
37
  testCompile "junit:junit:4.+"
31
38
  testCompile "org.embulk:embulk-core:0.8.29:tests"
32
39
  testCompile "org.embulk:embulk-standards:0.8.29"
@@ -29,6 +29,8 @@ import org.embulk.spi.TransactionalPageOutput;
29
29
  import org.embulk.spi.time.TimestampFormatter;
30
30
  import org.embulk.spi.type.Type;
31
31
  import org.embulk.spi.util.Timestamps;
32
+ import org.embulk.util.aws.credentials.AwsCredentials;
33
+ import org.embulk.util.aws.credentials.AwsCredentialsTask;
32
34
  import org.joda.time.DateTimeZone;
33
35
  import org.joda.time.format.DateTimeFormat;
34
36
  import org.joda.time.format.DateTimeFormatter;
@@ -41,7 +43,7 @@ public class OrcOutputPlugin
41
43
  implements OutputPlugin
42
44
  {
43
45
  public interface PluginTask
44
- extends Task, TimestampFormatter.Task
46
+ extends Task, TimestampFormatter.Task, AwsCredentialsTask
45
47
  {
46
48
  @Config("path_prefix")
47
49
  String getPathPrefix();
@@ -176,7 +178,7 @@ public class OrcOutputPlugin
176
178
  return oschema;
177
179
  }
178
180
 
179
- private Configuration getHadoopConfiguration()
181
+ private Configuration getHadoopConfiguration(PluginTask task)
180
182
  {
181
183
  Configuration conf = new Configuration();
182
184
 
@@ -185,14 +187,25 @@ public class OrcOutputPlugin
185
187
  conf.set("fs.file.impl", LocalFileSystem.class.getName());
186
188
  // see: https://stackoverflow.com/questions/20833444/how-to-set-objects-in-hadoop-configuration
187
189
 
190
+ AwsCredentials.getAWSCredentialsProvider(task);
191
+ if (task.getAccessKeyId().isPresent()) {
192
+ conf.set("fs.s3a.access.key", task.getAccessKeyId().get());
193
+ conf.set("fs.s3n.awsAccessKeyId", task.getAccessKeyId().get());
194
+ }
195
+ if (task.getSecretAccessKey().isPresent()) {
196
+ conf.set("fs.s3a.secret.key", task.getSecretAccessKey().get());
197
+ conf.set("fs.s3n.awsSecretAccessKey", task.getSecretAccessKey().get());
198
+ }
199
+
188
200
  return conf;
189
201
  }
190
202
 
191
203
  private Writer createWriter(PluginTask task, Schema schema, int processorIndex)
192
204
  {
193
- final TimestampFormatter[] timestampFormatters = Timestamps.newTimestampColumnFormatters(task, schema, task.getColumnOptions());
205
+ final TimestampFormatter[] timestampFormatters = Timestamps
206
+ .newTimestampColumnFormatters(task, schema, task.getColumnOptions());
194
207
 
195
- Configuration conf = getHadoopConfiguration();
208
+ Configuration conf = getHadoopConfiguration(task);
196
209
  TypeDescription oschema = getSchema(schema);
197
210
 
198
211
  // see: https://groups.google.com/forum/#!topic/vertx/lLb-slzpWVg
@@ -256,7 +269,8 @@ public class OrcOutputPlugin
256
269
  this.writer = writer;
257
270
 
258
271
  // formatter
259
- DateTimeZone defaultTimeZone = DateTimeZone.forTimeZone(task.getDefaultFromTimeZone().toTimeZone());
272
+ DateTimeZone defaultTimeZone = DateTimeZone
273
+ .forTimeZone(task.getDefaultFromTimeZone().toTimeZone());
260
274
  formatter = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss").withZone(defaultTimeZone);
261
275
  }
262
276
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-output-orc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - yuokada
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-08-23 00:00:00.000000000 Z
11
+ date: 2017-08-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -62,18 +62,26 @@ files:
62
62
  - src/main/java/org/embulk/output/orc/OrcOutputPlugin.java
63
63
  - src/main/java/org/embulk/output/orc/OrcOutputPluginHelper.java
64
64
  - src/test/java/org/embulk/output/orc/TestOrcOutputPlugin.java
65
+ - classpath/activation-1.1.jar
65
66
  - classpath/aircompressor-0.3.jar
66
67
  - classpath/apacheds-i18n-2.0.0-M15.jar
67
68
  - classpath/apacheds-kerberos-codec-2.0.0-M15.jar
68
69
  - classpath/api-asn1-api-1.0.0-M20.jar
69
70
  - classpath/api-util-1.0.0-M20.jar
70
71
  - classpath/asm-3.1.jar
72
+ - classpath/avro-1.7.4.jar
73
+ - classpath/aws-java-sdk-1.7.4.jar
74
+ - classpath/aws-java-sdk-core-1.10.33.jar
75
+ - classpath/aws-java-sdk-kms-1.10.33.jar
76
+ - classpath/aws-java-sdk-s3-1.10.33.jar
77
+ - classpath/commons-beanutils-1.7.0.jar
71
78
  - classpath/commons-cli-1.2.jar
72
79
  - classpath/commons-codec-1.6.jar
73
80
  - classpath/commons-collections-3.2.2.jar
74
81
  - classpath/commons-compress-1.4.1.jar
75
82
  - classpath/commons-configuration-1.6.jar
76
83
  - classpath/commons-daemon-1.0.13.jar
84
+ - classpath/commons-digester-1.8.jar
77
85
  - classpath/commons-el-1.0.jar
78
86
  - classpath/commons-httpclient-3.1.jar
79
87
  - classpath/commons-io-2.4.jar
@@ -81,33 +89,51 @@ files:
81
89
  - classpath/commons-logging-1.1.3.jar
82
90
  - classpath/commons-math3-3.1.1.jar
83
91
  - classpath/commons-net-3.1.jar
84
- - classpath/curator-client-2.6.0.jar
85
- - classpath/curator-framework-2.6.0.jar
86
- - classpath/embulk-output-orc-0.1.0.jar
92
+ - classpath/curator-client-2.7.1.jar
93
+ - classpath/curator-framework-2.7.1.jar
94
+ - classpath/curator-recipes-2.7.1.jar
95
+ - classpath/embulk-output-orc-0.2.0.jar
96
+ - classpath/embulk-util-aws-credentials-0.2.8.jar
87
97
  - classpath/gson-2.2.4.jar
88
- - classpath/hadoop-annotations-2.6.4.jar
89
- - classpath/hadoop-auth-2.6.4.jar
90
- - classpath/hadoop-common-2.6.4.jar
98
+ - classpath/hadoop-annotations-2.7.3.jar
99
+ - classpath/hadoop-auth-2.7.3.jar
100
+ - classpath/hadoop-aws-2.7.3.jar
101
+ - classpath/hadoop-common-2.7.3.jar
91
102
  - classpath/hadoop-hdfs-2.6.4.jar
92
103
  - classpath/hive-storage-api-2.2.1.jar
93
104
  - classpath/htrace-core-3.0.4.jar
94
- - classpath/httpclient-4.2.5.jar
95
- - classpath/httpcore-4.2.4.jar
105
+ - classpath/htrace-core-3.1.0-incubating.jar
106
+ - classpath/httpclient-4.3.6.jar
107
+ - classpath/httpcore-4.3.3.jar
96
108
  - classpath/jackson-core-asl-1.9.13.jar
109
+ - classpath/jackson-jaxrs-1.8.3.jar
97
110
  - classpath/jackson-mapper-asl-1.9.13.jar
111
+ - classpath/jackson-xc-1.8.3.jar
98
112
  - classpath/jasper-runtime-5.5.23.jar
113
+ - classpath/java-xmlbuilder-0.4.jar
114
+ - classpath/jaxb-api-2.2.2.jar
115
+ - classpath/jaxb-impl-2.2.3-1.jar
116
+ - classpath/jcl-over-slf4j-1.7.12.jar
99
117
  - classpath/jersey-core-1.9.jar
118
+ - classpath/jersey-json-1.9.jar
100
119
  - classpath/jersey-server-1.9.jar
120
+ - classpath/jets3t-0.9.0.jar
121
+ - classpath/jettison-1.1.jar
101
122
  - classpath/jetty-6.1.26.jar
102
123
  - classpath/jetty-util-6.1.26.jar
103
124
  - classpath/jline-0.9.94.jar
125
+ - classpath/joda-time-2.9.9.jar
104
126
  - classpath/jsch-0.1.42.jar
105
127
  - classpath/jsp-api-2.1.jar
128
+ - classpath/jsr305-3.0.0.jar
106
129
  - classpath/log4j-1.2.17.jar
107
130
  - classpath/netty-3.7.0.Final.jar
108
131
  - classpath/orc-core-1.4.0.jar
132
+ - classpath/paranamer-2.3.jar
109
133
  - classpath/protobuf-java-2.5.0.jar
110
134
  - classpath/servlet-api-2.5.jar
135
+ - classpath/snappy-java-1.0.4.1.jar
136
+ - classpath/stax-api-1.0-2.jar
111
137
  - classpath/xercesImpl-2.9.1.jar
112
138
  - classpath/xml-apis-1.3.04.jar
113
139
  - classpath/xmlenc-0.52.jar