embulk-output-redshift 0.5.0 → 0.5.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +10 -4
- data/classpath/{embulk-output-jdbc-0.5.0.jar → embulk-output-jdbc-0.5.1.jar} +0 -0
- data/classpath/{embulk-output-postgresql-0.5.0.jar → embulk-output-postgresql-0.5.1.jar} +0 -0
- data/classpath/embulk-output-redshift-0.5.1.jar +0 -0
- data/src/main/java/org/embulk/output/RedshiftOutputPlugin.java +1 -0
- data/src/main/java/org/embulk/output/redshift/RedshiftCopyBatchInsert.java +121 -58
- metadata +5 -5
- data/classpath/embulk-output-redshift-0.5.0.jar +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c8c2e9f95d662d1860ccba4b210c6dae2d1cc44b
|
4
|
+
data.tar.gz: cd0afa3f4352bc9732e27d1fdd132398114356c6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 15a7eb5c206c87822f163dd105af21444cf3d925f01e01e0eb7e4ab0e499646b814e9f9b377b49df677b79cb7a559ac97674f7179f699f2dc3cd96523e99216c
|
7
|
+
data.tar.gz: dd060ac7a977ee44b20e5927c3e121d2b930e81b18282b0407a658921db8c7f9fb392fdb277c2078b9dbf08f13de3d987337cb2064c7637205726b82035f85f9
|
data/README.md
CHANGED
@@ -5,8 +5,8 @@ Redshift output plugins for Embulk loads records to Redshift.
|
|
5
5
|
## Overview
|
6
6
|
|
7
7
|
* **Plugin type**: output
|
8
|
-
* **Load all or nothing**: depnds on the mode. see
|
9
|
-
* **Resume supported**: depnds on the mode. see
|
8
|
+
* **Load all or nothing**: depends on the mode. See below.
|
9
|
+
* **Resume supported**: depends on the mode. See below.
|
10
10
|
|
11
11
|
## Configuration
|
12
12
|
|
@@ -19,11 +19,11 @@ Redshift output plugins for Embulk loads records to Redshift.
|
|
19
19
|
- **table**: destination table name (string, required)
|
20
20
|
- **access_key_id**: access key id for AWS
|
21
21
|
- **secret_access_key**: secret access key for AWS
|
22
|
-
- **iam_user_name**: IAM user name for uploading temporary files to S3. The user should have permissions of `s3:GetObject`, `s3:PutObject`, `s3:ListBucket` and `sts:GetFederationToken`.
|
22
|
+
- **iam_user_name**: IAM user name for uploading temporary files to S3. The user should have permissions of `s3:GetObject`, `s3:PutObject`, `s3:DeleteObject`, `s3:ListBucket` and `sts:GetFederationToken`. (string, default: "", but we strongly recommend that you use an IAM user for security reasons. See below.)
|
23
23
|
- **s3_bucket**: S3 bucket name for temporary files
|
24
24
|
- **s3_key_prefix**: S3 key prefix for temporary files (string, default:"")
|
25
25
|
- **options**: extra connection properties (hash, default: {})
|
26
|
-
- **mode**: "
|
26
|
+
- **mode**: "insert", "insert_direct", "truncate_insert", or "replace". See below. (string, required)
|
27
27
|
- **batch_size**: size of a single batch insert (integer, default: 16777216)
|
28
28
|
- **default_timezone**: If input column type (embulk type) is timestamp, this plugin needs to format the timestamp into a SQL string. This default_timezone option is used to control the timezone. You can overwrite timezone for each columns using column_options option. (string, default: `UTC`)
|
29
29
|
- **column_options**: advanced: key-value pairs where the key is a column name and the value is the options for that column.
|
@@ -32,6 +32,7 @@ Redshift output plugins for Embulk loads records to Redshift.
|
|
32
32
|
- **timestamp_format**: If input column type (embulk type) is timestamp and value_type is `string` or `nstring`, this plugin needs to format the timestamp value into a string. This timestamp_format option is used to control the format of the timestamp. (string, default: `%Y-%m-%d %H:%M:%S.%6N`)
|
33
33
|
- **timezone**: If input column type (embulk type) is timestamp, this plugin needs to format the timestamp value into a SQL string. In this case, this timezone option is used to control the timezone. (string, value of default_timezone option is used by default)
|
34
34
|
|
35
|
+
|
35
36
|
### Modes
|
36
37
|
|
37
38
|
* **insert**:
|
@@ -98,3 +99,8 @@ out:
|
|
98
99
|
```
|
99
100
|
$ ./gradlew gem
|
100
101
|
```
|
102
|
+
|
103
|
+
### Security
|
104
|
+
This plugin requires AWS access credentials so that it may write temporary files to S3. There are two security options, Standard and Federated.
|
105
|
+
To use Standard security, give **access_key_id** and **secret_access_key**. To use Federated mode, also give the **iam_user_name** field.
|
106
|
+
Federated mode uses temporary credentials, so an attacker who intercepts them (for example, through a man-in-the-middle attack) obtains AWS credentials that remain valid for only 24 hours after they are issued.
|
Binary file
|
Binary file
|
Binary file
|
@@ -1,15 +1,27 @@
|
|
1
1
|
package org.embulk.output.redshift;
|
2
2
|
|
3
|
-
import java.
|
4
|
-
import java.util.concurrent.Callable;
|
5
|
-
import java.util.UUID;
|
3
|
+
import java.io.BufferedWriter;
|
6
4
|
import java.io.File;
|
7
|
-
import java.io.IOException;
|
8
5
|
import java.io.FileOutputStream;
|
6
|
+
import java.io.IOException;
|
9
7
|
import java.io.OutputStreamWriter;
|
10
|
-
import java.io.Closeable;
|
11
|
-
import java.io.BufferedWriter;
|
12
8
|
import java.sql.SQLException;
|
9
|
+
import java.util.ArrayList;
|
10
|
+
import java.util.List;
|
11
|
+
import java.util.UUID;
|
12
|
+
import java.util.concurrent.Callable;
|
13
|
+
import java.util.concurrent.ExecutionException;
|
14
|
+
import java.util.concurrent.ExecutorService;
|
15
|
+
import java.util.concurrent.Executors;
|
16
|
+
import java.util.concurrent.Future;
|
17
|
+
import java.util.concurrent.TimeUnit;
|
18
|
+
import java.util.zip.GZIPOutputStream;
|
19
|
+
|
20
|
+
import org.embulk.output.jdbc.JdbcSchema;
|
21
|
+
import org.embulk.output.postgresql.AbstractPostgreSQLCopyBatchInsert;
|
22
|
+
import org.embulk.spi.Exec;
|
23
|
+
import org.slf4j.Logger;
|
24
|
+
|
13
25
|
import com.amazonaws.auth.AWSCredentialsProvider;
|
14
26
|
import com.amazonaws.auth.BasicSessionCredentials;
|
15
27
|
import com.amazonaws.auth.policy.Policy;
|
@@ -19,13 +31,9 @@ import com.amazonaws.auth.policy.Statement.Effect;
|
|
19
31
|
import com.amazonaws.auth.policy.actions.S3Actions;
|
20
32
|
import com.amazonaws.services.s3.AmazonS3Client;
|
21
33
|
import com.amazonaws.services.securitytoken.AWSSecurityTokenServiceClient;
|
34
|
+
import com.amazonaws.services.securitytoken.model.Credentials;
|
22
35
|
import com.amazonaws.services.securitytoken.model.GetFederationTokenRequest;
|
23
36
|
import com.amazonaws.services.securitytoken.model.GetFederationTokenResult;
|
24
|
-
import com.amazonaws.services.securitytoken.model.Credentials;
|
25
|
-
import org.slf4j.Logger;
|
26
|
-
import org.embulk.spi.Exec;
|
27
|
-
import org.embulk.output.jdbc.JdbcSchema;
|
28
|
-
import org.embulk.output.postgresql.AbstractPostgreSQLCopyBatchInsert;
|
29
37
|
|
30
38
|
public class RedshiftCopyBatchInsert
|
31
39
|
extends AbstractPostgreSQLCopyBatchInsert
|
@@ -35,13 +43,16 @@ public class RedshiftCopyBatchInsert
|
|
35
43
|
private final String s3BucketName;
|
36
44
|
private final String s3KeyPrefix;
|
37
45
|
private final String iamReaderUserName;
|
46
|
+
private final AWSCredentialsProvider credentialsProvider;
|
38
47
|
private final AmazonS3Client s3;
|
39
48
|
private final AWSSecurityTokenServiceClient sts;
|
49
|
+
private final ExecutorService executorService;
|
40
50
|
|
41
51
|
private RedshiftOutputConnection connection = null;
|
42
52
|
private String copySqlBeforeFrom = null;
|
43
53
|
private long totalRows;
|
44
54
|
private int fileCount;
|
55
|
+
private List<Future<Void>> uploadAndCopyFutures;
|
45
56
|
|
46
57
|
public static final String COPY_AFTER_FROM = "GZIP DELIMITER '\\t' NULL '\\\\N' ESCAPE TRUNCATECOLUMNS ACCEPTINVCHARS STATUPDATE OFF COMPUPDATE OFF";
|
47
58
|
|
@@ -58,8 +69,12 @@ public class RedshiftCopyBatchInsert
|
|
58
69
|
this.s3KeyPrefix = s3KeyPrefix + "/";
|
59
70
|
}
|
60
71
|
this.iamReaderUserName = iamReaderUserName;
|
72
|
+
this.credentialsProvider = credentialsProvider;
|
61
73
|
this.s3 = new AmazonS3Client(credentialsProvider); // TODO options
|
62
74
|
this.sts = new AWSSecurityTokenServiceClient(credentialsProvider); // options
|
75
|
+
|
76
|
+
this.executorService = Executors.newCachedThreadPool();
|
77
|
+
this.uploadAndCopyFutures = new ArrayList<Future<Void>>();
|
63
78
|
}
|
64
79
|
|
65
80
|
@Override
|
@@ -86,28 +101,51 @@ public class RedshiftCopyBatchInsert
|
|
86
101
|
{
|
87
102
|
File file = closeCurrentFile(); // flush buffered data in writer
|
88
103
|
|
89
|
-
|
90
|
-
new
|
91
|
-
|
104
|
+
String s3KeyName = s3KeyPrefix + UUID.randomUUID().toString();
|
105
|
+
UploadTask uploadTask = new UploadTask(file, batchRows, s3KeyName);
|
106
|
+
Future<Void> uploadFuture = executorService.submit(uploadTask);
|
107
|
+
uploadAndCopyFutures.add(uploadFuture);
|
108
|
+
|
109
|
+
CopyTask copyTask = new CopyTask(uploadFuture, s3KeyName);
|
110
|
+
uploadAndCopyFutures.add(executorService.submit(copyTask));
|
92
111
|
|
93
112
|
fileCount++;
|
94
113
|
totalRows += batchRows;
|
95
114
|
batchRows = 0;
|
96
115
|
|
97
116
|
openNewFile();
|
98
|
-
file.delete();
|
99
117
|
}
|
100
118
|
|
101
119
|
@Override
|
102
120
|
public void finish() throws IOException, SQLException
|
103
121
|
{
|
104
122
|
super.finish();
|
123
|
+
|
124
|
+
for (Future<Void> uploadAndCopyFuture : uploadAndCopyFutures) {
|
125
|
+
try {
|
126
|
+
uploadAndCopyFuture.get();
|
127
|
+
|
128
|
+
} catch (InterruptedException e) {
|
129
|
+
throw new RuntimeException(e);
|
130
|
+
} catch (ExecutionException e) {
|
131
|
+
if (e.getCause() instanceof SQLException) {
|
132
|
+
throw (SQLException)e.getCause();
|
133
|
+
}
|
134
|
+
throw new RuntimeException(e);
|
135
|
+
}
|
136
|
+
}
|
137
|
+
|
105
138
|
logger.info("Loaded {} files.", fileCount);
|
106
139
|
}
|
107
140
|
|
108
141
|
@Override
|
109
142
|
public void close() throws IOException, SQLException
|
110
143
|
{
|
144
|
+
executorService.shutdownNow();
|
145
|
+
try {
|
146
|
+
executorService.awaitTermination(60, TimeUnit.SECONDS);
|
147
|
+
} catch (InterruptedException e) {}
|
148
|
+
|
111
149
|
s3.shutdown();
|
112
150
|
closeCurrentFile().delete();
|
113
151
|
if (connection != null) {
|
@@ -127,60 +165,97 @@ public class RedshiftCopyBatchInsert
|
|
127
165
|
.withActions(S3Actions.GetObject)
|
128
166
|
.withResources(new Resource("arn:aws:s3:::"+s3BucketName+"/"+s3KeyName)) // TODO encode file name using percent encoding
|
129
167
|
);
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
168
|
+
if (iamReaderUserName != null && iamReaderUserName.length() > 0) {
|
169
|
+
GetFederationTokenRequest req = new GetFederationTokenRequest();
|
170
|
+
req.setDurationSeconds(86400); // 3600 - 129600
|
171
|
+
req.setName(iamReaderUserName);
|
172
|
+
req.setPolicy(policy.toJson());
|
173
|
+
|
174
|
+
GetFederationTokenResult res = sts.getFederationToken(req);
|
175
|
+
Credentials c = res.getCredentials();
|
176
|
+
|
177
|
+
return new BasicSessionCredentials(
|
178
|
+
c.getAccessKeyId(),
|
179
|
+
c.getSecretAccessKey(),
|
180
|
+
c.getSessionToken());
|
181
|
+
} else {
|
182
|
+
return new BasicSessionCredentials(credentialsProvider.getCredentials().getAWSAccessKeyId(),
|
183
|
+
credentialsProvider.getCredentials().getAWSSecretKey(), null);
|
184
|
+
}
|
142
185
|
}
|
143
186
|
|
144
|
-
private class
|
187
|
+
private class UploadTask implements Callable<Void>
|
145
188
|
{
|
146
189
|
private final File file;
|
147
190
|
private final int batchRows;
|
148
191
|
private final String s3KeyName;
|
149
192
|
|
150
|
-
public
|
193
|
+
public UploadTask(File file, int batchRows, String s3KeyName)
|
151
194
|
{
|
152
195
|
this.file = file;
|
153
196
|
this.batchRows = batchRows;
|
154
197
|
this.s3KeyName = s3KeyName;
|
155
198
|
}
|
156
199
|
|
157
|
-
public Void call()
|
200
|
+
public Void call() {
|
158
201
|
logger.info(String.format("Uploading file id %s to S3 (%,d bytes %,d rows)",
|
159
202
|
s3KeyName, file.length(), batchRows));
|
160
|
-
s3.putObject(s3BucketName, s3KeyName, file);
|
161
203
|
|
162
|
-
RedshiftOutputConnection con = connector.connect(true);
|
163
204
|
try {
|
164
|
-
logger.info("Running COPY from file {}", s3KeyName);
|
165
|
-
|
166
|
-
// create temporary credential right before COPY operation because
|
167
|
-
// it has timeout.
|
168
|
-
// TODO skip this step if iamReaderUserName is not set
|
169
|
-
BasicSessionCredentials creds = generateReaderSessionCredentials(s3KeyName);
|
170
|
-
|
171
205
|
long startTime = System.currentTimeMillis();
|
172
|
-
|
206
|
+
s3.putObject(s3BucketName, s3KeyName, file);
|
173
207
|
double seconds = (System.currentTimeMillis() - startTime) / 1000.0;
|
174
208
|
|
175
|
-
logger.info(String.format("
|
209
|
+
logger.info(String.format("Uploaded file %s (%.2f seconds)", s3KeyName, seconds));
|
210
|
+
} finally {
|
211
|
+
file.delete();
|
212
|
+
}
|
213
|
+
|
214
|
+
return null;
|
215
|
+
}
|
216
|
+
}
|
176
217
|
|
218
|
+
private class CopyTask implements Callable<Void>
|
219
|
+
{
|
220
|
+
private final Future<Void> uploadFuture;
|
221
|
+
private final String s3KeyName;
|
222
|
+
|
223
|
+
public CopyTask(Future<Void> uploadFuture, String s3KeyName)
|
224
|
+
{
|
225
|
+
this.uploadFuture = uploadFuture;
|
226
|
+
this.s3KeyName = s3KeyName;
|
227
|
+
}
|
228
|
+
|
229
|
+
public Void call() throws SQLException, InterruptedException, ExecutionException {
|
230
|
+
try {
|
231
|
+
uploadFuture.get();
|
232
|
+
|
233
|
+
RedshiftOutputConnection con = connector.connect(true);
|
234
|
+
try {
|
235
|
+
logger.info("Running COPY from file {}", s3KeyName);
|
236
|
+
|
237
|
+
// create temporary credential right before COPY operation because
|
238
|
+
// it has timeout.
|
239
|
+
BasicSessionCredentials creds = generateReaderSessionCredentials(s3KeyName);
|
240
|
+
|
241
|
+
long startTime = System.currentTimeMillis();
|
242
|
+
con.runCopy(buildCopySQL(creds));
|
243
|
+
double seconds = (System.currentTimeMillis() - startTime) / 1000.0;
|
244
|
+
|
245
|
+
logger.info(String.format("Loaded file %s (%.2f seconds for COPY)", s3KeyName, seconds));
|
246
|
+
|
247
|
+
} finally {
|
248
|
+
con.close();
|
249
|
+
}
|
177
250
|
} finally {
|
178
|
-
|
251
|
+
s3.deleteObject(s3BucketName, s3KeyName);
|
179
252
|
}
|
180
253
|
|
181
254
|
return null;
|
182
255
|
}
|
183
256
|
|
257
|
+
|
258
|
+
|
184
259
|
private String buildCopySQL(BasicSessionCredentials creds)
|
185
260
|
{
|
186
261
|
StringBuilder sb = new StringBuilder();
|
@@ -194,25 +269,13 @@ public class RedshiftCopyBatchInsert
|
|
194
269
|
sb.append(creds.getAWSAccessKeyId());
|
195
270
|
sb.append(";aws_secret_access_key=");
|
196
271
|
sb.append(creds.getAWSSecretKey());
|
197
|
-
|
198
|
-
|
272
|
+
if (creds.getSessionToken() != null) {
|
273
|
+
sb.append(";token=");
|
274
|
+
sb.append(creds.getSessionToken());
|
275
|
+
}
|
199
276
|
sb.append("' ");
|
200
277
|
sb.append(COPY_AFTER_FROM);
|
201
278
|
return sb.toString();
|
202
279
|
}
|
203
280
|
}
|
204
|
-
|
205
|
-
private static class DeleteFileFinalizer implements Closeable
|
206
|
-
{
|
207
|
-
private File file;
|
208
|
-
|
209
|
-
public DeleteFileFinalizer(File file) {
|
210
|
-
this.file = file;
|
211
|
-
}
|
212
|
-
|
213
|
-
@Override
|
214
|
-
public void close() throws IOException {
|
215
|
-
file.delete();
|
216
|
-
}
|
217
|
-
}
|
218
281
|
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-output-redshift
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.
|
4
|
+
version: 0.5.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-03-29 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Inserts or updates records to a table.
|
14
14
|
email:
|
@@ -30,9 +30,9 @@ files:
|
|
30
30
|
- classpath/aws-java-sdk-sts-1.10.33.jar
|
31
31
|
- classpath/commons-codec-1.6.jar
|
32
32
|
- classpath/commons-logging-1.1.3.jar
|
33
|
-
- classpath/embulk-output-jdbc-0.5.
|
34
|
-
- classpath/embulk-output-postgresql-0.5.
|
35
|
-
- classpath/embulk-output-redshift-0.5.
|
33
|
+
- classpath/embulk-output-jdbc-0.5.1.jar
|
34
|
+
- classpath/embulk-output-postgresql-0.5.1.jar
|
35
|
+
- classpath/embulk-output-redshift-0.5.1.jar
|
36
36
|
- classpath/httpclient-4.3.6.jar
|
37
37
|
- classpath/httpcore-4.3.3.jar
|
38
38
|
- classpath/postgresql-9.4-1205-jdbc41.jar
|
Binary file
|