embulk-output-redshift 0.4.1 → 0.4.2

Files changed (30)
  1. checksums.yaml +4 -4
  2. data/README.md +104 -104
  3. data/build.gradle +9 -9
  4. data/classpath/{aws-java-sdk-core-1.9.17.jar → aws-java-sdk-core-1.10.33.jar} +0 -0
  5. data/classpath/aws-java-sdk-kms-1.10.33.jar +0 -0
  6. data/classpath/aws-java-sdk-s3-1.10.33.jar +0 -0
  7. data/classpath/aws-java-sdk-sts-1.10.33.jar +0 -0
  8. data/classpath/embulk-output-jdbc-0.4.2.jar +0 -0
  9. data/classpath/embulk-output-postgresql-0.4.2.jar +0 -0
  10. data/classpath/{embulk-output-redshift-0.4.1.jar → embulk-output-redshift-0.4.2.jar} +0 -0
  11. data/classpath/{httpclient-4.3.4.jar → httpclient-4.3.6.jar} +0 -0
  12. data/classpath/{httpcore-4.3.2.jar → httpcore-4.3.3.jar} +0 -0
  13. data/classpath/postgresql-9.4-1205-jdbc41.jar +0 -0
  14. data/lib/embulk/output/redshift.rb +3 -3
  15. data/src/main/java/org/embulk/output/RedshiftOutputPlugin.java +151 -151
  16. data/src/main/java/org/embulk/output/redshift/RedshiftCopyBatchInsert.java +218 -218
  17. data/src/main/java/org/embulk/output/redshift/RedshiftOutputConnection.java +122 -122
  18. data/src/main/java/org/embulk/output/redshift/RedshiftOutputConnector.java +40 -40
  19. metadata +12 -17
  20. data/classpath/aws-java-sdk-kms-1.9.17.jar +0 -0
  21. data/classpath/aws-java-sdk-s3-1.9.17.jar +0 -0
  22. data/classpath/aws-java-sdk-sts-1.9.17.jar +0 -0
  23. data/classpath/embulk-output-jdbc-0.4.1.jar +0 -0
  24. data/classpath/embulk-output-postgresql-0.4.1.jar +0 -0
  25. data/classpath/jna-4.1.0.jar +0 -0
  26. data/classpath/jna-platform-4.1.0.jar +0 -0
  27. data/classpath/joda-time-2.8.1.jar +0 -0
  28. data/classpath/postgresql-9.4-1200-jdbc41.jar +0 -0
  29. data/classpath/slf4j-simple-1.7.7.jar +0 -0
  30. data/classpath/waffle-jna-1.7.jar +0 -0
data/src/main/java/org/embulk/output/redshift/RedshiftCopyBatchInsert.java
@@ -1,218 +1,218 @@ (the removed and re-added contents are textually identical; the file is shown once below)
package org.embulk.output.redshift;

import java.util.zip.GZIPOutputStream;
import java.util.concurrent.Callable;
import java.util.UUID;
import java.io.File;
import java.io.IOException;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.io.Closeable;
import java.io.BufferedWriter;
import java.sql.SQLException;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.auth.BasicSessionCredentials;
import com.amazonaws.auth.policy.Policy;
import com.amazonaws.auth.policy.Resource;
import com.amazonaws.auth.policy.Statement;
import com.amazonaws.auth.policy.Statement.Effect;
import com.amazonaws.auth.policy.actions.S3Actions;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.securitytoken.AWSSecurityTokenServiceClient;
import com.amazonaws.services.securitytoken.model.GetFederationTokenRequest;
import com.amazonaws.services.securitytoken.model.GetFederationTokenResult;
import com.amazonaws.services.securitytoken.model.Credentials;
import org.slf4j.Logger;
import org.embulk.spi.Exec;
import org.embulk.output.jdbc.JdbcSchema;
import org.embulk.output.postgresql.AbstractPostgreSQLCopyBatchInsert;

public class RedshiftCopyBatchInsert
        extends AbstractPostgreSQLCopyBatchInsert
{
    private final Logger logger = Exec.getLogger(RedshiftCopyBatchInsert.class);
    private final RedshiftOutputConnector connector;
    private final String s3BucketName;
    private final String s3KeyPrefix;
    private final String iamReaderUserName;
    private final AmazonS3Client s3;
    private final AWSSecurityTokenServiceClient sts;

    private RedshiftOutputConnection connection = null;
    private String copySqlBeforeFrom = null;
    private long totalRows;
    private int fileCount;

    public static final String COPY_AFTER_FROM = "GZIP DELIMITER '\\t' NULL '\\\\N' ESCAPE TRUNCATECOLUMNS ACCEPTINVCHARS STATUPDATE OFF COMPUPDATE OFF";

    public RedshiftCopyBatchInsert(RedshiftOutputConnector connector,
            AWSCredentialsProvider credentialsProvider, String s3BucketName, String s3KeyPrefix,
            String iamReaderUserName) throws IOException, SQLException
    {
        super();
        this.connector = connector;
        this.s3BucketName = s3BucketName;
        if (s3KeyPrefix.isEmpty() || s3KeyPrefix.endsWith("/")) {
            this.s3KeyPrefix = s3KeyPrefix;
        } else {
            this.s3KeyPrefix = s3KeyPrefix + "/";
        }
        this.iamReaderUserName = iamReaderUserName;
        this.s3 = new AmazonS3Client(credentialsProvider);  // TODO options
        this.sts = new AWSSecurityTokenServiceClient(credentialsProvider);  // options
    }

    @Override
    public void prepare(String loadTable, JdbcSchema insertSchema) throws SQLException
    {
        this.connection = connector.connect(true);
        this.copySqlBeforeFrom = connection.buildCopySQLBeforeFrom(loadTable, insertSchema);
        logger.info("Copy SQL: " + copySqlBeforeFrom + " ? " + COPY_AFTER_FROM);
    }

    @Override
    protected BufferedWriter openWriter(File newFile) throws IOException
    {
        // Redshift supports gzip
        return new BufferedWriter(
                new OutputStreamWriter(
                        new GZIPOutputStream(new FileOutputStream(newFile)),
                        FILE_CHARSET)
                );
    }

    @Override
    public void flush() throws IOException, SQLException
    {
        File file = closeCurrentFile();  // flush buffered data in writer

        // TODO multi-threading
        new UploadAndCopyTask(file, batchRows, s3KeyPrefix + UUID.randomUUID().toString()).call();
        new DeleteFileFinalizer(file).close();

        fileCount++;
        totalRows += batchRows;
        batchRows = 0;

        openNewFile();
        file.delete();
    }
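
    // Note on the sequence above: closeCurrentFile() flushes and closes the gzip
    // writer, UploadAndCopyTask.call() uploads the finished file and runs COPY
    // synchronously, and DeleteFileFinalizer.close() deletes the local file, so
    // the trailing file.delete() is a harmless no-op.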

    @Override
    public void finish() throws IOException, SQLException
    {
        super.finish();
        logger.info("Loaded {} files.", fileCount);
    }

    @Override
    public void close() throws IOException, SQLException
    {
        s3.shutdown();
        closeCurrentFile().delete();
        if (connection != null) {
            connection.close();
            connection = null;
        }
    }

    private BasicSessionCredentials generateReaderSessionCredentials(String s3KeyName)
    {
        Policy policy = new Policy()
                .withStatements(
                        new Statement(Effect.Allow)
                                .withActions(S3Actions.ListObjects)
                                .withResources(new Resource("arn:aws:s3:::" + s3BucketName)),
                        new Statement(Effect.Allow)
                                .withActions(S3Actions.GetObject)
                                .withResources(new Resource("arn:aws:s3:::" + s3BucketName + "/" + s3KeyName))  // TODO encode file name using percent encoding
                );
        GetFederationTokenRequest req = new GetFederationTokenRequest();
        req.setDurationSeconds(86400);  // 3600 - 129600
        req.setName(iamReaderUserName);
        req.setPolicy(policy.toJson());

        GetFederationTokenResult res = sts.getFederationToken(req);
        Credentials c = res.getCredentials();

        return new BasicSessionCredentials(
                c.getAccessKeyId(),
                c.getSecretAccessKey(),
                c.getSessionToken());
    }
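
    // Note on the method above: the federation token is scoped to listing the
    // bucket and reading the single uploaded object, so the credentials embedded
    // in the COPY statement grant no further access. The 86400-second duration
    // is 24 hours, within the 3600-129600 second range noted in the comment.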

    private class UploadAndCopyTask implements Callable<Void>
    {
        private final File file;
        private final int batchRows;
        private final String s3KeyName;

        public UploadAndCopyTask(File file, int batchRows, String s3KeyName)
        {
            this.file = file;
            this.batchRows = batchRows;
            this.s3KeyName = s3KeyName;
        }

        public Void call() throws SQLException {
            logger.info(String.format("Uploading file id %s to S3 (%,d bytes %,d rows)",
                    s3KeyName, file.length(), batchRows));
            s3.putObject(s3BucketName, s3KeyName, file);

            RedshiftOutputConnection con = connector.connect(true);
            try {
                logger.info("Running COPY from file {}", s3KeyName);

                // Create temporary credentials right before the COPY operation
                // because they expire.
                // TODO skip this step if iamReaderUserName is not set
                BasicSessionCredentials creds = generateReaderSessionCredentials(s3KeyName);

                long startTime = System.currentTimeMillis();
                con.runCopy(buildCopySQL(creds));
                double seconds = (System.currentTimeMillis() - startTime) / 1000.0;

                logger.info(String.format("Loaded file %s (%.2f seconds for COPY)", s3KeyName, seconds));

            } finally {
                con.close();
            }

            return null;
        }

        private String buildCopySQL(BasicSessionCredentials creds)
        {
            StringBuilder sb = new StringBuilder();
            sb.append(copySqlBeforeFrom);
            sb.append(" FROM 's3://");
            sb.append(s3BucketName);
            sb.append("/");
            sb.append(s3KeyName);
            sb.append("' CREDENTIALS '");
            sb.append("aws_access_key_id=");
            sb.append(creds.getAWSAccessKeyId());
            sb.append(";aws_secret_access_key=");
            sb.append(creds.getAWSSecretKey());
            sb.append(";token=");
            sb.append(creds.getSessionToken());
            sb.append("' ");
            sb.append(COPY_AFTER_FROM);
            return sb.toString();
        }
    }

    private static class DeleteFileFinalizer implements Closeable
    {
        private File file;

        public DeleteFileFinalizer(File file) {
            this.file = file;
        }

        @Override
        public void close() throws IOException {
            file.delete();
        }
    }
}
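
For reference, buildCopySQL() assembles a COPY statement of the following shape. The portion before FROM comes from RedshiftOutputConnection.buildCopySQLBeforeFrom(), which this hunk does not show, so the table and column names below, like the bucket, key, and credential values, are hypothetical placeholders (line breaks added for readability; the generated statement is a single line):

    COPY "example_table" ("id", "name")
    FROM 's3://example-bucket/example-prefix/1c2d3e4f-aaaa-bbbb-cccc-0123456789ab'
    CREDENTIALS 'aws_access_key_id=ASIAEXAMPLE;aws_secret_access_key=EXAMPLEKEY;token=EXAMPLETOKEN'
    GZIP DELIMITER '\t' NULL '\\N' ESCAPE TRUNCATECOLUMNS ACCEPTINVCHARS STATUPDATE OFF COMPUPDATE OFF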