embulk-output-redshift 0.4.1 → 0.4.2

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.
Files changed (30)
  1. checksums.yaml +4 -4
  2. data/README.md +104 -104
  3. data/build.gradle +9 -9
  4. data/classpath/{aws-java-sdk-core-1.9.17.jar → aws-java-sdk-core-1.10.33.jar} +0 -0
  5. data/classpath/aws-java-sdk-kms-1.10.33.jar +0 -0
  6. data/classpath/aws-java-sdk-s3-1.10.33.jar +0 -0
  7. data/classpath/aws-java-sdk-sts-1.10.33.jar +0 -0
  8. data/classpath/embulk-output-jdbc-0.4.2.jar +0 -0
  9. data/classpath/embulk-output-postgresql-0.4.2.jar +0 -0
  10. data/classpath/{embulk-output-redshift-0.4.1.jar → embulk-output-redshift-0.4.2.jar} +0 -0
  11. data/classpath/{httpclient-4.3.4.jar → httpclient-4.3.6.jar} +0 -0
  12. data/classpath/{httpcore-4.3.2.jar → httpcore-4.3.3.jar} +0 -0
  13. data/classpath/postgresql-9.4-1205-jdbc41.jar +0 -0
  14. data/lib/embulk/output/redshift.rb +3 -3
  15. data/src/main/java/org/embulk/output/RedshiftOutputPlugin.java +151 -151
  16. data/src/main/java/org/embulk/output/redshift/RedshiftCopyBatchInsert.java +218 -218
  17. data/src/main/java/org/embulk/output/redshift/RedshiftOutputConnection.java +122 -122
  18. data/src/main/java/org/embulk/output/redshift/RedshiftOutputConnector.java +40 -40
  19. metadata +12 -17
  20. data/classpath/aws-java-sdk-kms-1.9.17.jar +0 -0
  21. data/classpath/aws-java-sdk-s3-1.9.17.jar +0 -0
  22. data/classpath/aws-java-sdk-sts-1.9.17.jar +0 -0
  23. data/classpath/embulk-output-jdbc-0.4.1.jar +0 -0
  24. data/classpath/embulk-output-postgresql-0.4.1.jar +0 -0
  25. data/classpath/jna-4.1.0.jar +0 -0
  26. data/classpath/jna-platform-4.1.0.jar +0 -0
  27. data/classpath/joda-time-2.8.1.jar +0 -0
  28. data/classpath/postgresql-9.4-1200-jdbc41.jar +0 -0
  29. data/classpath/slf4j-simple-1.7.7.jar +0 -0
  30. data/classpath/waffle-jna-1.7.jar +0 -0
data/src/main/java/org/embulk/output/redshift/RedshiftCopyBatchInsert.java @@ -1,218 +1,218 @@
package org.embulk.output.redshift;

import java.util.zip.GZIPOutputStream;
import java.util.concurrent.Callable;
import java.util.UUID;
import java.io.File;
import java.io.IOException;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.io.Closeable;
import java.io.BufferedWriter;
import java.sql.SQLException;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.auth.BasicSessionCredentials;
import com.amazonaws.auth.policy.Policy;
import com.amazonaws.auth.policy.Resource;
import com.amazonaws.auth.policy.Statement;
import com.amazonaws.auth.policy.Statement.Effect;
import com.amazonaws.auth.policy.actions.S3Actions;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.securitytoken.AWSSecurityTokenServiceClient;
import com.amazonaws.services.securitytoken.model.GetFederationTokenRequest;
import com.amazonaws.services.securitytoken.model.GetFederationTokenResult;
import com.amazonaws.services.securitytoken.model.Credentials;
import org.slf4j.Logger;
import org.embulk.spi.Exec;
import org.embulk.output.jdbc.JdbcSchema;
import org.embulk.output.postgresql.AbstractPostgreSQLCopyBatchInsert;

public class RedshiftCopyBatchInsert
        extends AbstractPostgreSQLCopyBatchInsert
{
    private final Logger logger = Exec.getLogger(RedshiftCopyBatchInsert.class);
    private final RedshiftOutputConnector connector;
    private final String s3BucketName;
    private final String s3KeyPrefix;
    private final String iamReaderUserName;
    private final AmazonS3Client s3;
    private final AWSSecurityTokenServiceClient sts;

    private RedshiftOutputConnection connection = null;
    private String copySqlBeforeFrom = null;
    private long totalRows;
    private int fileCount;

    public static final String COPY_AFTER_FROM = "GZIP DELIMITER '\\t' NULL '\\\\N' ESCAPE TRUNCATECOLUMNS ACCEPTINVCHARS STATUPDATE OFF COMPUPDATE OFF";

    public RedshiftCopyBatchInsert(RedshiftOutputConnector connector,
            AWSCredentialsProvider credentialsProvider, String s3BucketName, String s3KeyPrefix,
            String iamReaderUserName) throws IOException, SQLException
    {
        super();
        this.connector = connector;
        this.s3BucketName = s3BucketName;
        if (s3KeyPrefix.isEmpty() || s3KeyPrefix.endsWith("/")) {
            this.s3KeyPrefix = s3KeyPrefix;
        } else {
            this.s3KeyPrefix = s3KeyPrefix + "/";
        }
        this.iamReaderUserName = iamReaderUserName;
        this.s3 = new AmazonS3Client(credentialsProvider);  // TODO options
        this.sts = new AWSSecurityTokenServiceClient(credentialsProvider);  // options
    }

    @Override
    public void prepare(String loadTable, JdbcSchema insertSchema) throws SQLException
    {
        this.connection = connector.connect(true);
        this.copySqlBeforeFrom = connection.buildCopySQLBeforeFrom(loadTable, insertSchema);
        logger.info("Copy SQL: "+copySqlBeforeFrom+" ? "+COPY_AFTER_FROM);
    }

    @Override
    protected BufferedWriter openWriter(File newFile) throws IOException
    {
        // Redshift supports gzip
        return new BufferedWriter(
                new OutputStreamWriter(
                    new GZIPOutputStream(new FileOutputStream(newFile)),
                    FILE_CHARSET)
                );
    }

    @Override
    public void flush() throws IOException, SQLException
    {
        File file = closeCurrentFile();  // flush buffered data in writer

        // TODO multi-threading
        new UploadAndCopyTask(file, batchRows, s3KeyPrefix + UUID.randomUUID().toString()).call();
        new DeleteFileFinalizer(file).close();

        fileCount++;
        totalRows += batchRows;
        batchRows = 0;

        openNewFile();
        file.delete();
    }

    @Override
    public void finish() throws IOException, SQLException
    {
        super.finish();
        logger.info("Loaded {} files.", fileCount);
    }

    @Override
    public void close() throws IOException, SQLException
    {
        s3.shutdown();
        closeCurrentFile().delete();
        if (connection != null) {
            connection.close();
            connection = null;
        }
    }

    private BasicSessionCredentials generateReaderSessionCredentials(String s3KeyName)
    {
        Policy policy = new Policy()
            .withStatements(
                    new Statement(Effect.Allow)
                        .withActions(S3Actions.ListObjects)
                        .withResources(new Resource("arn:aws:s3:::"+s3BucketName)),
                    new Statement(Effect.Allow)
                        .withActions(S3Actions.GetObject)
                        .withResources(new Resource("arn:aws:s3:::"+s3BucketName+"/"+s3KeyName))  // TODO encode file name using percent encoding
                    );
        GetFederationTokenRequest req = new GetFederationTokenRequest();
        req.setDurationSeconds(86400);  // 3600 - 129600
        req.setName(iamReaderUserName);
        req.setPolicy(policy.toJson());

        GetFederationTokenResult res = sts.getFederationToken(req);
        Credentials c = res.getCredentials();

        return new BasicSessionCredentials(
                c.getAccessKeyId(),
                c.getSecretAccessKey(),
                c.getSessionToken());
    }

    private class UploadAndCopyTask implements Callable<Void>
    {
        private final File file;
        private final int batchRows;
        private final String s3KeyName;

        public UploadAndCopyTask(File file, int batchRows, String s3KeyName)
        {
            this.file = file;
            this.batchRows = batchRows;
            this.s3KeyName = s3KeyName;
        }

        public Void call() throws SQLException {
            logger.info(String.format("Uploading file id %s to S3 (%,d bytes %,d rows)",
                        s3KeyName, file.length(), batchRows));
            s3.putObject(s3BucketName, s3KeyName, file);

            RedshiftOutputConnection con = connector.connect(true);
            try {
                logger.info("Running COPY from file {}", s3KeyName);

                // create temporary credential right before COPY operation because
                // it has timeout.
                // TODO skip this step if iamReaderUserName is not set
                BasicSessionCredentials creds = generateReaderSessionCredentials(s3KeyName);

                long startTime = System.currentTimeMillis();
                con.runCopy(buildCopySQL(creds));
                double seconds = (System.currentTimeMillis() - startTime) / 1000.0;

                logger.info(String.format("Loaded file %s (%.2f seconds for COPY)", s3KeyName, seconds));

            } finally {
                con.close();
            }

            return null;
        }

        private String buildCopySQL(BasicSessionCredentials creds)
        {
            StringBuilder sb = new StringBuilder();
            sb.append(copySqlBeforeFrom);
            sb.append(" FROM 's3://");
            sb.append(s3BucketName);
            sb.append("/");
            sb.append(s3KeyName);
            sb.append("' CREDENTIALS '");
            sb.append("aws_access_key_id=");
            sb.append(creds.getAWSAccessKeyId());
            sb.append(";aws_secret_access_key=");
            sb.append(creds.getAWSSecretKey());
            sb.append(";token=");
            sb.append(creds.getSessionToken());
            sb.append("' ");
            sb.append(COPY_AFTER_FROM);
            return sb.toString();
        }
    }

    private static class DeleteFileFinalizer implements Closeable
    {
        private File file;

        public DeleteFileFinalizer(File file) {
            this.file = file;
        }

        @Override
        public void close() throws IOException {
            file.delete();
        }
    }
}
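
For reference, buildCopySQL() assembles the full COPY statement from three pieces: the fragment returned by RedshiftOutputConnection.buildCopySQLBeforeFrom() (logged by prepare() with a "?" marking the splice point), a FROM/CREDENTIALS clause built from the uploaded S3 key and the federation-token credentials, and the fixed COPY_AFTER_FROM options. A sketch of the result is below; the table name, columns, bucket, key, and credential values are placeholders, and the leading fragment is shown in a typical form since buildCopySQLBeforeFrom() is not part of this diff. Note the delimiter and null options are the unescaped forms of the Java string literal COPY_AFTER_FROM.

    COPY "my_table" ("id", "name", "created_at")
    FROM 's3://my-bucket/my-prefix/6f1c0a2e-uuid'
    CREDENTIALS 'aws_access_key_id=ASIA...;aws_secret_access_key=...;token=...'
    GZIP DELIMITER '\t' NULL '\\N' ESCAPE TRUNCATECOLUMNS ACCEPTINVCHARS STATUPDATE OFF COMPUPDATE OFF

The actual statement is emitted as a single line; it is wrapped here only for readability.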