embulk-input-s3 0.1.7 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: cd67a9aea3d1352358739b2c6917f3204301443e
4
- data.tar.gz: b143676db2502a2e39d7cb430bd4bf8f67933a5b
3
+ metadata.gz: 02cd2ac88b83d92846715d194928f38a32693ebe
4
+ data.tar.gz: 145815003f894fc2a2a224266e00908dae89cf79
5
5
  SHA512:
6
- metadata.gz: 08b84503dda64c6d4e3765d66a368070d47bf1214d96ac9f5ff77478cc888782c799fb193a0925ce508b8b0867ead0e2bde4185db2f92eb0433807c400cd18d6
7
- data.tar.gz: b6541d55d9e4b7317267b3f57eaed9795c0b1ff7591f4de240890e2b5f0580575b39461e066fbf85bef5e894e54c201641721195899b9246f50f79756e703239
6
+ metadata.gz: e9d81495964337639a2a6e7de2ce9ca3c3e5b39ca869ff1873e5ed119de95ac8ae778fe3450d85068dcd7b773bbfd8345835b46ab226ad73f1117c2fb614c0bc
7
+ data.tar.gz: 5f50eb790d085fb427a23e3819bdada69bb0cafba84e6e4175a8a317e7cf7e37e0a675866a87b893600c6ccfeb59654662e452c56d93a556d5559319c2760543
@@ -1,41 +1,3 @@
1
- plugins {
2
- id "com.jfrog.bintray" version "1.1"
3
- id "com.github.jruby-gradle.base" version "0.1.5"
4
- id "java"
5
- }
6
- import com.github.jrubygradle.JRubyExec
7
- repositories {
8
- mavenCentral()
9
- mavenLocal()
10
- jcenter()
11
- }
12
- configurations {
13
- provided
14
- }
15
-
16
- version = "0.1.7"
17
-
18
- dependencies {
19
- compile "org.embulk:embulk-core:0.5.3"
20
- provided "org.embulk:embulk-core:0.5.3"
21
- compile "com.amazonaws:aws-java-sdk-s3:1.9.22"
22
- testCompile "junit:junit:4.+"
23
- testCompile "org.mockito:mockito-core:1.+"
24
- }
25
-
26
- task classpath(type: Copy, dependsOn: ["jar"]) {
27
- doFirst { file("classpath").deleteDir() }
28
- from (configurations.runtime - configurations.provided + files(jar.archivePath))
29
- into "classpath"
30
- }
31
- clean { delete 'classpath' }
32
-
33
- task gem(type: JRubyExec, dependsOn: ["build", "gemspec", "classpath"]) {
34
- jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "build"
35
- script "build/gemspec"
36
- doLast { ant.move(file: "${project.name}-${project.version}.gem", todir: "pkg") }
37
- }
38
-
39
1
  task gemspec << { file("build/gemspec").write($/
40
2
  Gem::Specification.new do |spec|
41
3
  spec.name = "${project.name}"
@@ -0,0 +1,318 @@
1
+ package org.embulk.input.s3;
2
+
3
+ import java.util.List;
4
+ import java.util.ArrayList;
5
+ import java.util.Collections;
6
+ import java.io.IOException;
7
+ import java.io.InterruptedIOException;
8
+ import java.io.InputStream;
9
+ import com.google.common.collect.ImmutableList;
10
+ import com.google.common.base.Optional;
11
+ import com.google.common.base.Throwables;
12
+ import org.slf4j.Logger;
13
+ import com.amazonaws.auth.AWSCredentials;
14
+ import com.amazonaws.auth.AWSCredentialsProvider;
15
+ import com.amazonaws.auth.BasicAWSCredentials;
16
+ import com.amazonaws.services.s3.AmazonS3Client;
17
+ import com.amazonaws.services.s3.model.ListObjectsRequest;
18
+ import com.amazonaws.services.s3.model.S3ObjectSummary;
19
+ import com.amazonaws.services.s3.model.ObjectListing;
20
+ import com.amazonaws.services.s3.model.GetObjectRequest;
21
+ import com.amazonaws.services.s3.model.S3Object;
22
+ import com.amazonaws.ClientConfiguration;
23
+ import com.amazonaws.Protocol;
24
+ import org.embulk.config.Config;
25
+ import org.embulk.config.ConfigInject;
26
+ import org.embulk.config.ConfigDefault;
27
+ import org.embulk.config.Task;
28
+ import org.embulk.config.TaskSource;
29
+ import org.embulk.config.ConfigSource;
30
+ import org.embulk.config.ConfigDiff;
31
+ import org.embulk.config.CommitReport;
32
+ import org.embulk.spi.BufferAllocator;
33
+ import org.embulk.spi.Exec;
34
+ import org.embulk.spi.FileInputPlugin;
35
+ import org.embulk.spi.TransactionalFileInput;
36
+ import org.embulk.spi.util.InputStreamFileInput;
37
+ import org.embulk.spi.util.ResumableInputStream;
38
+ import org.embulk.spi.util.RetryExecutor.Retryable;
39
+ import org.embulk.spi.util.RetryExecutor.RetryGiveupException;
40
+ import static org.embulk.spi.util.RetryExecutor.retryExecutor;
41
+
42
+ public abstract class AbstractS3FileInputPlugin
43
+ implements FileInputPlugin
44
+ {
45
+ public interface PluginTask
46
+ extends Task
47
+ {
48
+ @Config("bucket")
49
+ public String getBucket();
50
+
51
+ @Config("path_prefix")
52
+ public String getPathPrefix();
53
+
54
+ @Config("last_path")
55
+ @ConfigDefault("null")
56
+ public Optional<String> getLastPath();
57
+
58
+ @Config("access_key_id")
59
+ public String getAccessKeyId();
60
+
61
+ @Config("secret_access_key")
62
+ public String getSecretAccessKey();
63
+
64
+ // TODO timeout, ssl, etc
65
+
66
+ // TODO support more options such as STS
67
+
68
+ public List<String> getFiles();
69
+ public void setFiles(List<String> files);
70
+
71
+ @ConfigInject
72
+ public BufferAllocator getBufferAllocator();
73
+ }
74
+
75
+ protected abstract Class<? extends PluginTask> getTaskClass();
76
+
77
+ @Override
78
+ public ConfigDiff transaction(ConfigSource config, FileInputPlugin.Control control)
79
+ {
80
+ PluginTask task = config.loadConfig(getTaskClass());
81
+
82
+ // list files recursively
83
+ task.setFiles(listFiles(task));
84
+
85
+ // number of processors is same with number of files
86
+ return resume(task.dump(), task.getFiles().size(), control);
87
+ }
88
+
89
+ @Override
90
+ public ConfigDiff resume(TaskSource taskSource,
91
+ int taskCount,
92
+ FileInputPlugin.Control control)
93
+ {
94
+ PluginTask task = taskSource.loadTask(getTaskClass());
95
+
96
+ // validate task
97
+ newS3Client(task);
98
+
99
+ control.run(taskSource, taskCount);
100
+
101
+ // build next config
102
+ ConfigDiff configDiff = Exec.newConfigDiff();
103
+
104
+ // last_path
105
+ if (task.getFiles().isEmpty()) {
106
+ // keep the last value
107
+ if (task.getLastPath().isPresent()) {
108
+ configDiff.set("last_path", task.getLastPath().get());
109
+ }
110
+ } else {
111
+ List<String> files = new ArrayList<String>(task.getFiles());
112
+ Collections.sort(files);
113
+ configDiff.set("last_path", files.get(files.size() - 1));
114
+ }
115
+
116
+ return configDiff;
117
+ }
118
+
119
+ @Override
120
+ public void cleanup(TaskSource taskSource,
121
+ int taskCount,
122
+ List<CommitReport> successCommitReports)
123
+ {
124
+ // do nothing
125
+ }
126
+
127
+ protected AmazonS3Client newS3Client(PluginTask task)
128
+ {
129
+ return new AmazonS3Client(getCredentialsProvider(task), getClientConfiguration(task));
130
+ }
131
+
132
+ protected AWSCredentialsProvider getCredentialsProvider(PluginTask task)
133
+ {
134
+ final AWSCredentials cred = new BasicAWSCredentials(
135
+ task.getAccessKeyId(), task.getSecretAccessKey());
136
+ return new AWSCredentialsProvider() {
137
+ public AWSCredentials getCredentials()
138
+ {
139
+ return cred;
140
+ }
141
+
142
+ public void refresh()
143
+ {
144
+ }
145
+ };
146
+ }
147
+
148
+ protected ClientConfiguration getClientConfiguration(PluginTask task)
149
+ {
150
+ ClientConfiguration clientConfig = new ClientConfiguration();
151
+
152
+ //clientConfig.setProtocol(Protocol.HTTP);
153
+ clientConfig.setMaxConnections(50); // SDK default: 50
154
+ clientConfig.setMaxErrorRetry(3); // SDK default: 3
155
+ clientConfig.setSocketTimeout(8*60*1000); // SDK default: 50*1000
156
+
157
+ return clientConfig;
158
+ }
159
+
160
+ private List<String> listFiles(PluginTask task)
161
+ {
162
+ AmazonS3Client client = newS3Client(task);
163
+ String bucketName = task.getBucket();
164
+
165
+ return listS3FilesByPrefix(client, bucketName, task.getPathPrefix(), task.getLastPath());
166
+ }
167
+
168
+ /**
169
+ * Lists S3 filenames filtered by prefix.
170
+ *
171
+ * The resulting list does not include the file that's size == 0.
172
+ */
173
+ public static List<String> listS3FilesByPrefix(AmazonS3Client client, String bucketName,
174
+ String prefix, Optional<String> lastPath)
175
+ {
176
+ ImmutableList.Builder<String> builder = ImmutableList.builder();
177
+
178
+ String lastKey = lastPath.orNull();
179
+ do {
180
+ ListObjectsRequest req = new ListObjectsRequest(bucketName, prefix, lastKey, null, 1024);
181
+ ObjectListing ol = client.listObjects(req);
182
+ for(S3ObjectSummary s : ol.getObjectSummaries()) {
183
+ if (s.getSize() > 0) {
184
+ builder.add(s.getKey());
185
+ }
186
+ }
187
+ lastKey = ol.getNextMarker();
188
+ } while(lastKey != null);
189
+
190
+ return builder.build();
191
+ }
192
+
193
+ @Override
194
+ public TransactionalFileInput open(TaskSource taskSource, int taskIndex)
195
+ {
196
+ PluginTask task = taskSource.loadTask(getTaskClass());
197
+ return new S3FileInput(task, taskIndex);
198
+ }
199
+
200
+ private static class S3InputStreamReopener
201
+ implements ResumableInputStream.Reopener
202
+ {
203
+ private final Logger log = Exec.getLogger(S3InputStreamReopener.class);
204
+
205
+ private final AmazonS3Client client;
206
+ private final GetObjectRequest request;
207
+ private final long contentLength;
208
+
209
+ public S3InputStreamReopener(AmazonS3Client client, GetObjectRequest request, long contentLength)
210
+ {
211
+ this.client = client;
212
+ this.request = request;
213
+ this.contentLength = contentLength;
214
+ }
215
+
216
+ @Override
217
+ public InputStream reopen(final long offset, final Exception closedCause) throws IOException
218
+ {
219
+ try {
220
+ return retryExecutor()
221
+ .withRetryLimit(3)
222
+ .withInitialRetryWait(500)
223
+ .withMaxRetryWait(30*1000)
224
+ .runInterruptible(new Retryable<InputStream>() {
225
+ @Override
226
+ public InputStream call() throws InterruptedIOException
227
+ {
228
+ log.warn(String.format("S3 read failed. Retrying GET request with %,d bytes offset", offset), closedCause);
229
+ request.setRange(offset, contentLength - 1); // [first, last]
230
+ return client.getObject(request).getObjectContent();
231
+ }
232
+
233
+ @Override
234
+ public boolean isRetryableException(Exception exception)
235
+ {
236
+ return true; // TODO
237
+ }
238
+
239
+ @Override
240
+ public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait)
241
+ throws RetryGiveupException
242
+ {
243
+ String message = String.format("S3 GET request failed. Retrying %d/%d after %d seconds. Message: %s",
244
+ retryCount, retryLimit, retryWait/1000, exception.getMessage());
245
+ if (retryCount % 3 == 0) {
246
+ log.warn(message, exception);
247
+ } else {
248
+ log.warn(message);
249
+ }
250
+ }
251
+
252
+ @Override
253
+ public void onGiveup(Exception firstException, Exception lastException)
254
+ throws RetryGiveupException
255
+ {
256
+ }
257
+ });
258
+ } catch (RetryGiveupException ex) {
259
+ Throwables.propagateIfInstanceOf(ex.getCause(), IOException.class);
260
+ throw Throwables.propagate(ex.getCause());
261
+ } catch (InterruptedException ex) {
262
+ throw new InterruptedIOException();
263
+ }
264
+ }
265
+ }
266
+
267
+ public class S3FileInput
268
+ extends InputStreamFileInput
269
+ implements TransactionalFileInput
270
+ {
271
+ public S3FileInput(PluginTask task, int taskIndex)
272
+ {
273
+ super(task.getBufferAllocator(), new SingleFileProvider(task, taskIndex));
274
+ }
275
+
276
+ public void abort() { }
277
+
278
+ public CommitReport commit()
279
+ {
280
+ return Exec.newCommitReport();
281
+ }
282
+
283
+ @Override
284
+ public void close() { }
285
+ }
286
+
287
+ // TODO create single-file InputStreamFileInput utility
288
+ private class SingleFileProvider
289
+ implements InputStreamFileInput.Provider
290
+ {
291
+ private AmazonS3Client client;
292
+ private final String bucket;
293
+ private final String key;
294
+ private boolean opened = false;
295
+
296
+ public SingleFileProvider(PluginTask task, int taskIndex)
297
+ {
298
+ this.client = newS3Client(task);
299
+ this.bucket = task.getBucket();
300
+ this.key = task.getFiles().get(taskIndex);
301
+ }
302
+
303
+ @Override
304
+ public InputStream openNext() throws IOException
305
+ {
306
+ if (opened) {
307
+ return null;
308
+ }
309
+ opened = true;
310
+ GetObjectRequest request = new GetObjectRequest(bucket, key);
311
+ S3Object obj = client.getObject(request);
312
+ return new ResumableInputStream(obj.getObjectContent(), new S3InputStreamReopener(client, request, obj.getObjectMetadata().getContentLength()));
313
+ }
314
+
315
+ @Override
316
+ public void close() { }
317
+ }
318
+ }
@@ -1,327 +1,39 @@
1
1
  package org.embulk.input.s3;
2
2
 
3
- import java.util.List;
4
- import java.util.ArrayList;
5
- import java.util.Collections;
6
- import java.io.IOException;
7
- import java.io.InterruptedIOException;
8
- import java.io.InputStream;
9
- import com.google.common.collect.ImmutableList;
10
3
  import com.google.common.base.Optional;
11
- import com.google.common.base.Throwables;
12
- import org.slf4j.Logger;
13
- import com.amazonaws.auth.AWSCredentials;
14
- import com.amazonaws.auth.AWSCredentialsProvider;
15
- import com.amazonaws.auth.BasicAWSCredentials;
16
4
  import com.amazonaws.services.s3.AmazonS3Client;
17
- import com.amazonaws.services.s3.model.ListObjectsRequest;
18
- import com.amazonaws.services.s3.model.S3ObjectSummary;
19
- import com.amazonaws.services.s3.model.ObjectListing;
20
- import com.amazonaws.services.s3.model.GetObjectRequest;
21
- import com.amazonaws.services.s3.model.S3Object;
22
- import com.amazonaws.ClientConfiguration;
23
- import com.amazonaws.Protocol;
24
5
  import org.embulk.config.Config;
25
- import org.embulk.config.ConfigInject;
26
6
  import org.embulk.config.ConfigDefault;
27
- import org.embulk.config.Task;
28
- import org.embulk.config.TaskSource;
29
- import org.embulk.config.ConfigSource;
30
- import org.embulk.config.ConfigDiff;
31
- import org.embulk.config.CommitReport;
32
- import org.embulk.spi.BufferAllocator;
33
- import org.embulk.spi.Exec;
34
- import org.embulk.spi.FileInputPlugin;
35
- import org.embulk.spi.TransactionalFileInput;
36
- import org.embulk.spi.util.InputStreamFileInput;
37
- import org.embulk.input.s3.RetryExecutor.Retryable;
38
- import org.embulk.input.s3.RetryExecutor.RetryGiveupException;
39
- import static org.embulk.input.s3.RetryExecutor.retryExecutor;
7
+ import org.embulk.input.s3.AbstractS3FileInputPlugin;
40
8
 
41
9
  public class S3FileInputPlugin
42
- implements FileInputPlugin
10
+ extends AbstractS3FileInputPlugin
43
11
  {
44
- public interface PluginTask
45
- extends Task
12
+ public interface S3PluginTask
13
+ extends PluginTask
46
14
  {
47
- @Config("bucket")
48
- public String getBucket();
49
-
50
- @Config("path_prefix")
51
- public String getPathPrefix();
52
-
53
- @Config("last_path")
54
- @ConfigDefault("null")
55
- public Optional<String> getLastPath();
56
-
57
15
  @Config("endpoint")
58
16
  @ConfigDefault("null")
59
17
  public Optional<String> getEndpoint();
60
-
61
- // TODO timeout, ssl, etc
62
-
63
- @Config("access_key_id")
64
- public String getAccessKeyId();
65
-
66
- @Config("secret_access_key")
67
- public String getSecretAccessKey();
68
-
69
- // TODO support more options such as STS
70
-
71
- public List<String> getFiles();
72
- public void setFiles(List<String> files);
73
-
74
- @ConfigInject
75
- public BufferAllocator getBufferAllocator();
76
18
  }
77
19
 
78
20
  @Override
79
- public ConfigDiff transaction(ConfigSource config, FileInputPlugin.Control control)
21
+ protected Class<? extends PluginTask> getTaskClass()
80
22
  {
81
- PluginTask task = config.loadConfig(PluginTask.class);
82
-
83
- // list files recursively
84
- task.setFiles(listFiles(task));
85
-
86
- // TODO what if task.getFiles().isEmpty()?
87
-
88
- // number of processors is same with number of files
89
- return resume(task.dump(), task.getFiles().size(), control);
23
+ return S3PluginTask.class;
90
24
  }
91
25
 
92
26
  @Override
93
- public ConfigDiff resume(TaskSource taskSource,
94
- int taskCount,
95
- FileInputPlugin.Control control)
27
+ protected AmazonS3Client newS3Client(PluginTask task)
96
28
  {
97
- PluginTask task = taskSource.loadTask(PluginTask.class);
98
-
99
- control.run(taskSource, taskCount);
29
+ S3PluginTask t = (S3PluginTask) task;
100
30
 
101
- // build next config
102
- ConfigDiff configDiff = Exec.newConfigDiff();
31
+ AmazonS3Client client = super.newS3Client(t);
103
32
 
104
- // last_path
105
- if (task.getFiles().isEmpty()) {
106
- // keep the last value
107
- if (task.getLastPath().isPresent()) {
108
- configDiff.set("last_path", task.getLastPath().get());
109
- }
110
- } else {
111
- List<String> files = new ArrayList<String>(task.getFiles());
112
- Collections.sort(files);
113
- configDiff.set("last_path", files.get(files.size() - 1));
33
+ if (t.getEndpoint().isPresent()) {
34
+ client.setEndpoint(t.getEndpoint().get());
114
35
  }
115
36
 
116
- return configDiff;
117
- }
118
-
119
- @Override
120
- public void cleanup(TaskSource taskSource,
121
- int taskCount,
122
- List<CommitReport> successCommitReports)
123
- {
124
- // do nothing
125
- }
126
-
127
- public static AWSCredentialsProvider getCredentialsProvider(PluginTask task)
128
- {
129
- final AWSCredentials cred = new BasicAWSCredentials(
130
- task.getAccessKeyId(), task.getSecretAccessKey());
131
- return new AWSCredentialsProvider() {
132
- public AWSCredentials getCredentials()
133
- {
134
- return cred;
135
- }
136
-
137
- public void refresh()
138
- {
139
- }
140
- };
141
- }
142
-
143
- private static AmazonS3Client newS3Client(PluginTask task)
144
- {
145
- AWSCredentialsProvider credentials = getCredentialsProvider(task);
146
- AmazonS3Client client = newS3Client(credentials, task.getEndpoint());
147
37
  return client;
148
38
  }
149
-
150
- private static AmazonS3Client newS3Client(AWSCredentialsProvider credentials,
151
- Optional<String> endpoint)
152
- {
153
- // TODO get config from AmazonS3Task
154
- ClientConfiguration clientConfig = new ClientConfiguration();
155
- //clientConfig.setProtocol(Protocol.HTTP);
156
- clientConfig.setMaxConnections(50); // SDK default: 50
157
- clientConfig.setMaxErrorRetry(3); // SDK default: 3
158
- clientConfig.setSocketTimeout(8*60*1000); // SDK default: 50*1000
159
-
160
- AmazonS3Client client = new AmazonS3Client(credentials, clientConfig);
161
-
162
- if (endpoint.isPresent()) {
163
- client.setEndpoint(endpoint.get());
164
- }
165
-
166
- return client;
167
- }
168
-
169
- public List<String> listFiles(PluginTask task)
170
- {
171
- AmazonS3Client client = newS3Client(task);
172
- String bucketName = task.getBucket();
173
-
174
- return listS3FilesByPrefix(client, bucketName, task.getPathPrefix(), task.getLastPath());
175
- }
176
-
177
- /**
178
- * Lists S3 filenames filtered by prefix.
179
- *
180
- * The resulting list does not include the file that's size == 0.
181
- */
182
- public static List<String> listS3FilesByPrefix(AmazonS3Client client, String bucketName,
183
- String prefix, Optional<String> lastPath)
184
- {
185
- ImmutableList.Builder<String> builder = ImmutableList.builder();
186
-
187
- String lastKey = lastPath.orNull();
188
- do {
189
- ListObjectsRequest req = new ListObjectsRequest(bucketName, prefix, lastKey, null, 1024);
190
- ObjectListing ol = client.listObjects(req);
191
- for(S3ObjectSummary s : ol.getObjectSummaries()) {
192
- if (s.getSize() > 0) {
193
- builder.add(s.getKey());
194
- }
195
- }
196
- lastKey = ol.getNextMarker();
197
- } while(lastKey != null);
198
-
199
- return builder.build();
200
- }
201
-
202
- @Override
203
- public TransactionalFileInput open(TaskSource taskSource, int taskIndex)
204
- {
205
- PluginTask task = taskSource.loadTask(PluginTask.class);
206
- return new S3FileInput(task, taskIndex);
207
- }
208
-
209
- private static class S3RetryableOpener
210
- implements RetryableInputStream.Opener
211
- {
212
- private final Logger log = Exec.getLogger(S3FileInputPlugin.class);
213
-
214
- private final AmazonS3Client client;
215
- private final GetObjectRequest request;
216
- private final long contentLength;
217
-
218
- public S3RetryableOpener(AmazonS3Client client, GetObjectRequest request, long contentLength)
219
- {
220
- this.client = client;
221
- this.request = request;
222
- this.contentLength = contentLength;
223
- }
224
-
225
- @Override
226
- public InputStream open(final long offset, final Exception exception) throws IOException
227
- {
228
- try {
229
- return retryExecutor()
230
- .withRetryLimit(3)
231
- .withInitialRetryWait(500)
232
- .withMaxRetryWait(30*1000)
233
- .runInterruptible(new Retryable<InputStream>() {
234
- @Override
235
- public InputStream call() throws InterruptedIOException
236
- {
237
- log.warn(String.format("S3 read failed. Retrying GET request with %,d bytes offset", offset), exception);
238
- request.setRange(offset, contentLength - 1); // [first, last]
239
- return client.getObject(request).getObjectContent();
240
- }
241
-
242
- @Override
243
- public boolean isRetryableException(Exception exception)
244
- {
245
- return true; // TODO
246
- }
247
-
248
- @Override
249
- public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait)
250
- throws RetryGiveupException
251
- {
252
- String message = String.format("S3 GET request failed. Retrying %d/%d after %d seconds. Message: %s",
253
- retryCount, retryLimit, retryWait/1000, exception.getMessage());
254
- if (retryCount % 3 == 0) {
255
- log.warn(message, exception);
256
- } else {
257
- log.warn(message);
258
- }
259
- }
260
-
261
- @Override
262
- public void onGiveup(Exception firstException, Exception lastException)
263
- throws RetryGiveupException
264
- {
265
- }
266
- });
267
- } catch (RetryGiveupException ex) {
268
- Throwables.propagateIfInstanceOf(ex.getCause(), IOException.class);
269
- throw Throwables.propagate(ex.getCause());
270
- } catch (InterruptedException ex) {
271
- throw new InterruptedIOException();
272
- }
273
- }
274
- }
275
-
276
- public static class S3FileInput
277
- extends InputStreamFileInput
278
- implements TransactionalFileInput
279
- {
280
- // TODO create single-file InputStreamFileInput utility
281
- private static class SingleFileProvider
282
- implements InputStreamFileInput.Provider
283
- {
284
- private AmazonS3Client client;
285
- private final String bucket;
286
- private final String key;
287
- private boolean opened = false;
288
-
289
- public SingleFileProvider(PluginTask task, int taskIndex)
290
- {
291
- this.client = newS3Client(task);
292
- this.bucket = task.getBucket();
293
- this.key = task.getFiles().get(taskIndex);
294
- }
295
-
296
- @Override
297
- public InputStream openNext() throws IOException
298
- {
299
- if (opened) {
300
- return null;
301
- }
302
- opened = true;
303
- GetObjectRequest request = new GetObjectRequest(bucket, key);
304
- S3Object obj = client.getObject(request);
305
- return new RetryableInputStream(obj.getObjectContent(), new S3RetryableOpener(client, request, obj.getObjectMetadata().getContentLength()));
306
- }
307
-
308
- @Override
309
- public void close() { }
310
- }
311
-
312
- public S3FileInput(PluginTask task, int taskIndex)
313
- {
314
- super(task.getBufferAllocator(), new SingleFileProvider(task, taskIndex));
315
- }
316
-
317
- public void abort() { }
318
-
319
- public CommitReport commit()
320
- {
321
- return Exec.newCommitReport();
322
- }
323
-
324
- @Override
325
- public void close() { }
326
- }
327
39
  }