embulk-input-s3 0.1.7 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: cd67a9aea3d1352358739b2c6917f3204301443e
4
- data.tar.gz: b143676db2502a2e39d7cb430bd4bf8f67933a5b
3
+ metadata.gz: 02cd2ac88b83d92846715d194928f38a32693ebe
4
+ data.tar.gz: 145815003f894fc2a2a224266e00908dae89cf79
5
5
  SHA512:
6
- metadata.gz: 08b84503dda64c6d4e3765d66a368070d47bf1214d96ac9f5ff77478cc888782c799fb193a0925ce508b8b0867ead0e2bde4185db2f92eb0433807c400cd18d6
7
- data.tar.gz: b6541d55d9e4b7317267b3f57eaed9795c0b1ff7591f4de240890e2b5f0580575b39461e066fbf85bef5e894e54c201641721195899b9246f50f79756e703239
6
+ metadata.gz: e9d81495964337639a2a6e7de2ce9ca3c3e5b39ca869ff1873e5ed119de95ac8ae778fe3450d85068dcd7b773bbfd8345835b46ab226ad73f1117c2fb614c0bc
7
+ data.tar.gz: 5f50eb790d085fb427a23e3819bdada69bb0cafba84e6e4175a8a317e7cf7e37e0a675866a87b893600c6ccfeb59654662e452c56d93a556d5559319c2760543
@@ -1,41 +1,3 @@
1
- plugins {
2
- id "com.jfrog.bintray" version "1.1"
3
- id "com.github.jruby-gradle.base" version "0.1.5"
4
- id "java"
5
- }
6
- import com.github.jrubygradle.JRubyExec
7
- repositories {
8
- mavenCentral()
9
- mavenLocal()
10
- jcenter()
11
- }
12
- configurations {
13
- provided
14
- }
15
-
16
- version = "0.1.7"
17
-
18
- dependencies {
19
- compile "org.embulk:embulk-core:0.5.3"
20
- provided "org.embulk:embulk-core:0.5.3"
21
- compile "com.amazonaws:aws-java-sdk-s3:1.9.22"
22
- testCompile "junit:junit:4.+"
23
- testCompile "org.mockito:mockito-core:1.+"
24
- }
25
-
26
- task classpath(type: Copy, dependsOn: ["jar"]) {
27
- doFirst { file("classpath").deleteDir() }
28
- from (configurations.runtime - configurations.provided + files(jar.archivePath))
29
- into "classpath"
30
- }
31
- clean { delete 'classpath' }
32
-
33
- task gem(type: JRubyExec, dependsOn: ["build", "gemspec", "classpath"]) {
34
- jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "build"
35
- script "build/gemspec"
36
- doLast { ant.move(file: "${project.name}-${project.version}.gem", todir: "pkg") }
37
- }
38
-
39
1
  task gemspec << { file("build/gemspec").write($/
40
2
  Gem::Specification.new do |spec|
41
3
  spec.name = "${project.name}"
@@ -0,0 +1,318 @@
1
+ package org.embulk.input.s3;
2
+
3
+ import java.util.List;
4
+ import java.util.ArrayList;
5
+ import java.util.Collections;
6
+ import java.io.IOException;
7
+ import java.io.InterruptedIOException;
8
+ import java.io.InputStream;
9
+ import com.google.common.collect.ImmutableList;
10
+ import com.google.common.base.Optional;
11
+ import com.google.common.base.Throwables;
12
+ import org.slf4j.Logger;
13
+ import com.amazonaws.auth.AWSCredentials;
14
+ import com.amazonaws.auth.AWSCredentialsProvider;
15
+ import com.amazonaws.auth.BasicAWSCredentials;
16
+ import com.amazonaws.services.s3.AmazonS3Client;
17
+ import com.amazonaws.services.s3.model.ListObjectsRequest;
18
+ import com.amazonaws.services.s3.model.S3ObjectSummary;
19
+ import com.amazonaws.services.s3.model.ObjectListing;
20
+ import com.amazonaws.services.s3.model.GetObjectRequest;
21
+ import com.amazonaws.services.s3.model.S3Object;
22
+ import com.amazonaws.ClientConfiguration;
23
+ import com.amazonaws.Protocol;
24
+ import org.embulk.config.Config;
25
+ import org.embulk.config.ConfigInject;
26
+ import org.embulk.config.ConfigDefault;
27
+ import org.embulk.config.Task;
28
+ import org.embulk.config.TaskSource;
29
+ import org.embulk.config.ConfigSource;
30
+ import org.embulk.config.ConfigDiff;
31
+ import org.embulk.config.CommitReport;
32
+ import org.embulk.spi.BufferAllocator;
33
+ import org.embulk.spi.Exec;
34
+ import org.embulk.spi.FileInputPlugin;
35
+ import org.embulk.spi.TransactionalFileInput;
36
+ import org.embulk.spi.util.InputStreamFileInput;
37
+ import org.embulk.spi.util.ResumableInputStream;
38
+ import org.embulk.spi.util.RetryExecutor.Retryable;
39
+ import org.embulk.spi.util.RetryExecutor.RetryGiveupException;
40
+ import static org.embulk.spi.util.RetryExecutor.retryExecutor;
41
+
42
+ public abstract class AbstractS3FileInputPlugin
43
+ implements FileInputPlugin
44
+ {
45
+ public interface PluginTask
46
+ extends Task
47
+ {
48
+ @Config("bucket")
49
+ public String getBucket();
50
+
51
+ @Config("path_prefix")
52
+ public String getPathPrefix();
53
+
54
+ @Config("last_path")
55
+ @ConfigDefault("null")
56
+ public Optional<String> getLastPath();
57
+
58
+ @Config("access_key_id")
59
+ public String getAccessKeyId();
60
+
61
+ @Config("secret_access_key")
62
+ public String getSecretAccessKey();
63
+
64
+ // TODO timeout, ssl, etc
65
+
66
+ // TODO support more options such as STS
67
+
68
+ public List<String> getFiles();
69
+ public void setFiles(List<String> files);
70
+
71
+ @ConfigInject
72
+ public BufferAllocator getBufferAllocator();
73
+ }
74
+
75
+ protected abstract Class<? extends PluginTask> getTaskClass();
76
+
77
+ @Override
78
+ public ConfigDiff transaction(ConfigSource config, FileInputPlugin.Control control)
79
+ {
80
+ PluginTask task = config.loadConfig(getTaskClass());
81
+
82
+ // list files recursively
83
+ task.setFiles(listFiles(task));
84
+
85
+ // number of processors is same with number of files
86
+ return resume(task.dump(), task.getFiles().size(), control);
87
+ }
88
+
89
+ @Override
90
+ public ConfigDiff resume(TaskSource taskSource,
91
+ int taskCount,
92
+ FileInputPlugin.Control control)
93
+ {
94
+ PluginTask task = taskSource.loadTask(getTaskClass());
95
+
96
+ // validate task
97
+ newS3Client(task);
98
+
99
+ control.run(taskSource, taskCount);
100
+
101
+ // build next config
102
+ ConfigDiff configDiff = Exec.newConfigDiff();
103
+
104
+ // last_path
105
+ if (task.getFiles().isEmpty()) {
106
+ // keep the last value
107
+ if (task.getLastPath().isPresent()) {
108
+ configDiff.set("last_path", task.getLastPath().get());
109
+ }
110
+ } else {
111
+ List<String> files = new ArrayList<String>(task.getFiles());
112
+ Collections.sort(files);
113
+ configDiff.set("last_path", files.get(files.size() - 1));
114
+ }
115
+
116
+ return configDiff;
117
+ }
118
+
119
+ @Override
120
+ public void cleanup(TaskSource taskSource,
121
+ int taskCount,
122
+ List<CommitReport> successCommitReports)
123
+ {
124
+ // do nothing
125
+ }
126
+
127
+ protected AmazonS3Client newS3Client(PluginTask task)
128
+ {
129
+ return new AmazonS3Client(getCredentialsProvider(task), getClientConfiguration(task));
130
+ }
131
+
132
+ protected AWSCredentialsProvider getCredentialsProvider(PluginTask task)
133
+ {
134
+ final AWSCredentials cred = new BasicAWSCredentials(
135
+ task.getAccessKeyId(), task.getSecretAccessKey());
136
+ return new AWSCredentialsProvider() {
137
+ public AWSCredentials getCredentials()
138
+ {
139
+ return cred;
140
+ }
141
+
142
+ public void refresh()
143
+ {
144
+ }
145
+ };
146
+ }
147
+
148
+ protected ClientConfiguration getClientConfiguration(PluginTask task)
149
+ {
150
+ ClientConfiguration clientConfig = new ClientConfiguration();
151
+
152
+ //clientConfig.setProtocol(Protocol.HTTP);
153
+ clientConfig.setMaxConnections(50); // SDK default: 50
154
+ clientConfig.setMaxErrorRetry(3); // SDK default: 3
155
+ clientConfig.setSocketTimeout(8*60*1000); // SDK default: 50*1000
156
+
157
+ return clientConfig;
158
+ }
159
+
160
+ private List<String> listFiles(PluginTask task)
161
+ {
162
+ AmazonS3Client client = newS3Client(task);
163
+ String bucketName = task.getBucket();
164
+
165
+ return listS3FilesByPrefix(client, bucketName, task.getPathPrefix(), task.getLastPath());
166
+ }
167
+
168
+ /**
169
+ * Lists S3 filenames filtered by prefix.
170
+ *
171
+ * The resulting list does not include the file that's size == 0.
172
+ */
173
+ public static List<String> listS3FilesByPrefix(AmazonS3Client client, String bucketName,
174
+ String prefix, Optional<String> lastPath)
175
+ {
176
+ ImmutableList.Builder<String> builder = ImmutableList.builder();
177
+
178
+ String lastKey = lastPath.orNull();
179
+ do {
180
+ ListObjectsRequest req = new ListObjectsRequest(bucketName, prefix, lastKey, null, 1024);
181
+ ObjectListing ol = client.listObjects(req);
182
+ for(S3ObjectSummary s : ol.getObjectSummaries()) {
183
+ if (s.getSize() > 0) {
184
+ builder.add(s.getKey());
185
+ }
186
+ }
187
+ lastKey = ol.getNextMarker();
188
+ } while(lastKey != null);
189
+
190
+ return builder.build();
191
+ }
192
+
193
+ @Override
194
+ public TransactionalFileInput open(TaskSource taskSource, int taskIndex)
195
+ {
196
+ PluginTask task = taskSource.loadTask(getTaskClass());
197
+ return new S3FileInput(task, taskIndex);
198
+ }
199
+
200
+ private static class S3InputStreamReopener
201
+ implements ResumableInputStream.Reopener
202
+ {
203
+ private final Logger log = Exec.getLogger(S3InputStreamReopener.class);
204
+
205
+ private final AmazonS3Client client;
206
+ private final GetObjectRequest request;
207
+ private final long contentLength;
208
+
209
+ public S3InputStreamReopener(AmazonS3Client client, GetObjectRequest request, long contentLength)
210
+ {
211
+ this.client = client;
212
+ this.request = request;
213
+ this.contentLength = contentLength;
214
+ }
215
+
216
+ @Override
217
+ public InputStream reopen(final long offset, final Exception closedCause) throws IOException
218
+ {
219
+ try {
220
+ return retryExecutor()
221
+ .withRetryLimit(3)
222
+ .withInitialRetryWait(500)
223
+ .withMaxRetryWait(30*1000)
224
+ .runInterruptible(new Retryable<InputStream>() {
225
+ @Override
226
+ public InputStream call() throws InterruptedIOException
227
+ {
228
+ log.warn(String.format("S3 read failed. Retrying GET request with %,d bytes offset", offset), closedCause);
229
+ request.setRange(offset, contentLength - 1); // [first, last]
230
+ return client.getObject(request).getObjectContent();
231
+ }
232
+
233
+ @Override
234
+ public boolean isRetryableException(Exception exception)
235
+ {
236
+ return true; // TODO
237
+ }
238
+
239
+ @Override
240
+ public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait)
241
+ throws RetryGiveupException
242
+ {
243
+ String message = String.format("S3 GET request failed. Retrying %d/%d after %d seconds. Message: %s",
244
+ retryCount, retryLimit, retryWait/1000, exception.getMessage());
245
+ if (retryCount % 3 == 0) {
246
+ log.warn(message, exception);
247
+ } else {
248
+ log.warn(message);
249
+ }
250
+ }
251
+
252
+ @Override
253
+ public void onGiveup(Exception firstException, Exception lastException)
254
+ throws RetryGiveupException
255
+ {
256
+ }
257
+ });
258
+ } catch (RetryGiveupException ex) {
259
+ Throwables.propagateIfInstanceOf(ex.getCause(), IOException.class);
260
+ throw Throwables.propagate(ex.getCause());
261
+ } catch (InterruptedException ex) {
262
+ throw new InterruptedIOException();
263
+ }
264
+ }
265
+ }
266
+
267
+ public class S3FileInput
268
+ extends InputStreamFileInput
269
+ implements TransactionalFileInput
270
+ {
271
+ public S3FileInput(PluginTask task, int taskIndex)
272
+ {
273
+ super(task.getBufferAllocator(), new SingleFileProvider(task, taskIndex));
274
+ }
275
+
276
+ public void abort() { }
277
+
278
+ public CommitReport commit()
279
+ {
280
+ return Exec.newCommitReport();
281
+ }
282
+
283
+ @Override
284
+ public void close() { }
285
+ }
286
+
287
+ // TODO create single-file InputStreamFileInput utility
288
+ private class SingleFileProvider
289
+ implements InputStreamFileInput.Provider
290
+ {
291
+ private AmazonS3Client client;
292
+ private final String bucket;
293
+ private final String key;
294
+ private boolean opened = false;
295
+
296
+ public SingleFileProvider(PluginTask task, int taskIndex)
297
+ {
298
+ this.client = newS3Client(task);
299
+ this.bucket = task.getBucket();
300
+ this.key = task.getFiles().get(taskIndex);
301
+ }
302
+
303
+ @Override
304
+ public InputStream openNext() throws IOException
305
+ {
306
+ if (opened) {
307
+ return null;
308
+ }
309
+ opened = true;
310
+ GetObjectRequest request = new GetObjectRequest(bucket, key);
311
+ S3Object obj = client.getObject(request);
312
+ return new ResumableInputStream(obj.getObjectContent(), new S3InputStreamReopener(client, request, obj.getObjectMetadata().getContentLength()));
313
+ }
314
+
315
+ @Override
316
+ public void close() { }
317
+ }
318
+ }
@@ -1,327 +1,39 @@
1
1
  package org.embulk.input.s3;
2
2
 
3
- import java.util.List;
4
- import java.util.ArrayList;
5
- import java.util.Collections;
6
- import java.io.IOException;
7
- import java.io.InterruptedIOException;
8
- import java.io.InputStream;
9
- import com.google.common.collect.ImmutableList;
10
3
  import com.google.common.base.Optional;
11
- import com.google.common.base.Throwables;
12
- import org.slf4j.Logger;
13
- import com.amazonaws.auth.AWSCredentials;
14
- import com.amazonaws.auth.AWSCredentialsProvider;
15
- import com.amazonaws.auth.BasicAWSCredentials;
16
4
  import com.amazonaws.services.s3.AmazonS3Client;
17
- import com.amazonaws.services.s3.model.ListObjectsRequest;
18
- import com.amazonaws.services.s3.model.S3ObjectSummary;
19
- import com.amazonaws.services.s3.model.ObjectListing;
20
- import com.amazonaws.services.s3.model.GetObjectRequest;
21
- import com.amazonaws.services.s3.model.S3Object;
22
- import com.amazonaws.ClientConfiguration;
23
- import com.amazonaws.Protocol;
24
5
  import org.embulk.config.Config;
25
- import org.embulk.config.ConfigInject;
26
6
  import org.embulk.config.ConfigDefault;
27
- import org.embulk.config.Task;
28
- import org.embulk.config.TaskSource;
29
- import org.embulk.config.ConfigSource;
30
- import org.embulk.config.ConfigDiff;
31
- import org.embulk.config.CommitReport;
32
- import org.embulk.spi.BufferAllocator;
33
- import org.embulk.spi.Exec;
34
- import org.embulk.spi.FileInputPlugin;
35
- import org.embulk.spi.TransactionalFileInput;
36
- import org.embulk.spi.util.InputStreamFileInput;
37
- import org.embulk.input.s3.RetryExecutor.Retryable;
38
- import org.embulk.input.s3.RetryExecutor.RetryGiveupException;
39
- import static org.embulk.input.s3.RetryExecutor.retryExecutor;
7
+ import org.embulk.input.s3.AbstractS3FileInputPlugin;
40
8
 
41
9
  public class S3FileInputPlugin
42
- implements FileInputPlugin
10
+ extends AbstractS3FileInputPlugin
43
11
  {
44
- public interface PluginTask
45
- extends Task
12
+ public interface S3PluginTask
13
+ extends PluginTask
46
14
  {
47
- @Config("bucket")
48
- public String getBucket();
49
-
50
- @Config("path_prefix")
51
- public String getPathPrefix();
52
-
53
- @Config("last_path")
54
- @ConfigDefault("null")
55
- public Optional<String> getLastPath();
56
-
57
15
  @Config("endpoint")
58
16
  @ConfigDefault("null")
59
17
  public Optional<String> getEndpoint();
60
-
61
- // TODO timeout, ssl, etc
62
-
63
- @Config("access_key_id")
64
- public String getAccessKeyId();
65
-
66
- @Config("secret_access_key")
67
- public String getSecretAccessKey();
68
-
69
- // TODO support more options such as STS
70
-
71
- public List<String> getFiles();
72
- public void setFiles(List<String> files);
73
-
74
- @ConfigInject
75
- public BufferAllocator getBufferAllocator();
76
18
  }
77
19
 
78
20
  @Override
79
- public ConfigDiff transaction(ConfigSource config, FileInputPlugin.Control control)
21
+ protected Class<? extends PluginTask> getTaskClass()
80
22
  {
81
- PluginTask task = config.loadConfig(PluginTask.class);
82
-
83
- // list files recursively
84
- task.setFiles(listFiles(task));
85
-
86
- // TODO what if task.getFiles().isEmpty()?
87
-
88
- // number of processors is same with number of files
89
- return resume(task.dump(), task.getFiles().size(), control);
23
+ return S3PluginTask.class;
90
24
  }
91
25
 
92
26
  @Override
93
- public ConfigDiff resume(TaskSource taskSource,
94
- int taskCount,
95
- FileInputPlugin.Control control)
27
+ protected AmazonS3Client newS3Client(PluginTask task)
96
28
  {
97
- PluginTask task = taskSource.loadTask(PluginTask.class);
98
-
99
- control.run(taskSource, taskCount);
29
+ S3PluginTask t = (S3PluginTask) task;
100
30
 
101
- // build next config
102
- ConfigDiff configDiff = Exec.newConfigDiff();
31
+ AmazonS3Client client = super.newS3Client(t);
103
32
 
104
- // last_path
105
- if (task.getFiles().isEmpty()) {
106
- // keep the last value
107
- if (task.getLastPath().isPresent()) {
108
- configDiff.set("last_path", task.getLastPath().get());
109
- }
110
- } else {
111
- List<String> files = new ArrayList<String>(task.getFiles());
112
- Collections.sort(files);
113
- configDiff.set("last_path", files.get(files.size() - 1));
33
+ if (t.getEndpoint().isPresent()) {
34
+ client.setEndpoint(t.getEndpoint().get());
114
35
  }
115
36
 
116
- return configDiff;
117
- }
118
-
119
- @Override
120
- public void cleanup(TaskSource taskSource,
121
- int taskCount,
122
- List<CommitReport> successCommitReports)
123
- {
124
- // do nothing
125
- }
126
-
127
- public static AWSCredentialsProvider getCredentialsProvider(PluginTask task)
128
- {
129
- final AWSCredentials cred = new BasicAWSCredentials(
130
- task.getAccessKeyId(), task.getSecretAccessKey());
131
- return new AWSCredentialsProvider() {
132
- public AWSCredentials getCredentials()
133
- {
134
- return cred;
135
- }
136
-
137
- public void refresh()
138
- {
139
- }
140
- };
141
- }
142
-
143
- private static AmazonS3Client newS3Client(PluginTask task)
144
- {
145
- AWSCredentialsProvider credentials = getCredentialsProvider(task);
146
- AmazonS3Client client = newS3Client(credentials, task.getEndpoint());
147
37
  return client;
148
38
  }
149
-
150
- private static AmazonS3Client newS3Client(AWSCredentialsProvider credentials,
151
- Optional<String> endpoint)
152
- {
153
- // TODO get config from AmazonS3Task
154
- ClientConfiguration clientConfig = new ClientConfiguration();
155
- //clientConfig.setProtocol(Protocol.HTTP);
156
- clientConfig.setMaxConnections(50); // SDK default: 50
157
- clientConfig.setMaxErrorRetry(3); // SDK default: 3
158
- clientConfig.setSocketTimeout(8*60*1000); // SDK default: 50*1000
159
-
160
- AmazonS3Client client = new AmazonS3Client(credentials, clientConfig);
161
-
162
- if (endpoint.isPresent()) {
163
- client.setEndpoint(endpoint.get());
164
- }
165
-
166
- return client;
167
- }
168
-
169
- public List<String> listFiles(PluginTask task)
170
- {
171
- AmazonS3Client client = newS3Client(task);
172
- String bucketName = task.getBucket();
173
-
174
- return listS3FilesByPrefix(client, bucketName, task.getPathPrefix(), task.getLastPath());
175
- }
176
-
177
- /**
178
- * Lists S3 filenames filtered by prefix.
179
- *
180
- * The resulting list does not include the file that's size == 0.
181
- */
182
- public static List<String> listS3FilesByPrefix(AmazonS3Client client, String bucketName,
183
- String prefix, Optional<String> lastPath)
184
- {
185
- ImmutableList.Builder<String> builder = ImmutableList.builder();
186
-
187
- String lastKey = lastPath.orNull();
188
- do {
189
- ListObjectsRequest req = new ListObjectsRequest(bucketName, prefix, lastKey, null, 1024);
190
- ObjectListing ol = client.listObjects(req);
191
- for(S3ObjectSummary s : ol.getObjectSummaries()) {
192
- if (s.getSize() > 0) {
193
- builder.add(s.getKey());
194
- }
195
- }
196
- lastKey = ol.getNextMarker();
197
- } while(lastKey != null);
198
-
199
- return builder.build();
200
- }
201
-
202
- @Override
203
- public TransactionalFileInput open(TaskSource taskSource, int taskIndex)
204
- {
205
- PluginTask task = taskSource.loadTask(PluginTask.class);
206
- return new S3FileInput(task, taskIndex);
207
- }
208
-
209
- private static class S3RetryableOpener
210
- implements RetryableInputStream.Opener
211
- {
212
- private final Logger log = Exec.getLogger(S3FileInputPlugin.class);
213
-
214
- private final AmazonS3Client client;
215
- private final GetObjectRequest request;
216
- private final long contentLength;
217
-
218
- public S3RetryableOpener(AmazonS3Client client, GetObjectRequest request, long contentLength)
219
- {
220
- this.client = client;
221
- this.request = request;
222
- this.contentLength = contentLength;
223
- }
224
-
225
- @Override
226
- public InputStream open(final long offset, final Exception exception) throws IOException
227
- {
228
- try {
229
- return retryExecutor()
230
- .withRetryLimit(3)
231
- .withInitialRetryWait(500)
232
- .withMaxRetryWait(30*1000)
233
- .runInterruptible(new Retryable<InputStream>() {
234
- @Override
235
- public InputStream call() throws InterruptedIOException
236
- {
237
- log.warn(String.format("S3 read failed. Retrying GET request with %,d bytes offset", offset), exception);
238
- request.setRange(offset, contentLength - 1); // [first, last]
239
- return client.getObject(request).getObjectContent();
240
- }
241
-
242
- @Override
243
- public boolean isRetryableException(Exception exception)
244
- {
245
- return true; // TODO
246
- }
247
-
248
- @Override
249
- public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait)
250
- throws RetryGiveupException
251
- {
252
- String message = String.format("S3 GET request failed. Retrying %d/%d after %d seconds. Message: %s",
253
- retryCount, retryLimit, retryWait/1000, exception.getMessage());
254
- if (retryCount % 3 == 0) {
255
- log.warn(message, exception);
256
- } else {
257
- log.warn(message);
258
- }
259
- }
260
-
261
- @Override
262
- public void onGiveup(Exception firstException, Exception lastException)
263
- throws RetryGiveupException
264
- {
265
- }
266
- });
267
- } catch (RetryGiveupException ex) {
268
- Throwables.propagateIfInstanceOf(ex.getCause(), IOException.class);
269
- throw Throwables.propagate(ex.getCause());
270
- } catch (InterruptedException ex) {
271
- throw new InterruptedIOException();
272
- }
273
- }
274
- }
275
-
276
- public static class S3FileInput
277
- extends InputStreamFileInput
278
- implements TransactionalFileInput
279
- {
280
- // TODO create single-file InputStreamFileInput utility
281
- private static class SingleFileProvider
282
- implements InputStreamFileInput.Provider
283
- {
284
- private AmazonS3Client client;
285
- private final String bucket;
286
- private final String key;
287
- private boolean opened = false;
288
-
289
- public SingleFileProvider(PluginTask task, int taskIndex)
290
- {
291
- this.client = newS3Client(task);
292
- this.bucket = task.getBucket();
293
- this.key = task.getFiles().get(taskIndex);
294
- }
295
-
296
- @Override
297
- public InputStream openNext() throws IOException
298
- {
299
- if (opened) {
300
- return null;
301
- }
302
- opened = true;
303
- GetObjectRequest request = new GetObjectRequest(bucket, key);
304
- S3Object obj = client.getObject(request);
305
- return new RetryableInputStream(obj.getObjectContent(), new S3RetryableOpener(client, request, obj.getObjectMetadata().getContentLength()));
306
- }
307
-
308
- @Override
309
- public void close() { }
310
- }
311
-
312
- public S3FileInput(PluginTask task, int taskIndex)
313
- {
314
- super(task.getBufferAllocator(), new SingleFileProvider(task, taskIndex));
315
- }
316
-
317
- public void abort() { }
318
-
319
- public CommitReport commit()
320
- {
321
- return Exec.newCommitReport();
322
- }
323
-
324
- @Override
325
- public void close() { }
326
- }
327
39
  }