embulk-input-s3 0.2.16 → 0.2.19

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 95a2e3a70de76cdd69c0b0252f37b32671ce3896
4
- data.tar.gz: 0f7a8a710a28478d528df8a31d08b996a0595772
3
+ metadata.gz: 3c9e972f5326281de40fc46190dc3d268282b34e
4
+ data.tar.gz: 0c63221f881710e6c4a18dbf10ab73be701193ff
5
5
  SHA512:
6
- metadata.gz: 6ae53adc0606da4ec1e2a72a742b18b27ea40145b27baf5375e905670f243b411d43e15a55fdeb2172cc158e1e83b257f290a79d2ff92aa890f9ab57e38d2174
7
- data.tar.gz: 332d2963cfde7edf02ed80b9fda7d651a12c490e03f7f2984cee329858b978ebd5c2d5401e9ea30efc5e4a55e66ccd6d87d285a908bd3a4819dffa50de52591e
6
+ metadata.gz: 3983830d6e700a3b3e0a2cdadd063613c4ff41b39e01c96c00c2f8a33515fd8d8be601c644184119bb87e46d1eab394869e91cf1c557c6420ef022f40a93b31e
7
+ data.tar.gz: 27471ec5bbc698865ab44d2d4ca94b2074c7589c5c9c0ce919b3c3b5bc18b885b6566b48fb74803c99abc10c04716771a1b9faa231ff780de3740da55b468358
@@ -6,15 +6,17 @@ import com.amazonaws.Protocol;
6
6
  import com.amazonaws.auth.AWSCredentialsProvider;
7
7
  import com.amazonaws.services.s3.AmazonS3;
8
8
  import com.amazonaws.services.s3.AmazonS3ClientBuilder;
9
+ import com.amazonaws.services.s3.model.GetObjectMetadataRequest;
9
10
  import com.amazonaws.services.s3.model.GetObjectRequest;
10
11
  import com.amazonaws.services.s3.model.ListObjectsRequest;
11
12
  import com.amazonaws.services.s3.model.ObjectListing;
13
+ import com.amazonaws.services.s3.model.ObjectMetadata;
12
14
  import com.amazonaws.services.s3.model.S3Object;
15
+ import com.amazonaws.services.s3.model.S3ObjectInputStream;
13
16
  import com.amazonaws.services.s3.model.S3ObjectSummary;
14
17
  import com.amazonaws.services.s3.model.StorageClass;
15
18
  import com.google.common.annotations.VisibleForTesting;
16
19
  import com.google.common.base.Optional;
17
- import com.google.common.base.Throwables;
18
20
  import org.embulk.config.Config;
19
21
  import org.embulk.config.ConfigDefault;
20
22
  import org.embulk.config.ConfigDiff;
@@ -30,15 +32,13 @@ import org.embulk.spi.FileInputPlugin;
30
32
  import org.embulk.spi.TransactionalFileInput;
31
33
  import org.embulk.spi.util.InputStreamFileInput;
32
34
  import org.embulk.spi.util.ResumableInputStream;
33
- import org.embulk.spi.util.RetryExecutor.RetryGiveupException;
34
- import org.embulk.spi.util.RetryExecutor.Retryable;
35
+ import org.embulk.spi.util.RetryExecutor;
35
36
  import org.embulk.util.aws.credentials.AwsCredentials;
36
37
  import org.embulk.util.aws.credentials.AwsCredentialsTask;
37
38
  import org.slf4j.Logger;
38
39
 
39
40
  import java.io.IOException;
40
41
  import java.io.InputStream;
41
- import java.io.InterruptedIOException;
42
42
  import java.util.Iterator;
43
43
  import java.util.List;
44
44
 
@@ -50,13 +50,18 @@ public abstract class AbstractS3FileInputPlugin
50
50
  private static final Logger LOGGER = Exec.getLogger(S3FileInputPlugin.class);
51
51
 
52
52
  public interface PluginTask
53
- extends AwsCredentialsTask, FileList.Task, Task
53
+ extends AwsCredentialsTask, FileList.Task, RetrySupportPluginTask, Task
54
54
  {
55
55
  @Config("bucket")
56
56
  public String getBucket();
57
57
 
58
58
  @Config("path_prefix")
59
- public String getPathPrefix();
59
+ @ConfigDefault("null")
60
+ public Optional<String> getPathPrefix();
61
+
62
+ @Config("path")
63
+ @ConfigDefault("null")
64
+ public Optional<String> getPath();
60
65
 
61
66
  @Config("last_path")
62
67
  @ConfigDefault("null")
@@ -97,6 +102,7 @@ public abstract class AbstractS3FileInputPlugin
97
102
  {
98
103
  PluginTask task = config.loadConfig(getTaskClass());
99
104
 
105
+ validateInputTask(task);
100
106
  // list files recursively
101
107
  task.setFiles(listFiles(task));
102
108
 
@@ -208,21 +214,42 @@ public abstract class AbstractS3FileInputPlugin
208
214
  }
209
215
  }
210
216
 
211
- private FileList listFiles(PluginTask task)
217
+ /**
218
+ * Builds the common retry executor from the retry-related configuration parameters of the plugin task.
219
+ * @param task Plugin task.
220
+ * @return RetryExecutor object
221
+ */
222
+ private static RetryExecutor retryExecutorFrom(RetrySupportPluginTask task)
223
+ {
224
+ return retryExecutor()
225
+ .withRetryLimit(task.getMaximumRetries())
226
+ .withInitialRetryWait(task.getInitialRetryIntervalMillis())
227
+ .withMaxRetryWait(task.getMaximumRetryIntervalMillis());
228
+ }
229
+
230
+ private FileList listFiles(final PluginTask task)
212
231
  {
213
- LOGGER.info("Start listing file with prefix [{}]", task.getPathPrefix());
214
232
  try {
215
233
  AmazonS3 client = newS3Client(task);
216
234
  String bucketName = task.getBucket();
235
+ FileList.Builder builder = new FileList.Builder(task);
236
+ RetryExecutor retryExec = retryExecutorFrom(task);
237
+ if (task.getPath().isPresent()) {
238
+ LOGGER.info("Start getting object with path: [{}]", task.getPath().get());
239
+ addS3DirectObject(builder, client, task.getBucket(), task.getPath().get(), retryExec);
240
+ }
241
+ else {
242
+ // no need to check that path_prefix is present here: validateInputTask already requires either path or path_prefix
243
+ LOGGER.info("Start listing file with prefix [{}]", task.getPathPrefix().get());
244
+ if (task.getPathPrefix().get().equals("/")) {
245
+ LOGGER.info("Listing files with prefix \"/\". This doesn't mean all files in a bucket. If you intend to read all files, use \"path_prefix: ''\" (empty string) instead.");
246
+ }
217
247
 
218
- if (task.getPathPrefix().equals("/")) {
219
- LOGGER.info("Listing files with prefix \"/\". This doesn't mean all files in a bucket. If you intend to read all files, use \"path_prefix: ''\" (empty string) instead.");
248
+ listS3FilesByPrefix(builder, client, bucketName,
249
+ task.getPathPrefix().get(), task.getLastPath(), task.getSkipGlacierObjects(), retryExec);
250
+ LOGGER.info("Found total [{}] files", builder.size());
220
251
  }
221
252
 
222
- FileList.Builder builder = new FileList.Builder(task);
223
- listS3FilesByPrefix(builder, client, bucketName,
224
- task.getPathPrefix(), task.getLastPath(), task.getSkipGlacierObjects());
225
- LOGGER.info("Found total [{}] files", builder.size());
226
253
  return builder.build();
227
254
  }
228
255
  catch (AmazonServiceException ex) {
@@ -236,11 +263,55 @@ public abstract class AbstractS3FileInputPlugin
236
263
  }
237
264
  throw ex;
238
265
  }
239
- catch (InterruptedException | RetryGiveupException ex) {
240
- throw new RuntimeException(ex);
266
+ }
267
+
268
+ @VisibleForTesting
269
+ public void addS3DirectObject(FileList.Builder builder,
270
+ final AmazonS3 client,
271
+ String bucket,
272
+ String objectKey)
273
+ {
274
+ addS3DirectObject(builder, client, bucket, objectKey, null);
275
+ }
276
+
277
+ @VisibleForTesting
278
+ public void addS3DirectObject(FileList.Builder builder,
279
+ final AmazonS3 client,
280
+ String bucket,
281
+ String objectKey,
282
+ RetryExecutor retryExec)
283
+ {
284
+ final GetObjectMetadataRequest objectMetadataRequest = new GetObjectMetadataRequest(bucket, objectKey);
285
+
286
+ ObjectMetadata objectMetadata = new AlwaysRetryable<ObjectMetadata>("Looking up for a single object") {
287
+ @Override
288
+ public ObjectMetadata call()
289
+ {
290
+ return client.getObjectMetadata(objectMetadataRequest);
291
+ }
292
+ }.executeWith(retryExec);
293
+
294
+ builder.add(objectKey, objectMetadata.getContentLength());
295
+ }
296
+
297
+ private void validateInputTask(PluginTask task)
298
+ {
299
+ if (!task.getPathPrefix().isPresent() && !task.getPath().isPresent()) {
300
+ throw new ConfigException("Either path or path_prefix is required");
241
301
  }
242
302
  }
243
303
 
304
+ @VisibleForTesting
305
+ public static void listS3FilesByPrefix(FileList.Builder builder,
306
+ final AmazonS3 client,
307
+ String bucketName,
308
+ String prefix,
309
+ Optional<String> lastPath,
310
+ boolean skipGlacierObjects)
311
+ {
312
+ listS3FilesByPrefix(builder, client, bucketName, prefix, lastPath, skipGlacierObjects, null);
313
+ }
314
+
244
315
  /**
245
316
  * Lists S3 filenames filtered by prefix.
246
317
  * <p>
@@ -251,30 +322,28 @@ public abstract class AbstractS3FileInputPlugin
251
322
  * @param prefix key prefix used to filter the listed Amazon S3 objects
252
323
  * @param lastPath last path
253
324
  * @param skipGlacierObjects skip Glacier objects
254
- * @throws RetryGiveupException error when retrying
255
- * @throws InterruptedException error when retrying
325
+ * @param retryExec a retry executor object to do the retrying
256
326
  */
327
+ @VisibleForTesting
257
328
  public static void listS3FilesByPrefix(FileList.Builder builder,
258
- final AmazonS3 client, final String bucketName,
259
- final String prefix, Optional<String> lastPath, boolean skipGlacierObjects) throws RetryGiveupException, InterruptedException
329
+ final AmazonS3 client,
330
+ String bucketName,
331
+ String prefix,
332
+ Optional<String> lastPath,
333
+ boolean skipGlacierObjects,
334
+ RetryExecutor retryExec)
260
335
  {
261
336
  String lastKey = lastPath.orNull();
262
337
  do {
263
338
  final String finalLastKey = lastKey;
264
- Optional<ObjectListing> optOl = S3FileInputUtils.executeWithRetry(3, 500, 30 * 1000, new S3FileInputUtils.AlwaysRetryRetryable<Optional<ObjectListing>>()
265
- {
339
+ final ListObjectsRequest req = new ListObjectsRequest(bucketName, prefix, finalLastKey, null, 1024);
340
+ ObjectListing ol = new AlwaysRetryable<ObjectListing>("Listing objects") {
266
341
  @Override
267
- public Optional<ObjectListing> call() throws AmazonServiceException
342
+ public ObjectListing call()
268
343
  {
269
- ListObjectsRequest req = new ListObjectsRequest(bucketName, prefix, finalLastKey, null, 1024);
270
- ObjectListing ol = client.listObjects(req);
271
- return Optional.of(ol);
344
+ return client.listObjects(req);
272
345
  }
273
- });
274
- if (!optOl.isPresent()) {
275
- break;
276
- }
277
- ObjectListing ol = optOl.get();
346
+ }.executeWith(retryExec);
278
347
  for (S3ObjectSummary s : ol.getObjectSummaries()) {
279
348
  if (s.getStorageClass().equals(StorageClass.Glacier.toString())) {
280
349
  if (skipGlacierObjects) {
@@ -313,67 +382,34 @@ public abstract class AbstractS3FileInputPlugin
313
382
  private final AmazonS3 client;
314
383
  private final GetObjectRequest request;
315
384
  private final long contentLength;
385
+ private final RetryExecutor retryExec;
316
386
 
317
387
  public S3InputStreamReopener(AmazonS3 client, GetObjectRequest request, long contentLength)
388
+ {
389
+ this(client, request, contentLength, null);
390
+ }
391
+
392
+ public S3InputStreamReopener(AmazonS3 client, GetObjectRequest request, long contentLength, RetryExecutor retryExec)
318
393
  {
319
394
  this.client = client;
320
395
  this.request = request;
321
396
  this.contentLength = contentLength;
397
+ this.retryExec = retryExec;
322
398
  }
323
399
 
324
400
  @Override
325
401
  public InputStream reopen(final long offset, final Exception closedCause) throws IOException
326
402
  {
327
- try {
328
- return retryExecutor()
329
- .withRetryLimit(3)
330
- .withInitialRetryWait(500)
331
- .withMaxRetryWait(30 * 1000)
332
- .runInterruptible(new Retryable<InputStream>()
333
- {
334
- @Override
335
- public InputStream call() throws InterruptedIOException
336
- {
337
- log.warn(String.format("S3 read failed. Retrying GET request with %,d bytes offset", offset), closedCause);
338
- request.setRange(offset, contentLength - 1); // [first, last]
339
- return client.getObject(request).getObjectContent();
340
- }
341
-
342
- @Override
343
- public boolean isRetryableException(Exception exception)
344
- {
345
- return true; // TODO
346
- }
347
-
348
- @Override
349
- public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait)
350
- throws RetryGiveupException
351
- {
352
- String message = String.format("S3 GET request failed. Retrying %d/%d after %d seconds. Message: %s",
353
- retryCount, retryLimit, retryWait / 1000, exception.getMessage());
354
- if (retryCount % 3 == 0) {
355
- log.warn(message, exception);
356
- }
357
- else {
358
- log.warn(message);
359
- }
360
- }
361
-
362
- @Override
363
- public void onGiveup(Exception firstException, Exception lastException)
364
- throws RetryGiveupException
365
- {
366
- log.error("Giving up retry, first exception is [{}], last exception is [{}]", firstException.getMessage(), lastException.getMessage());
367
- }
368
- });
369
- }
370
- catch (RetryGiveupException ex) {
371
- Throwables.propagateIfInstanceOf(ex.getCause(), IOException.class);
372
- throw Throwables.propagate(ex.getCause());
373
- }
374
- catch (InterruptedException ex) {
375
- throw new InterruptedIOException();
376
- }
403
+ log.warn(String.format("S3 read failed. Retrying GET request with %,d bytes offset", offset), closedCause);
404
+ request.setRange(offset, contentLength - 1); // [first, last]
405
+
406
+ return new AlwaysRetryable<S3ObjectInputStream>("Opening the file") {
407
+ @Override
408
+ public S3ObjectInputStream call()
409
+ {
410
+ return client.getObject(request).getObjectContent();
411
+ }
412
+ }.executeWithCheckedException(retryExec, IOException.class);
377
413
  }
378
414
  }
379
415
 
@@ -408,12 +444,14 @@ public abstract class AbstractS3FileInputPlugin
408
444
  private AmazonS3 client;
409
445
  private final String bucket;
410
446
  private final Iterator<String> iterator;
447
+ private final RetryExecutor retryExec;
411
448
 
412
449
  public SingleFileProvider(PluginTask task, int taskIndex)
413
450
  {
414
451
  this.client = newS3Client(task);
415
452
  this.bucket = task.getBucket();
416
453
  this.iterator = task.getFiles().get(taskIndex).iterator();
454
+ this.retryExec = retryExecutorFrom(task);
417
455
  }
418
456
 
419
457
  @Override
@@ -427,7 +465,7 @@ public abstract class AbstractS3FileInputPlugin
427
465
  S3Object obj = client.getObject(request);
428
466
  long objectSize = obj.getObjectMetadata().getContentLength();
429
467
  LOGGER.info("Open S3Object with bucket [{}], key [{}], with size [{}]", bucket, key, objectSize);
430
- return new ResumableInputStream(obj.getObjectContent(), new S3InputStreamReopener(client, request, objectSize));
468
+ return new ResumableInputStream(obj.getObjectContent(), new S3InputStreamReopener(client, request, objectSize, retryExec));
431
469
  }
432
470
 
433
471
  @Override
@@ -0,0 +1,158 @@
1
+ package org.embulk.input.s3;
2
+
3
+ import com.google.common.base.Throwables;
4
+ import org.embulk.spi.Exec;
5
+ import org.embulk.spi.util.RetryExecutor;
6
+ import org.slf4j.Logger;
7
+
8
+ import java.util.concurrent.Callable;
9
+
10
+ import static java.lang.String.format;
11
+ import static org.embulk.spi.util.RetryExecutor.RetryGiveupException;
12
+ import static org.embulk.spi.util.RetryExecutor.Retryable;
13
+
14
+ /**
15
+ * Always retries, regardless of which exception occurred.
16
+ * Also provides a default approach for exception propagation.
17
+ */
18
+ class AlwaysRetryable<T> implements Retryable<T>
19
+ {
20
+ private static final Logger log = Exec.getLogger(AlwaysRetryable.class);
21
+
22
+ private String operationName;
23
+ private Callable<T> callable;
24
+
25
+ /**
26
+ * @param operationName the name that will be referred on logging
27
+ */
28
+ public AlwaysRetryable(String operationName)
29
+ {
30
+ this.operationName = operationName;
31
+ }
32
+
33
+ /**
34
+ * @param operationName the name that will be referred on logging
35
+ * @param callable the operation, either define this at construction time or override the call() method
36
+ */
37
+ public AlwaysRetryable(String operationName, Callable<T> callable)
38
+ {
39
+ this.operationName = operationName;
40
+ this.callable = callable;
41
+ }
42
+
43
+ public AlwaysRetryable()
44
+ {
45
+ this("Anonymous operation");
46
+ }
47
+
48
+ public AlwaysRetryable(Callable<T> callable)
49
+ {
50
+ this("Anonymous operation", callable);
51
+ }
52
+
53
+ @Override
54
+ public T call() throws Exception
55
+ {
56
+ if (callable != null) {
57
+ return callable.call();
58
+ }
59
+ else {
60
+ throw new IllegalStateException("Either override call() or construct with a Runnable");
61
+ }
62
+ }
63
+
64
+ @Override
65
+ public boolean isRetryableException(Exception exception)
66
+ {
67
+ return true;
68
+ }
69
+
70
+ @Override
71
+ public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait)
72
+ {
73
+ String message = format("%s failed. Retrying %d/%d after %d seconds. Message: %s",
74
+ operationName, retryCount, retryLimit, retryWait / 1000, exception.getMessage());
75
+ if (retryCount % retryLimit == 0) {
76
+ log.warn(message, exception);
77
+ }
78
+ else {
79
+ log.warn(message);
80
+ }
81
+ }
82
+
83
+ @Override
84
+ public void onGiveup(Exception firstException, Exception lastException)
85
+ {
86
+ // Exceptions are propagated, so it is up to the caller to handle them; this is just a warning
87
+ log.warn("Giving up on retrying for {}, first exception is [{}], last exception is [{}]",
88
+ operationName, firstException.getMessage(), lastException.getMessage());
89
+ }
90
+
91
+ /**
92
+ * Run itself by the supplied executor,
93
+ *
94
+ * This propagates all exceptions (as unchecked) and unwraps RetryGiveupException to expose the original cause.
95
+ * If the original exception already is a RuntimeException, it will be propagated as is. If not, it will
96
+ * be wrapped in a RuntimeException.
97
+ *
98
+ * For convenience, it executes normally, without retrying, when the executor is null.
99
+ *
100
+ * @throws RuntimeException the original cause
101
+ */
102
+ public T executeWith(RetryExecutor executor)
103
+ {
104
+ if (executor == null) {
105
+ try {
106
+ return this.call();
107
+ }
108
+ catch (Exception e) {
109
+ Throwables.propagate(e);
110
+ }
111
+ }
112
+
113
+ try {
114
+ return executor.runInterruptible(this);
115
+ }
116
+ catch (RetryGiveupException e) {
117
+ throw Throwables.propagate(e.getCause());
118
+ }
119
+ catch (InterruptedException e) {
120
+ throw Throwables.propagate(e);
121
+ }
122
+ }
123
+
124
+ /**
125
+ * Run itself by the supplied executor,
126
+ *
127
+ * Same as `executeWith`, this propagates all original exceptions. But `propagateAsIsException` will
128
+ * be re-thrown without being wrapped in a RuntimeException, whether it is a checked or unchecked exception.
129
+ *
130
+ * For convenience, it executes normally, without retrying, when the executor is null.
131
+ *
132
+ * @throws X whatever checked exception that you decided to propagate directly
133
+ * @throws RuntimeException wrap around whatever the original cause of failure (potentially thread interruption)
134
+ */
135
+ public <X extends Throwable> T executeWithCheckedException(RetryExecutor executor,
136
+ Class<X> propagateAsIsException) throws X
137
+ {
138
+ if (executor == null) {
139
+ try {
140
+ return this.call();
141
+ }
142
+ catch (Exception e) {
143
+ Throwables.propagate(e);
144
+ }
145
+ }
146
+
147
+ try {
148
+ return executor.runInterruptible(this);
149
+ }
150
+ catch (RetryGiveupException e) {
151
+ Throwables.propagateIfInstanceOf(e.getCause(), propagateAsIsException);
152
+ throw Throwables.propagate(e.getCause());
153
+ }
154
+ catch (InterruptedException e) {
155
+ throw Throwables.propagate(e);
156
+ }
157
+ }
158
+ }
@@ -0,0 +1,20 @@
1
+ package org.embulk.input.s3;
2
+
3
+ import org.embulk.config.Config;
4
+ import org.embulk.config.ConfigDefault;
5
+ import org.embulk.config.Task;
6
+
7
+ public interface RetrySupportPluginTask extends Task
8
+ {
9
+ @Config("maximum_retries")
10
+ @ConfigDefault("7")
11
+ int getMaximumRetries();
12
+
13
+ @Config("initial_retry_interval_millis")
14
+ @ConfigDefault("30000")
15
+ int getInitialRetryIntervalMillis();
16
+
17
+ @Config("maximum_retry_interval_millis")
18
+ @ConfigDefault("480000")
19
+ int getMaximumRetryIntervalMillis();
20
+ }
@@ -0,0 +1,115 @@
1
+ package org.embulk.input.s3;
2
+
3
+ import com.amazonaws.services.s3.AmazonS3;
4
+ import com.amazonaws.services.s3.model.GetObjectMetadataRequest;
5
+ import com.amazonaws.services.s3.model.ListObjectsRequest;
6
+ import com.amazonaws.services.s3.model.ObjectListing;
7
+ import com.amazonaws.services.s3.model.ObjectMetadata;
8
+ import com.google.common.base.Optional;
9
+ import org.embulk.EmbulkTestRuntime;
10
+ import org.embulk.spi.util.RetryExecutor;
11
+ import org.junit.Before;
12
+ import org.junit.Rule;
13
+ import org.junit.Test;
14
+
15
+ import static org.mockito.Matchers.any;
16
+ import static org.mockito.Mockito.doReturn;
17
+ import static org.mockito.Mockito.doThrow;
18
+ import static org.mockito.Mockito.mock;
19
+
20
+ public class TestAbstractS3FileInputPlugin
21
+ {
22
+ private static RetryExecutor retryExecutor()
23
+ {
24
+ return RetryExecutor.retryExecutor()
25
+ .withInitialRetryWait(0)
26
+ .withMaxRetryWait(0);
27
+ }
28
+
29
+ private static AbstractS3FileInputPlugin dummyS3Plugin()
30
+ {
31
+ return new AbstractS3FileInputPlugin()
32
+ {
33
+ @Override
34
+ protected Class<? extends PluginTask> getTaskClass()
35
+ {
36
+ return PluginTask.class;
37
+ }
38
+ };
39
+ }
40
+
41
+ private static class SomeException extends RuntimeException
42
+ {
43
+ }
44
+
45
+ @Rule
46
+ public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
47
+
48
+ private AmazonS3 client;
49
+
50
+ @Before
51
+ public void createResources()
52
+ {
53
+ client = mock(AmazonS3.class);
54
+ }
55
+
56
+ @Test
57
+ public void listS3FilesByPrefix()
58
+ {
59
+ doReturn(new ObjectListing()).when(client).listObjects(any(ListObjectsRequest.class));
60
+ FileList.Builder builder = new FileList.Builder();
61
+ dummyS3Plugin().listS3FilesByPrefix(builder, client, "some_bucket", "some_prefix", Optional.of("last_path"), true);
62
+ }
63
+
64
+ @Test
65
+ public void listS3FileByPrefix_with_retry()
66
+ {
67
+ doThrow(new RuntimeException()).doReturn(new ObjectListing())
68
+ .when(client).listObjects(any(ListObjectsRequest.class));
69
+ FileList.Builder builder = new FileList.Builder();
70
+ dummyS3Plugin().listS3FilesByPrefix(
71
+ builder, client, "some_bucket", "some_prefix", Optional.of("last_path"), true,
72
+ retryExecutor().withRetryLimit(1));
73
+ }
74
+
75
+ @Test(expected = SomeException.class)
76
+ public void listS3FileByPrefix_on_retry_gave_up_should_throw_the_original_exception()
77
+ {
78
+ doThrow(new SomeException()).doReturn(new ObjectListing())
79
+ .when(client).listObjects(any(ListObjectsRequest.class));
80
+ FileList.Builder builder = new FileList.Builder();
81
+ dummyS3Plugin().listS3FilesByPrefix(
82
+ builder, client, "some_bucket", "some_prefix", Optional.of("last_path"), true,
83
+ retryExecutor().withRetryLimit(0));
84
+ }
85
+
86
+ @Test
87
+ public void addS3DirectObject()
88
+ {
89
+ doReturn(new ObjectMetadata()).when(client).getObjectMetadata(any(GetObjectMetadataRequest.class));
90
+ FileList.Builder builder = new FileList.Builder().pathMatchPattern("");
91
+ dummyS3Plugin().addS3DirectObject(builder, client, "some_bucket", "some_prefix");
92
+ }
93
+
94
+ @Test
95
+ public void addS3DirectObject_with_retry()
96
+ {
97
+ doThrow(new RuntimeException()).doReturn(new ObjectMetadata())
98
+ .when(client).getObjectMetadata(any(GetObjectMetadataRequest.class));
99
+ FileList.Builder builder = new FileList.Builder().pathMatchPattern("");
100
+ dummyS3Plugin().addS3DirectObject(
101
+ builder, client, "some_bucket", "some_prefix",
102
+ retryExecutor());
103
+ }
104
+
105
+ @Test(expected = SomeException.class)
106
+ public void addS3DirectObject_on_retry_gave_up_should_throw_original_exception()
107
+ {
108
+ doThrow(new SomeException()).doReturn(new ObjectMetadata())
109
+ .when(client).getObjectMetadata(any(GetObjectMetadataRequest.class));
110
+ FileList.Builder builder = new FileList.Builder().pathMatchPattern("");
111
+ dummyS3Plugin().addS3DirectObject(
112
+ builder, client, "some_bucket", "some_prefix",
113
+ retryExecutor().withRetryLimit(0));
114
+ }
115
+ }
@@ -0,0 +1,131 @@
1
+ package org.embulk.input.s3;
2
+
3
+ import org.embulk.EmbulkTestRuntime;
4
+ import org.embulk.spi.util.RetryExecutor;
5
+ import org.embulk.spi.util.RetryExecutor.RetryGiveupException;
6
+ import org.junit.Rule;
7
+ import org.junit.Test;
8
+
9
+ import java.io.IOException;
10
+ import java.util.concurrent.Callable;
11
+
12
+ import static java.lang.String.format;
13
+ import static org.msgpack.core.Preconditions.checkArgument;
14
+
15
+ public class TestAlwaysRetryable
16
+ {
17
+ @Rule
18
+ public EmbulkTestRuntime runtime = new EmbulkTestRuntime(); // require for AlwaysRetryable's logger
19
+
20
+ private static class Deny extends RuntimeException implements Callable
21
+ {
22
+ private int pastCalls = 0;
23
+ private final int targetCalls;
24
+ private Exception exception;
25
+
26
+ Deny(int targetCalls)
27
+ {
28
+ super(format("Try harder! (Will pass after %d calls)", targetCalls));
29
+ checkArgument(targetCalls >= 0);
30
+ this.targetCalls = targetCalls;
31
+ }
32
+
33
+ static Deny until(int calls)
34
+ {
35
+ return new Deny(calls);
36
+ }
37
+
38
+ Deny with(Exception exception)
39
+ {
40
+ this.exception = exception;
41
+ return this;
42
+ }
43
+
44
+ @Override
45
+ public Object call() throws Exception
46
+ {
47
+ if (pastCalls < targetCalls) {
48
+ pastCalls++;
49
+ if (exception != null) {
50
+ throw exception;
51
+ }
52
+ else {
53
+ throw this;
54
+ }
55
+ }
56
+ pastCalls++;
57
+ return null;
58
+ }
59
+ }
60
+
61
+ private static RetryExecutor retryExecutor()
62
+ {
63
+ return RetryExecutor.retryExecutor()
64
+ .withInitialRetryWait(0)
65
+ .withMaxRetryWait(0);
66
+ }
67
+
68
+ @Test
69
+ @SuppressWarnings("unchecked")
70
+ public void guarantee_retry_attempts_just_like_Retryable() throws Exception
71
+ {
72
+ retryExecutor()
73
+ .withRetryLimit(0)
74
+ .run(new AlwaysRetryable(Deny.until(0)));
75
+ retryExecutor()
76
+ .withRetryLimit(1)
77
+ .run(new AlwaysRetryable(Deny.until(1)));
78
+ retryExecutor()
79
+ .withRetryLimit(2)
80
+ .run(new AlwaysRetryable(Deny.until(1)));
81
+ retryExecutor()
82
+ .withRetryLimit(3)
83
+ .run(new AlwaysRetryable(Deny.until(2)));
84
+ }
85
+
86
+ @Test(expected = RetryGiveupException.class)
87
+ @SuppressWarnings("unchecked")
88
+ public void fail_after_exceeding_attempts_just_like_Retryable() throws Exception
89
+ {
90
+ retryExecutor()
91
+ .withRetryLimit(3)
92
+ .run(new AlwaysRetryable(Deny.until(4)));
93
+ }
94
+
95
+ @Test(expected = Deny.class)
96
+ @SuppressWarnings("unchecked")
97
+ public void execute_should_unwrap_RetryGiveupException() throws Exception
98
+ {
99
+ new AlwaysRetryable(Deny.until(4))
100
+ .executeWith(retryExecutor().withRetryLimit(3));
101
+ }
102
+
103
+ @Test(expected = RuntimeException.class)
104
+ @SuppressWarnings("unchecked")
105
+ public void execute_should_unwrap_RetryGiveupException_but_rewrap_checked_exception_in_a_RuntimeException()
106
+ {
107
+ new AlwaysRetryable(Deny.until(4).with(new Exception("A checked exception")))
108
+ .executeWith(retryExecutor().withRetryLimit(3));
109
+ }
110
+
111
+ @Test(expected = IOException.class)
112
+ public void executeAndPropagateAsIs_should_leave_original_exception_unwrapped() throws IOException
113
+ {
114
+ RetryExecutor retryExc = retryExecutor().withRetryLimit(3);
115
+ // An explicit type parameter for operation return type is needed here,
116
+ // Without one, javac (at least on 1.8) will fail to infer the X exception type parameter.
117
+ new AlwaysRetryable<Object>() {
118
+ @Override
119
+ public Object call() throws IOException
120
+ {
121
+ throw new IOException();
122
+ }
123
+ }.executeWithCheckedException(retryExc, IOException.class);
124
+ }
125
+
126
+ @Test(expected = IllegalStateException.class)
127
+ public void execute_without_an_implementation_should_throw_an_IllegalStateException()
128
+ {
129
+ new AlwaysRetryable().executeWith(retryExecutor());
130
+ }
131
+ }
@@ -161,6 +161,28 @@ public class TestS3FileInputPlugin
161
161
  }
162
162
  }
163
163
 
164
+ @Test
165
+ public void usePath()
166
+ {
167
+ ConfigSource config = this.config.deepCopy()
168
+ .set("path", String.format("%s/sample_01.csv", EMBULK_S3_TEST_PATH_PREFIX))
169
+ .set("path_prefix", null);
170
+ ConfigDiff configDiff = runner.transaction(config, new Control(runner, output));
171
+ assertEquals(String.format("%s/sample_01.csv", EMBULK_S3_TEST_PATH_PREFIX), configDiff.get(String.class, "last_path"));
172
+ assertRecords(config, output);
173
+ }
174
+
175
+ @Test
176
+ public void usePathAsHighPriorityThanPathPrefix()
177
+ {
178
+ ConfigSource config = this.config.deepCopy()
179
+ .set("path", String.format("%s/sample_01.csv", EMBULK_S3_TEST_PATH_PREFIX))
180
+ .set("path_prefix", "foo"); // path_prefix has the bad value, if path_prefix is chosen, expected result will be failed
181
+ ConfigDiff configDiff = runner.transaction(config, new Control(runner, output));
182
+ assertEquals(String.format("%s/sample_01.csv", EMBULK_S3_TEST_PATH_PREFIX), configDiff.get(String.class, "last_path"));
183
+ assertRecords(config, output);
184
+ }
185
+
164
186
  @Test
165
187
  public void configuredEndpoint()
166
188
  {
@@ -1,5 +1,6 @@
1
1
  package org.embulk.input.s3;
2
2
 
3
+ import com.amazonaws.AmazonClientException;
3
4
  import com.amazonaws.services.s3.AmazonS3;
4
5
  import com.amazonaws.services.s3.model.GetObjectRequest;
5
6
  import com.amazonaws.services.s3.model.ObjectMetadata;
@@ -16,7 +17,9 @@ import java.io.ByteArrayInputStream;
16
17
  import java.io.InputStream;
17
18
  import java.io.InputStreamReader;
18
19
 
20
+ import static org.embulk.spi.util.RetryExecutor.retryExecutor;
19
21
  import static org.junit.Assert.assertEquals;
22
+ import static org.junit.Assert.fail;
20
23
  import static org.mockito.Matchers.any;
21
24
  import static org.mockito.Mockito.doReturn;
22
25
  import static org.mockito.Mockito.doThrow;
@@ -55,7 +58,13 @@ public class TestS3InputStreamReopener
55
58
  { // retry once
56
59
  doThrow(new RuntimeException()).doReturn(s3object("in/aa/a", content)).when(client).getObject(any(GetObjectRequest.class));
57
60
 
58
- S3InputStreamReopener opener = new S3InputStreamReopener(client, new GetObjectRequest("my_bucket", "in/aa/a"), content.length());
61
+ S3InputStreamReopener opener = new S3InputStreamReopener(
62
+ client,
63
+ new GetObjectRequest("my_bucket", "in/aa/a"),
64
+ content.length(),
65
+ retryExecutor()
66
+ .withInitialRetryWait(0)
67
+ .withRetryLimit(1));
59
68
 
60
69
  try (InputStream in = opener.reopen(0, new RuntimeException())) {
61
70
  BufferedReader r = new BufferedReader(new InputStreamReader(in));
@@ -64,6 +73,42 @@ public class TestS3InputStreamReopener
64
73
  }
65
74
  }
66
75
 
76
+ @Test(expected = AmazonClientException.class)
77
+ public void reopenS3FileByReopener_on_retry_gave_up_should_throw_original_exception() throws Exception
78
+ {
79
+ String content = "value";
80
+ doThrow(new AmazonClientException("no")).doReturn(s3object("in/aa/a", content)).when(client).getObject(any(GetObjectRequest.class));
81
+
82
+ S3InputStreamReopener opener = new S3InputStreamReopener(
83
+ client,
84
+ new GetObjectRequest("my_bucket", "in/aa/a"),
85
+ content.length(),
86
+ retryExecutor()
87
+ .withInitialRetryWait(0)
88
+ .withRetryLimit(0));
89
+
90
+ opener.reopen(0, new RuntimeException());
91
+ }
92
+
93
+ @Test(expected = AmazonClientException.class)
94
+ public void reopenS3FileByReopener_on_retry_always_throw_exception()
95
+ throws Exception
96
+ {
97
+ // the call always fails, even with 2 retries
98
+ doThrow(new AmazonClientException("This exception is thrown when retrying.")).when(client).getObject(any(GetObjectRequest.class));
99
+ S3InputStreamReopener opener = new S3InputStreamReopener(
100
+ client,
101
+ new GetObjectRequest("my_bucket", "in/aa/a"),
102
+ "value".length(),
103
+ retryExecutor()
104
+ .withInitialRetryWait(0)
105
+ .withRetryLimit(2));
106
+
107
+ try (InputStream in = opener.reopen(0, new AmazonClientException("This exception can be ignored"))) {
108
+ fail("Should throw exception.");
109
+ }
110
+ }
111
+
67
112
  static S3Object s3object(String key, String value)
68
113
  {
69
114
  S3Object o = new S3Object();
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-s3
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.16
4
+ version: 0.2.19
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-06-11 00:00:00.000000000 Z
11
+ date: 2018-06-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -48,10 +48,13 @@ files:
48
48
  - build.gradle
49
49
  - lib/embulk/input/s3.rb
50
50
  - src/main/java/org/embulk/input/s3/AbstractS3FileInputPlugin.java
51
+ - src/main/java/org/embulk/input/s3/AlwaysRetryable.java
51
52
  - src/main/java/org/embulk/input/s3/FileList.java
52
53
  - src/main/java/org/embulk/input/s3/HttpProxy.java
54
+ - src/main/java/org/embulk/input/s3/RetrySupportPluginTask.java
53
55
  - src/main/java/org/embulk/input/s3/S3FileInputPlugin.java
54
- - src/main/java/org/embulk/input/s3/S3FileInputUtils.java
56
+ - src/test/java/org/embulk/input/s3/TestAbstractS3FileInputPlugin.java
57
+ - src/test/java/org/embulk/input/s3/TestAlwaysRetryable.java
55
58
  - src/test/java/org/embulk/input/s3/TestAwsCredentials.java
56
59
  - src/test/java/org/embulk/input/s3/TestFileList.java
57
60
  - src/test/java/org/embulk/input/s3/TestHttpProxy.java
@@ -62,8 +65,8 @@ files:
62
65
  - classpath/aws-java-sdk-kms-1.11.253.jar
63
66
  - classpath/aws-java-sdk-s3-1.11.253.jar
64
67
  - classpath/commons-codec-1.9.jar
65
- - classpath/embulk-input-s3-0.2.16.jar
66
- - classpath/embulk-util-aws-credentials-0.2.16.jar
68
+ - classpath/embulk-input-s3-0.2.19.jar
69
+ - classpath/embulk-util-aws-credentials-0.2.19.jar
67
70
  - classpath/httpclient-4.5.2.jar
68
71
  - classpath/httpcore-4.4.4.jar
69
72
  - classpath/ion-java-1.0.2.jar
Binary file
@@ -1,51 +0,0 @@
1
- package org.embulk.input.s3;
2
-
3
- import org.embulk.spi.Exec;
4
- import org.embulk.spi.util.RetryExecutor;
5
- import org.slf4j.Logger;
6
-
7
- /**
8
- * Utility class for S3 File Input.
9
- */
10
- public final class S3FileInputUtils
11
- {
12
- private S3FileInputUtils()
13
- {
14
- }
15
-
16
- public static final <T> T executeWithRetry(int maximumRetries, int initialRetryIntervalMillis, int maximumRetryIntervalMillis, AlwaysRetryRetryable<T> alwaysRetryRetryable)
17
- throws RetryExecutor.RetryGiveupException, InterruptedException
18
- {
19
- return RetryExecutor.retryExecutor()
20
- .withRetryLimit(maximumRetries)
21
- .withInitialRetryWait(initialRetryIntervalMillis)
22
- .withMaxRetryWait(maximumRetryIntervalMillis)
23
- .runInterruptible(alwaysRetryRetryable);
24
- }
25
-
26
- public abstract static class AlwaysRetryRetryable<T> implements RetryExecutor.Retryable<T>
27
- {
28
- private static final Logger LOGGER = Exec.getLogger(AlwaysRetryRetryable.class);
29
-
30
- @Override
31
- public abstract T call() throws Exception;
32
-
33
- @Override
34
- public boolean isRetryableException(Exception exception)
35
- {
36
- return true;
37
- }
38
-
39
- @Override
40
- public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait) throws RetryExecutor.RetryGiveupException
41
- {
42
- LOGGER.info("Retry [{}]/[{}] with retryWait [{}] on exception {}", retryCount, retryLimit, retryWait, exception.getMessage());
43
- }
44
-
45
- @Override
46
- public void onGiveup(Exception firstException, Exception lastException) throws RetryExecutor.RetryGiveupException
47
- {
48
- LOGGER.error("Giving up retry, first exception is [{}], last exception is [{}]", firstException.getMessage(), lastException.getMessage());
49
- }
50
- }
51
- }