embulk-input-s3 0.2.16 → 0.2.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 95a2e3a70de76cdd69c0b0252f37b32671ce3896
4
- data.tar.gz: 0f7a8a710a28478d528df8a31d08b996a0595772
3
+ metadata.gz: 3c9e972f5326281de40fc46190dc3d268282b34e
4
+ data.tar.gz: 0c63221f881710e6c4a18dbf10ab73be701193ff
5
5
  SHA512:
6
- metadata.gz: 6ae53adc0606da4ec1e2a72a742b18b27ea40145b27baf5375e905670f243b411d43e15a55fdeb2172cc158e1e83b257f290a79d2ff92aa890f9ab57e38d2174
7
- data.tar.gz: 332d2963cfde7edf02ed80b9fda7d651a12c490e03f7f2984cee329858b978ebd5c2d5401e9ea30efc5e4a55e66ccd6d87d285a908bd3a4819dffa50de52591e
6
+ metadata.gz: 3983830d6e700a3b3e0a2cdadd063613c4ff41b39e01c96c00c2f8a33515fd8d8be601c644184119bb87e46d1eab394869e91cf1c557c6420ef022f40a93b31e
7
+ data.tar.gz: 27471ec5bbc698865ab44d2d4ca94b2074c7589c5c9c0ce919b3c3b5bc18b885b6566b48fb74803c99abc10c04716771a1b9faa231ff780de3740da55b468358
@@ -6,15 +6,17 @@ import com.amazonaws.Protocol;
6
6
  import com.amazonaws.auth.AWSCredentialsProvider;
7
7
  import com.amazonaws.services.s3.AmazonS3;
8
8
  import com.amazonaws.services.s3.AmazonS3ClientBuilder;
9
+ import com.amazonaws.services.s3.model.GetObjectMetadataRequest;
9
10
  import com.amazonaws.services.s3.model.GetObjectRequest;
10
11
  import com.amazonaws.services.s3.model.ListObjectsRequest;
11
12
  import com.amazonaws.services.s3.model.ObjectListing;
13
+ import com.amazonaws.services.s3.model.ObjectMetadata;
12
14
  import com.amazonaws.services.s3.model.S3Object;
15
+ import com.amazonaws.services.s3.model.S3ObjectInputStream;
13
16
  import com.amazonaws.services.s3.model.S3ObjectSummary;
14
17
  import com.amazonaws.services.s3.model.StorageClass;
15
18
  import com.google.common.annotations.VisibleForTesting;
16
19
  import com.google.common.base.Optional;
17
- import com.google.common.base.Throwables;
18
20
  import org.embulk.config.Config;
19
21
  import org.embulk.config.ConfigDefault;
20
22
  import org.embulk.config.ConfigDiff;
@@ -30,15 +32,13 @@ import org.embulk.spi.FileInputPlugin;
30
32
  import org.embulk.spi.TransactionalFileInput;
31
33
  import org.embulk.spi.util.InputStreamFileInput;
32
34
  import org.embulk.spi.util.ResumableInputStream;
33
- import org.embulk.spi.util.RetryExecutor.RetryGiveupException;
34
- import org.embulk.spi.util.RetryExecutor.Retryable;
35
+ import org.embulk.spi.util.RetryExecutor;
35
36
  import org.embulk.util.aws.credentials.AwsCredentials;
36
37
  import org.embulk.util.aws.credentials.AwsCredentialsTask;
37
38
  import org.slf4j.Logger;
38
39
 
39
40
  import java.io.IOException;
40
41
  import java.io.InputStream;
41
- import java.io.InterruptedIOException;
42
42
  import java.util.Iterator;
43
43
  import java.util.List;
44
44
 
@@ -50,13 +50,18 @@ public abstract class AbstractS3FileInputPlugin
50
50
  private static final Logger LOGGER = Exec.getLogger(S3FileInputPlugin.class);
51
51
 
52
52
  public interface PluginTask
53
- extends AwsCredentialsTask, FileList.Task, Task
53
+ extends AwsCredentialsTask, FileList.Task, RetrySupportPluginTask, Task
54
54
  {
55
55
  @Config("bucket")
56
56
  public String getBucket();
57
57
 
58
58
  @Config("path_prefix")
59
- public String getPathPrefix();
59
+ @ConfigDefault("null")
60
+ public Optional<String> getPathPrefix();
61
+
62
+ @Config("path")
63
+ @ConfigDefault("null")
64
+ public Optional<String> getPath();
60
65
 
61
66
  @Config("last_path")
62
67
  @ConfigDefault("null")
@@ -97,6 +102,7 @@ public abstract class AbstractS3FileInputPlugin
97
102
  {
98
103
  PluginTask task = config.loadConfig(getTaskClass());
99
104
 
105
+ validateInputTask(task);
100
106
  // list files recursively
101
107
  task.setFiles(listFiles(task));
102
108
 
@@ -208,21 +214,42 @@ public abstract class AbstractS3FileInputPlugin
208
214
  }
209
215
  }
210
216
 
211
- private FileList listFiles(PluginTask task)
217
+ /**
218
+ * Build the common retry executor from some configuration params of plugin task.
219
+ * @param task Plugin task.
220
+ * @return RetryExecutor object
221
+ */
222
+ private static RetryExecutor retryExecutorFrom(RetrySupportPluginTask task)
223
+ {
224
+ return retryExecutor()
225
+ .withRetryLimit(task.getMaximumRetries())
226
+ .withInitialRetryWait(task.getInitialRetryIntervalMillis())
227
+ .withMaxRetryWait(task.getMaximumRetryIntervalMillis());
228
+ }
229
+
230
+ private FileList listFiles(final PluginTask task)
212
231
  {
213
- LOGGER.info("Start listing file with prefix [{}]", task.getPathPrefix());
214
232
  try {
215
233
  AmazonS3 client = newS3Client(task);
216
234
  String bucketName = task.getBucket();
235
+ FileList.Builder builder = new FileList.Builder(task);
236
+ RetryExecutor retryExec = retryExecutorFrom(task);
237
+ if (task.getPath().isPresent()) {
238
+ LOGGER.info("Start getting object with path: [{}]", task.getPath().get());
239
+ addS3DirectObject(builder, client, task.getBucket(), task.getPath().get(), retryExec);
240
+ }
241
+ else {
242
+ // does not need to verify existent path prefix here since there is the validation requires either path or path_prefix
243
+ LOGGER.info("Start listing file with prefix [{}]", task.getPathPrefix().get());
244
+ if (task.getPathPrefix().get().equals("/")) {
245
+ LOGGER.info("Listing files with prefix \"/\". This doesn't mean all files in a bucket. If you intend to read all files, use \"path_prefix: ''\" (empty string) instead.");
246
+ }
217
247
 
218
- if (task.getPathPrefix().equals("/")) {
219
- LOGGER.info("Listing files with prefix \"/\". This doesn't mean all files in a bucket. If you intend to read all files, use \"path_prefix: ''\" (empty string) instead.");
248
+ listS3FilesByPrefix(builder, client, bucketName,
249
+ task.getPathPrefix().get(), task.getLastPath(), task.getSkipGlacierObjects(), retryExec);
250
+ LOGGER.info("Found total [{}] files", builder.size());
220
251
  }
221
252
 
222
- FileList.Builder builder = new FileList.Builder(task);
223
- listS3FilesByPrefix(builder, client, bucketName,
224
- task.getPathPrefix(), task.getLastPath(), task.getSkipGlacierObjects());
225
- LOGGER.info("Found total [{}] files", builder.size());
226
253
  return builder.build();
227
254
  }
228
255
  catch (AmazonServiceException ex) {
@@ -236,11 +263,55 @@ public abstract class AbstractS3FileInputPlugin
236
263
  }
237
264
  throw ex;
238
265
  }
239
- catch (InterruptedException | RetryGiveupException ex) {
240
- throw new RuntimeException(ex);
266
+ }
267
+
268
+ @VisibleForTesting
269
+ public void addS3DirectObject(FileList.Builder builder,
270
+ final AmazonS3 client,
271
+ String bucket,
272
+ String objectKey)
273
+ {
274
+ addS3DirectObject(builder, client, bucket, objectKey, null);
275
+ }
276
+
277
+ @VisibleForTesting
278
+ public void addS3DirectObject(FileList.Builder builder,
279
+ final AmazonS3 client,
280
+ String bucket,
281
+ String objectKey,
282
+ RetryExecutor retryExec)
283
+ {
284
+ final GetObjectMetadataRequest objectMetadataRequest = new GetObjectMetadataRequest(bucket, objectKey);
285
+
286
+ ObjectMetadata objectMetadata = new AlwaysRetryable<ObjectMetadata>("Looking up for a single object") {
287
+ @Override
288
+ public ObjectMetadata call()
289
+ {
290
+ return client.getObjectMetadata(objectMetadataRequest);
291
+ }
292
+ }.executeWith(retryExec);
293
+
294
+ builder.add(objectKey, objectMetadata.getContentLength());
295
+ }
296
+
297
+ private void validateInputTask(PluginTask task)
298
+ {
299
+ if (!task.getPathPrefix().isPresent() && !task.getPath().isPresent()) {
300
+ throw new ConfigException("Either path or path_prefix is required");
241
301
  }
242
302
  }
243
303
 
304
+ @VisibleForTesting
305
+ public static void listS3FilesByPrefix(FileList.Builder builder,
306
+ final AmazonS3 client,
307
+ String bucketName,
308
+ String prefix,
309
+ Optional<String> lastPath,
310
+ boolean skipGlacierObjects)
311
+ {
312
+ listS3FilesByPrefix(builder, client, bucketName, prefix, lastPath, skipGlacierObjects, null);
313
+ }
314
+
244
315
  /**
245
316
  * Lists S3 filenames filtered by prefix.
246
317
  * <p>
@@ -251,30 +322,28 @@ public abstract class AbstractS3FileInputPlugin
251
322
  * @param prefix Amazon S3 bucket name prefix
252
323
  * @param lastPath last path
253
324
  * @param skipGlacierObjects skip gracier objects
254
- * @throws RetryGiveupException error when retrying
255
- * @throws InterruptedException error when retrying
325
+ * @param retryExec a retry executor object to do the retrying
256
326
  */
327
+ @VisibleForTesting
257
328
  public static void listS3FilesByPrefix(FileList.Builder builder,
258
- final AmazonS3 client, final String bucketName,
259
- final String prefix, Optional<String> lastPath, boolean skipGlacierObjects) throws RetryGiveupException, InterruptedException
329
+ final AmazonS3 client,
330
+ String bucketName,
331
+ String prefix,
332
+ Optional<String> lastPath,
333
+ boolean skipGlacierObjects,
334
+ RetryExecutor retryExec)
260
335
  {
261
336
  String lastKey = lastPath.orNull();
262
337
  do {
263
338
  final String finalLastKey = lastKey;
264
- Optional<ObjectListing> optOl = S3FileInputUtils.executeWithRetry(3, 500, 30 * 1000, new S3FileInputUtils.AlwaysRetryRetryable<Optional<ObjectListing>>()
265
- {
339
+ final ListObjectsRequest req = new ListObjectsRequest(bucketName, prefix, finalLastKey, null, 1024);
340
+ ObjectListing ol = new AlwaysRetryable<ObjectListing>("Listing objects") {
266
341
  @Override
267
- public Optional<ObjectListing> call() throws AmazonServiceException
342
+ public ObjectListing call()
268
343
  {
269
- ListObjectsRequest req = new ListObjectsRequest(bucketName, prefix, finalLastKey, null, 1024);
270
- ObjectListing ol = client.listObjects(req);
271
- return Optional.of(ol);
344
+ return client.listObjects(req);
272
345
  }
273
- });
274
- if (!optOl.isPresent()) {
275
- break;
276
- }
277
- ObjectListing ol = optOl.get();
346
+ }.executeWith(retryExec);
278
347
  for (S3ObjectSummary s : ol.getObjectSummaries()) {
279
348
  if (s.getStorageClass().equals(StorageClass.Glacier.toString())) {
280
349
  if (skipGlacierObjects) {
@@ -313,67 +382,34 @@ public abstract class AbstractS3FileInputPlugin
313
382
  private final AmazonS3 client;
314
383
  private final GetObjectRequest request;
315
384
  private final long contentLength;
385
+ private final RetryExecutor retryExec;
316
386
 
317
387
  public S3InputStreamReopener(AmazonS3 client, GetObjectRequest request, long contentLength)
388
+ {
389
+ this(client, request, contentLength, null);
390
+ }
391
+
392
+ public S3InputStreamReopener(AmazonS3 client, GetObjectRequest request, long contentLength, RetryExecutor retryExec)
318
393
  {
319
394
  this.client = client;
320
395
  this.request = request;
321
396
  this.contentLength = contentLength;
397
+ this.retryExec = retryExec;
322
398
  }
323
399
 
324
400
  @Override
325
401
  public InputStream reopen(final long offset, final Exception closedCause) throws IOException
326
402
  {
327
- try {
328
- return retryExecutor()
329
- .withRetryLimit(3)
330
- .withInitialRetryWait(500)
331
- .withMaxRetryWait(30 * 1000)
332
- .runInterruptible(new Retryable<InputStream>()
333
- {
334
- @Override
335
- public InputStream call() throws InterruptedIOException
336
- {
337
- log.warn(String.format("S3 read failed. Retrying GET request with %,d bytes offset", offset), closedCause);
338
- request.setRange(offset, contentLength - 1); // [first, last]
339
- return client.getObject(request).getObjectContent();
340
- }
341
-
342
- @Override
343
- public boolean isRetryableException(Exception exception)
344
- {
345
- return true; // TODO
346
- }
347
-
348
- @Override
349
- public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait)
350
- throws RetryGiveupException
351
- {
352
- String message = String.format("S3 GET request failed. Retrying %d/%d after %d seconds. Message: %s",
353
- retryCount, retryLimit, retryWait / 1000, exception.getMessage());
354
- if (retryCount % 3 == 0) {
355
- log.warn(message, exception);
356
- }
357
- else {
358
- log.warn(message);
359
- }
360
- }
361
-
362
- @Override
363
- public void onGiveup(Exception firstException, Exception lastException)
364
- throws RetryGiveupException
365
- {
366
- log.error("Giving up retry, first exception is [{}], last exception is [{}]", firstException.getMessage(), lastException.getMessage());
367
- }
368
- });
369
- }
370
- catch (RetryGiveupException ex) {
371
- Throwables.propagateIfInstanceOf(ex.getCause(), IOException.class);
372
- throw Throwables.propagate(ex.getCause());
373
- }
374
- catch (InterruptedException ex) {
375
- throw new InterruptedIOException();
376
- }
403
+ log.warn(String.format("S3 read failed. Retrying GET request with %,d bytes offset", offset), closedCause);
404
+ request.setRange(offset, contentLength - 1); // [first, last]
405
+
406
+ return new AlwaysRetryable<S3ObjectInputStream>("Opening the file") {
407
+ @Override
408
+ public S3ObjectInputStream call()
409
+ {
410
+ return client.getObject(request).getObjectContent();
411
+ }
412
+ }.executeWithCheckedException(retryExec, IOException.class);
377
413
  }
378
414
  }
379
415
 
@@ -408,12 +444,14 @@ public abstract class AbstractS3FileInputPlugin
408
444
  private AmazonS3 client;
409
445
  private final String bucket;
410
446
  private final Iterator<String> iterator;
447
+ private final RetryExecutor retryExec;
411
448
 
412
449
  public SingleFileProvider(PluginTask task, int taskIndex)
413
450
  {
414
451
  this.client = newS3Client(task);
415
452
  this.bucket = task.getBucket();
416
453
  this.iterator = task.getFiles().get(taskIndex).iterator();
454
+ this.retryExec = retryExecutorFrom(task);
417
455
  }
418
456
 
419
457
  @Override
@@ -427,7 +465,7 @@ public abstract class AbstractS3FileInputPlugin
427
465
  S3Object obj = client.getObject(request);
428
466
  long objectSize = obj.getObjectMetadata().getContentLength();
429
467
  LOGGER.info("Open S3Object with bucket [{}], key [{}], with size [{}]", bucket, key, objectSize);
430
- return new ResumableInputStream(obj.getObjectContent(), new S3InputStreamReopener(client, request, objectSize));
468
+ return new ResumableInputStream(obj.getObjectContent(), new S3InputStreamReopener(client, request, objectSize, retryExec));
431
469
  }
432
470
 
433
471
  @Override
@@ -0,0 +1,158 @@
1
+ package org.embulk.input.s3;
2
+
3
+ import com.google.common.base.Throwables;
4
+ import org.embulk.spi.Exec;
5
+ import org.embulk.spi.util.RetryExecutor;
6
+ import org.slf4j.Logger;
7
+
8
+ import java.util.concurrent.Callable;
9
+
10
+ import static java.lang.String.format;
11
+ import static org.embulk.spi.util.RetryExecutor.RetryGiveupException;
12
+ import static org.embulk.spi.util.RetryExecutor.Retryable;
13
+
14
+ /**
15
+ * Always retry, regardless the occurred exceptions,
16
+ * Also provide a default approach for exception propagation.
17
+ */
18
+ class AlwaysRetryable<T> implements Retryable<T>
19
+ {
20
+ private static final Logger log = Exec.getLogger(AlwaysRetryable.class);
21
+
22
+ private String operationName;
23
+ private Callable<T> callable;
24
+
25
+ /**
26
+ * @param operationName the name that will be referred on logging
27
+ */
28
+ public AlwaysRetryable(String operationName)
29
+ {
30
+ this.operationName = operationName;
31
+ }
32
+
33
+ /**
34
+ * @param operationName the name that will be referred on logging
35
+ * @param callable the operation, either define this at construction time or override the call() method
36
+ */
37
+ public AlwaysRetryable(String operationName, Callable<T> callable)
38
+ {
39
+ this.operationName = operationName;
40
+ this.callable = callable;
41
+ }
42
+
43
+ public AlwaysRetryable()
44
+ {
45
+ this("Anonymous operation");
46
+ }
47
+
48
+ public AlwaysRetryable(Callable<T> callable)
49
+ {
50
+ this("Anonymous operation", callable);
51
+ }
52
+
53
+ @Override
54
+ public T call() throws Exception
55
+ {
56
+ if (callable != null) {
57
+ return callable.call();
58
+ }
59
+ else {
60
+ throw new IllegalStateException("Either override call() or construct with a Runnable");
61
+ }
62
+ }
63
+
64
+ @Override
65
+ public boolean isRetryableException(Exception exception)
66
+ {
67
+ return true;
68
+ }
69
+
70
+ @Override
71
+ public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait)
72
+ {
73
+ String message = format("%s failed. Retrying %d/%d after %d seconds. Message: %s",
74
+ operationName, retryCount, retryLimit, retryWait / 1000, exception.getMessage());
75
+ if (retryCount % retryLimit == 0) {
76
+ log.warn(message, exception);
77
+ }
78
+ else {
79
+ log.warn(message);
80
+ }
81
+ }
82
+
83
+ @Override
84
+ public void onGiveup(Exception firstException, Exception lastException)
85
+ {
86
+ // Exceptions would be propagated, so it's up to the caller to handle, this is just warning
87
+ log.warn("Giving up on retrying for {}, first exception is [{}], last exception is [{}]",
88
+ operationName, firstException.getMessage(), lastException.getMessage());
89
+ }
90
+
91
+ /**
92
+ * Run itself by the supplied executor,
93
+ *
94
+ * This propagates all exceptions (as unchecked) and unwrap RetryGiveupException for the original cause.
95
+ * If the original exception already is a RuntimeException, it will be propagated as is. If not, it will
96
+ * be wrapped around with a RuntimeException.
97
+ *
98
+ * For convenient, it execute normally without retrying when executor is null.
99
+ *
100
+ * @throws RuntimeException the original cause
101
+ */
102
+ public T executeWith(RetryExecutor executor)
103
+ {
104
+ if (executor == null) {
105
+ try {
106
+ return this.call();
107
+ }
108
+ catch (Exception e) {
109
+ Throwables.propagate(e);
110
+ }
111
+ }
112
+
113
+ try {
114
+ return executor.runInterruptible(this);
115
+ }
116
+ catch (RetryGiveupException e) {
117
+ throw Throwables.propagate(e.getCause());
118
+ }
119
+ catch (InterruptedException e) {
120
+ throw Throwables.propagate(e);
121
+ }
122
+ }
123
+
124
+ /**
125
+ * Run itself by the supplied executor,
126
+ *
127
+ * Same as `executeWith`, this propagates all original exceptions. But `propagateAsIsException` will
128
+ * be re-throw without being wrapped on a RuntimeException, whether it is a checked or unchecked exception.
129
+ *
130
+ * For convenient, it execute normally without retrying when executor is null.
131
+ *
132
+ * @throws X whatever checked exception that you decided to propagate directly
133
+ * @throws RuntimeException wrap around whatever the original cause of failure (potentially thread interruption)
134
+ */
135
+ public <X extends Throwable> T executeWithCheckedException(RetryExecutor executor,
136
+ Class<X> propagateAsIsException) throws X
137
+ {
138
+ if (executor == null) {
139
+ try {
140
+ return this.call();
141
+ }
142
+ catch (Exception e) {
143
+ Throwables.propagate(e);
144
+ }
145
+ }
146
+
147
+ try {
148
+ return executor.runInterruptible(this);
149
+ }
150
+ catch (RetryGiveupException e) {
151
+ Throwables.propagateIfInstanceOf(e.getCause(), propagateAsIsException);
152
+ throw Throwables.propagate(e.getCause());
153
+ }
154
+ catch (InterruptedException e) {
155
+ throw Throwables.propagate(e);
156
+ }
157
+ }
158
+ }
@@ -0,0 +1,20 @@
1
+ package org.embulk.input.s3;
2
+
3
+ import org.embulk.config.Config;
4
+ import org.embulk.config.ConfigDefault;
5
+ import org.embulk.config.Task;
6
+
7
+ public interface RetrySupportPluginTask extends Task
8
+ {
9
+ @Config("maximum_retries")
10
+ @ConfigDefault("7")
11
+ int getMaximumRetries();
12
+
13
+ @Config("initial_retry_interval_millis")
14
+ @ConfigDefault("30000")
15
+ int getInitialRetryIntervalMillis();
16
+
17
+ @Config("maximum_retry_interval_millis")
18
+ @ConfigDefault("480000")
19
+ int getMaximumRetryIntervalMillis();
20
+ }
@@ -0,0 +1,115 @@
1
+ package org.embulk.input.s3;
2
+
3
+ import com.amazonaws.services.s3.AmazonS3;
4
+ import com.amazonaws.services.s3.model.GetObjectMetadataRequest;
5
+ import com.amazonaws.services.s3.model.ListObjectsRequest;
6
+ import com.amazonaws.services.s3.model.ObjectListing;
7
+ import com.amazonaws.services.s3.model.ObjectMetadata;
8
+ import com.google.common.base.Optional;
9
+ import org.embulk.EmbulkTestRuntime;
10
+ import org.embulk.spi.util.RetryExecutor;
11
+ import org.junit.Before;
12
+ import org.junit.Rule;
13
+ import org.junit.Test;
14
+
15
+ import static org.mockito.Matchers.any;
16
+ import static org.mockito.Mockito.doReturn;
17
+ import static org.mockito.Mockito.doThrow;
18
+ import static org.mockito.Mockito.mock;
19
+
20
+ public class TestAbstractS3FileInputPlugin
21
+ {
22
+ private static RetryExecutor retryExecutor()
23
+ {
24
+ return RetryExecutor.retryExecutor()
25
+ .withInitialRetryWait(0)
26
+ .withMaxRetryWait(0);
27
+ }
28
+
29
+ private static AbstractS3FileInputPlugin dummyS3Plugin()
30
+ {
31
+ return new AbstractS3FileInputPlugin()
32
+ {
33
+ @Override
34
+ protected Class<? extends PluginTask> getTaskClass()
35
+ {
36
+ return PluginTask.class;
37
+ }
38
+ };
39
+ }
40
+
41
+ private static class SomeException extends RuntimeException
42
+ {
43
+ }
44
+
45
+ @Rule
46
+ public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
47
+
48
+ private AmazonS3 client;
49
+
50
+ @Before
51
+ public void createResources()
52
+ {
53
+ client = mock(AmazonS3.class);
54
+ }
55
+
56
+ @Test
57
+ public void listS3FilesByPrefix()
58
+ {
59
+ doReturn(new ObjectListing()).when(client).listObjects(any(ListObjectsRequest.class));
60
+ FileList.Builder builder = new FileList.Builder();
61
+ dummyS3Plugin().listS3FilesByPrefix(builder, client, "some_bucket", "some_prefix", Optional.of("last_path"), true);
62
+ }
63
+
64
+ @Test
65
+ public void listS3FileByPrefix_with_retry()
66
+ {
67
+ doThrow(new RuntimeException()).doReturn(new ObjectListing())
68
+ .when(client).listObjects(any(ListObjectsRequest.class));
69
+ FileList.Builder builder = new FileList.Builder();
70
+ dummyS3Plugin().listS3FilesByPrefix(
71
+ builder, client, "some_bucket", "some_prefix", Optional.of("last_path"), true,
72
+ retryExecutor().withRetryLimit(1));
73
+ }
74
+
75
+ @Test(expected = SomeException.class)
76
+ public void listS3FileByPrefix_on_retry_gave_up_should_throw_the_original_exception()
77
+ {
78
+ doThrow(new SomeException()).doReturn(new ObjectListing())
79
+ .when(client).listObjects(any(ListObjectsRequest.class));
80
+ FileList.Builder builder = new FileList.Builder();
81
+ dummyS3Plugin().listS3FilesByPrefix(
82
+ builder, client, "some_bucket", "some_prefix", Optional.of("last_path"), true,
83
+ retryExecutor().withRetryLimit(0));
84
+ }
85
+
86
+ @Test
87
+ public void addS3DirectObject()
88
+ {
89
+ doReturn(new ObjectMetadata()).when(client).getObjectMetadata(any(GetObjectMetadataRequest.class));
90
+ FileList.Builder builder = new FileList.Builder().pathMatchPattern("");
91
+ dummyS3Plugin().addS3DirectObject(builder, client, "some_bucket", "some_prefix");
92
+ }
93
+
94
+ @Test
95
+ public void addS3DirectObject_with_retry()
96
+ {
97
+ doThrow(new RuntimeException()).doReturn(new ObjectMetadata())
98
+ .when(client).getObjectMetadata(any(GetObjectMetadataRequest.class));
99
+ FileList.Builder builder = new FileList.Builder().pathMatchPattern("");
100
+ dummyS3Plugin().addS3DirectObject(
101
+ builder, client, "some_bucket", "some_prefix",
102
+ retryExecutor());
103
+ }
104
+
105
+ @Test(expected = SomeException.class)
106
+ public void addS3DirectObject_on_retry_gave_up_should_throw_original_exception()
107
+ {
108
+ doThrow(new SomeException()).doReturn(new ObjectMetadata())
109
+ .when(client).getObjectMetadata(any(GetObjectMetadataRequest.class));
110
+ FileList.Builder builder = new FileList.Builder().pathMatchPattern("");
111
+ dummyS3Plugin().addS3DirectObject(
112
+ builder, client, "some_bucket", "some_prefix",
113
+ retryExecutor().withRetryLimit(0));
114
+ }
115
+ }
@@ -0,0 +1,131 @@
1
+ package org.embulk.input.s3;
2
+
3
+ import org.embulk.EmbulkTestRuntime;
4
+ import org.embulk.spi.util.RetryExecutor;
5
+ import org.embulk.spi.util.RetryExecutor.RetryGiveupException;
6
+ import org.junit.Rule;
7
+ import org.junit.Test;
8
+
9
+ import java.io.IOException;
10
+ import java.util.concurrent.Callable;
11
+
12
+ import static java.lang.String.format;
13
+ import static org.msgpack.core.Preconditions.checkArgument;
14
+
15
+ public class TestAlwaysRetryable
16
+ {
17
+ @Rule
18
+ public EmbulkTestRuntime runtime = new EmbulkTestRuntime(); // require for AlwaysRetryable's logger
19
+
20
+ private static class Deny extends RuntimeException implements Callable
21
+ {
22
+ private int pastCalls = 0;
23
+ private final int targetCalls;
24
+ private Exception exception;
25
+
26
+ Deny(int targetCalls)
27
+ {
28
+ super(format("Try harder! (Will pass after %d calls)", targetCalls));
29
+ checkArgument(targetCalls >= 0);
30
+ this.targetCalls = targetCalls;
31
+ }
32
+
33
+ static Deny until(int calls)
34
+ {
35
+ return new Deny(calls);
36
+ }
37
+
38
+ Deny with(Exception exception)
39
+ {
40
+ this.exception = exception;
41
+ return this;
42
+ }
43
+
44
+ @Override
45
+ public Object call() throws Exception
46
+ {
47
+ if (pastCalls < targetCalls) {
48
+ pastCalls++;
49
+ if (exception != null) {
50
+ throw exception;
51
+ }
52
+ else {
53
+ throw this;
54
+ }
55
+ }
56
+ pastCalls++;
57
+ return null;
58
+ }
59
+ }
60
+
61
+ private static RetryExecutor retryExecutor()
62
+ {
63
+ return RetryExecutor.retryExecutor()
64
+ .withInitialRetryWait(0)
65
+ .withMaxRetryWait(0);
66
+ }
67
+
68
+ @Test
69
+ @SuppressWarnings("unchecked")
70
+ public void guarantee_retry_attempts_just_like_Retryable() throws Exception
71
+ {
72
+ retryExecutor()
73
+ .withRetryLimit(0)
74
+ .run(new AlwaysRetryable(Deny.until(0)));
75
+ retryExecutor()
76
+ .withRetryLimit(1)
77
+ .run(new AlwaysRetryable(Deny.until(1)));
78
+ retryExecutor()
79
+ .withRetryLimit(2)
80
+ .run(new AlwaysRetryable(Deny.until(1)));
81
+ retryExecutor()
82
+ .withRetryLimit(3)
83
+ .run(new AlwaysRetryable(Deny.until(2)));
84
+ }
85
+
86
+ @Test(expected = RetryGiveupException.class)
87
+ @SuppressWarnings("unchecked")
88
+ public void fail_after_exceeding_attempts_just_like_Retryable() throws Exception
89
+ {
90
+ retryExecutor()
91
+ .withRetryLimit(3)
92
+ .run(new AlwaysRetryable(Deny.until(4)));
93
+ }
94
+
95
+ @Test(expected = Deny.class)
96
+ @SuppressWarnings("unchecked")
97
+ public void execute_should_unwrap_RetryGiveupException() throws Exception
98
+ {
99
+ new AlwaysRetryable(Deny.until(4))
100
+ .executeWith(retryExecutor().withRetryLimit(3));
101
+ }
102
+
103
+ @Test(expected = RuntimeException.class)
104
+ @SuppressWarnings("unchecked")
105
+ public void execute_should_unwrap_RetryGiveupException_but_rewrap_checked_exception_in_a_RuntimeException()
106
+ {
107
+ new AlwaysRetryable(Deny.until(4).with(new Exception("A checked exception")))
108
+ .executeWith(retryExecutor().withRetryLimit(3));
109
+ }
110
+
111
+ @Test(expected = IOException.class)
112
+ public void executeAndPropagateAsIs_should_leave_original_exception_unwrapped() throws IOException
113
+ {
114
+ RetryExecutor retryExc = retryExecutor().withRetryLimit(3);
115
+ // An explicit type parameter for operation return type is needed here,
116
+ // Without one, javac (at least on 1.8) will fails to infer the X exception type parameter.
117
+ new AlwaysRetryable<Object>() {
118
+ @Override
119
+ public Object call() throws IOException
120
+ {
121
+ throw new IOException();
122
+ }
123
+ }.executeWithCheckedException(retryExc, IOException.class);
124
+ }
125
+
126
+ @Test(expected = IllegalStateException.class)
127
+ public void execute_without_an_implementation_should_throw_an_IllegalStateException()
128
+ {
129
+ new AlwaysRetryable().executeWith(retryExecutor());
130
+ }
131
+ }
@@ -161,6 +161,28 @@ public class TestS3FileInputPlugin
161
161
  }
162
162
  }
163
163
 
164
+ @Test
165
+ public void usePath()
166
+ {
167
+ ConfigSource config = this.config.deepCopy()
168
+ .set("path", String.format("%s/sample_01.csv", EMBULK_S3_TEST_PATH_PREFIX))
169
+ .set("path_prefix", null);
170
+ ConfigDiff configDiff = runner.transaction(config, new Control(runner, output));
171
+ assertEquals(String.format("%s/sample_01.csv", EMBULK_S3_TEST_PATH_PREFIX), configDiff.get(String.class, "last_path"));
172
+ assertRecords(config, output);
173
+ }
174
+
175
+ @Test
176
+ public void usePathAsHighPriorityThanPathPrefix()
177
+ {
178
+ ConfigSource config = this.config.deepCopy()
179
+ .set("path", String.format("%s/sample_01.csv", EMBULK_S3_TEST_PATH_PREFIX))
180
+ .set("path_prefix", "foo"); // path_prefix has the bad value, if path_prefix is chosen, expected result will be failed
181
+ ConfigDiff configDiff = runner.transaction(config, new Control(runner, output));
182
+ assertEquals(String.format("%s/sample_01.csv", EMBULK_S3_TEST_PATH_PREFIX), configDiff.get(String.class, "last_path"));
183
+ assertRecords(config, output);
184
+ }
185
+
164
186
  @Test
165
187
  public void configuredEndpoint()
166
188
  {
@@ -1,5 +1,6 @@
1
1
  package org.embulk.input.s3;
2
2
 
3
+ import com.amazonaws.AmazonClientException;
3
4
  import com.amazonaws.services.s3.AmazonS3;
4
5
  import com.amazonaws.services.s3.model.GetObjectRequest;
5
6
  import com.amazonaws.services.s3.model.ObjectMetadata;
@@ -16,7 +17,9 @@ import java.io.ByteArrayInputStream;
16
17
  import java.io.InputStream;
17
18
  import java.io.InputStreamReader;
18
19
 
20
+ import static org.embulk.spi.util.RetryExecutor.retryExecutor;
19
21
  import static org.junit.Assert.assertEquals;
22
+ import static org.junit.Assert.fail;
20
23
  import static org.mockito.Matchers.any;
21
24
  import static org.mockito.Mockito.doReturn;
22
25
  import static org.mockito.Mockito.doThrow;
@@ -55,7 +58,13 @@ public class TestS3InputStreamReopener
55
58
  { // retry once
56
59
  doThrow(new RuntimeException()).doReturn(s3object("in/aa/a", content)).when(client).getObject(any(GetObjectRequest.class));
57
60
 
58
- S3InputStreamReopener opener = new S3InputStreamReopener(client, new GetObjectRequest("my_bucket", "in/aa/a"), content.length());
61
+ S3InputStreamReopener opener = new S3InputStreamReopener(
62
+ client,
63
+ new GetObjectRequest("my_bucket", "in/aa/a"),
64
+ content.length(),
65
+ retryExecutor()
66
+ .withInitialRetryWait(0)
67
+ .withRetryLimit(1));
59
68
 
60
69
  try (InputStream in = opener.reopen(0, new RuntimeException())) {
61
70
  BufferedReader r = new BufferedReader(new InputStreamReader(in));
@@ -64,6 +73,42 @@ public class TestS3InputStreamReopener
64
73
  }
65
74
  }
66
75
 
76
+ @Test(expected = AmazonClientException.class)
77
+ public void reopenS3FileByReopener_on_retry_gave_up_should_throw_original_exception() throws Exception
78
+ {
79
+ String content = "value";
80
+ doThrow(new AmazonClientException("no")).doReturn(s3object("in/aa/a", content)).when(client).getObject(any(GetObjectRequest.class));
81
+
82
+ S3InputStreamReopener opener = new S3InputStreamReopener(
83
+ client,
84
+ new GetObjectRequest("my_bucket", "in/aa/a"),
85
+ content.length(),
86
+ retryExecutor()
87
+ .withInitialRetryWait(0)
88
+ .withRetryLimit(0));
89
+
90
+ opener.reopen(0, new RuntimeException());
91
+ }
92
+
93
+ @Test(expected = AmazonClientException.class)
94
+ public void reopenS3FileByReopener_on_retry_always_throw_exception()
95
+ throws Exception
96
+ {
97
+ // always failed call with 2 retries
98
+ doThrow(new AmazonClientException("This exception is thrown when retrying.")).when(client).getObject(any(GetObjectRequest.class));
99
+ S3InputStreamReopener opener = new S3InputStreamReopener(
100
+ client,
101
+ new GetObjectRequest("my_bucket", "in/aa/a"),
102
+ "value".length(),
103
+ retryExecutor()
104
+ .withInitialRetryWait(0)
105
+ .withRetryLimit(2));
106
+
107
+ try (InputStream in = opener.reopen(0, new AmazonClientException("This exception can be ignored"))) {
108
+ fail("Should throw exception.");
109
+ }
110
+ }
111
+
67
112
  static S3Object s3object(String key, String value)
68
113
  {
69
114
  S3Object o = new S3Object();
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-s3
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.16
4
+ version: 0.2.19
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-06-11 00:00:00.000000000 Z
11
+ date: 2018-06-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -48,10 +48,13 @@ files:
48
48
  - build.gradle
49
49
  - lib/embulk/input/s3.rb
50
50
  - src/main/java/org/embulk/input/s3/AbstractS3FileInputPlugin.java
51
+ - src/main/java/org/embulk/input/s3/AlwaysRetryable.java
51
52
  - src/main/java/org/embulk/input/s3/FileList.java
52
53
  - src/main/java/org/embulk/input/s3/HttpProxy.java
54
+ - src/main/java/org/embulk/input/s3/RetrySupportPluginTask.java
53
55
  - src/main/java/org/embulk/input/s3/S3FileInputPlugin.java
54
- - src/main/java/org/embulk/input/s3/S3FileInputUtils.java
56
+ - src/test/java/org/embulk/input/s3/TestAbstractS3FileInputPlugin.java
57
+ - src/test/java/org/embulk/input/s3/TestAlwaysRetryable.java
55
58
  - src/test/java/org/embulk/input/s3/TestAwsCredentials.java
56
59
  - src/test/java/org/embulk/input/s3/TestFileList.java
57
60
  - src/test/java/org/embulk/input/s3/TestHttpProxy.java
@@ -62,8 +65,8 @@ files:
62
65
  - classpath/aws-java-sdk-kms-1.11.253.jar
63
66
  - classpath/aws-java-sdk-s3-1.11.253.jar
64
67
  - classpath/commons-codec-1.9.jar
65
- - classpath/embulk-input-s3-0.2.16.jar
66
- - classpath/embulk-util-aws-credentials-0.2.16.jar
68
+ - classpath/embulk-input-s3-0.2.19.jar
69
+ - classpath/embulk-util-aws-credentials-0.2.19.jar
67
70
  - classpath/httpclient-4.5.2.jar
68
71
  - classpath/httpcore-4.4.4.jar
69
72
  - classpath/ion-java-1.0.2.jar
Binary file
@@ -1,51 +0,0 @@
1
- package org.embulk.input.s3;
2
-
3
- import org.embulk.spi.Exec;
4
- import org.embulk.spi.util.RetryExecutor;
5
- import org.slf4j.Logger;
6
-
7
- /**
8
- * Utility class for S3 File Input.
9
- */
10
- public final class S3FileInputUtils
11
- {
12
- private S3FileInputUtils()
13
- {
14
- }
15
-
16
- public static final <T> T executeWithRetry(int maximumRetries, int initialRetryIntervalMillis, int maximumRetryIntervalMillis, AlwaysRetryRetryable<T> alwaysRetryRetryable)
17
- throws RetryExecutor.RetryGiveupException, InterruptedException
18
- {
19
- return RetryExecutor.retryExecutor()
20
- .withRetryLimit(maximumRetries)
21
- .withInitialRetryWait(initialRetryIntervalMillis)
22
- .withMaxRetryWait(maximumRetryIntervalMillis)
23
- .runInterruptible(alwaysRetryRetryable);
24
- }
25
-
26
- public abstract static class AlwaysRetryRetryable<T> implements RetryExecutor.Retryable<T>
27
- {
28
- private static final Logger LOGGER = Exec.getLogger(AlwaysRetryRetryable.class);
29
-
30
- @Override
31
- public abstract T call() throws Exception;
32
-
33
- @Override
34
- public boolean isRetryableException(Exception exception)
35
- {
36
- return true;
37
- }
38
-
39
- @Override
40
- public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait) throws RetryExecutor.RetryGiveupException
41
- {
42
- LOGGER.info("Retry [{}]/[{}] with retryWait [{}] on exception {}", retryCount, retryLimit, retryWait, exception.getMessage());
43
- }
44
-
45
- @Override
46
- public void onGiveup(Exception firstException, Exception lastException) throws RetryExecutor.RetryGiveupException
47
- {
48
- LOGGER.error("Giving up retry, first exception is [{}], last exception is [{}]", firstException.getMessage(), lastException.getMessage());
49
- }
50
- }
51
- }