embulk-input-s3 0.2.15 → 0.2.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/classpath/embulk-input-s3-0.2.16.jar +0 -0
- data/classpath/{embulk-util-aws-credentials-0.2.15.jar → embulk-util-aws-credentials-0.2.16.jar} +0 -0
- data/src/main/java/org/embulk/input/s3/AbstractS3FileInputPlugin.java +78 -45
- data/src/main/java/org/embulk/input/s3/S3FileInputUtils.java +51 -0
- metadata +5 -4
- data/classpath/embulk-input-s3-0.2.15.jar +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 95a2e3a70de76cdd69c0b0252f37b32671ce3896
|
4
|
+
data.tar.gz: 0f7a8a710a28478d528df8a31d08b996a0595772
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6ae53adc0606da4ec1e2a72a742b18b27ea40145b27baf5375e905670f243b411d43e15a55fdeb2172cc158e1e83b257f290a79d2ff92aa890f9ab57e38d2174
|
7
|
+
data.tar.gz: 332d2963cfde7edf02ed80b9fda7d651a12c490e03f7f2984cee329858b978ebd5c2d5401e9ea30efc5e4a55e66ccd6d87d285a908bd3a4819dffa50de52591e
|
Binary file
|
data/classpath/{embulk-util-aws-credentials-0.2.15.jar → embulk-util-aws-credentials-0.2.16.jar}
RENAMED
Binary file
|
@@ -69,6 +69,7 @@ public abstract class AbstractS3FileInputPlugin
|
|
69
69
|
@Config("http_proxy")
|
70
70
|
@ConfigDefault("null")
|
71
71
|
public Optional<HttpProxy> getHttpProxy();
|
72
|
+
|
72
73
|
public void setHttpProxy(Optional<HttpProxy> httpProxy);
|
73
74
|
|
74
75
|
@Config("incremental")
|
@@ -82,6 +83,7 @@ public abstract class AbstractS3FileInputPlugin
|
|
82
83
|
// TODO timeout, ssl, etc
|
83
84
|
|
84
85
|
public FileList getFiles();
|
86
|
+
|
85
87
|
public void setFiles(FileList files);
|
86
88
|
|
87
89
|
@ConfigInject
|
@@ -104,8 +106,8 @@ public abstract class AbstractS3FileInputPlugin
|
|
104
106
|
|
105
107
|
@Override
|
106
108
|
public ConfigDiff resume(TaskSource taskSource,
|
107
|
-
|
108
|
-
|
109
|
+
int taskCount,
|
110
|
+
FileInputPlugin.Control control)
|
109
111
|
{
|
110
112
|
PluginTask task = taskSource.loadTask(getTaskClass());
|
111
113
|
|
@@ -128,8 +130,8 @@ public abstract class AbstractS3FileInputPlugin
|
|
128
130
|
|
129
131
|
@Override
|
130
132
|
public void cleanup(TaskSource taskSource,
|
131
|
-
|
132
|
-
|
133
|
+
int taskCount,
|
134
|
+
List<TaskReport> successTaskReports)
|
133
135
|
{
|
134
136
|
// do nothing
|
135
137
|
}
|
@@ -139,13 +141,19 @@ public abstract class AbstractS3FileInputPlugin
|
|
139
141
|
* Since this returns an immutable object, it is not for any further customizations by mutating,
|
140
142
|
* e.g., {@link AmazonS3#setEndpoint} will throw a runtime {@link UnsupportedOperationException}
|
141
143
|
* Subclass's customization should be done through {@link AbstractS3FileInputPlugin#defaultS3ClientBuilder}.
|
144
|
+
* @param task Embulk plugin task
|
145
|
+
* @return AmazonS3
|
142
146
|
*/
|
143
147
|
protected AmazonS3 newS3Client(PluginTask task)
|
144
148
|
{
|
145
149
|
return defaultS3ClientBuilder(task).build();
|
146
150
|
}
|
147
151
|
|
148
|
-
/**
|
152
|
+
/**
|
153
|
+
* A base builder for the subclasses to then customize.
|
154
|
+
* @param task Embulk plugin task
|
155
|
+
* @return AmazonS3 client builder
|
156
|
+
**/
|
149
157
|
protected AmazonS3ClientBuilder defaultS3ClientBuilder(PluginTask task)
|
150
158
|
{
|
151
159
|
return AmazonS3ClientBuilder
|
@@ -228,21 +236,45 @@ public abstract class AbstractS3FileInputPlugin
|
|
228
236
|
}
|
229
237
|
throw ex;
|
230
238
|
}
|
239
|
+
catch (InterruptedException | RetryGiveupException ex) {
|
240
|
+
throw new RuntimeException(ex);
|
241
|
+
}
|
231
242
|
}
|
232
243
|
|
233
244
|
/**
|
234
245
|
* Lists S3 filenames filtered by prefix.
|
235
|
-
*
|
246
|
+
* <p>
|
236
247
|
* The resulting list does not include files whose size is 0.
|
248
|
+
* @param builder custom Filelist builder
|
249
|
+
* @param client Amazon S3
|
250
|
+
* @param bucketName Amazon S3 bucket name
|
251
|
+
* @param prefix Amazon S3 object key prefix used to filter the listing
|
252
|
+
* @param lastPath last path
|
253
|
+
* @param skipGlacierObjects skip Glacier objects
|
254
|
+
* @throws RetryGiveupException if listing objects still fails after the retries are exhausted
|
255
|
+
* @throws InterruptedException if the thread is interrupted while retrying
|
237
256
|
*/
|
238
257
|
public static void listS3FilesByPrefix(FileList.Builder builder,
|
239
|
-
|
240
|
-
|
258
|
+
final AmazonS3 client, final String bucketName,
|
259
|
+
final String prefix, Optional<String> lastPath, boolean skipGlacierObjects) throws RetryGiveupException, InterruptedException
|
241
260
|
{
|
242
261
|
String lastKey = lastPath.orNull();
|
243
262
|
do {
|
244
|
-
|
245
|
-
ObjectListing
|
263
|
+
final String finalLastKey = lastKey;
|
264
|
+
Optional<ObjectListing> optOl = S3FileInputUtils.executeWithRetry(3, 500, 30 * 1000, new S3FileInputUtils.AlwaysRetryRetryable<Optional<ObjectListing>>()
|
265
|
+
{
|
266
|
+
@Override
|
267
|
+
public Optional<ObjectListing> call() throws AmazonServiceException
|
268
|
+
{
|
269
|
+
ListObjectsRequest req = new ListObjectsRequest(bucketName, prefix, finalLastKey, null, 1024);
|
270
|
+
ObjectListing ol = client.listObjects(req);
|
271
|
+
return Optional.of(ol);
|
272
|
+
}
|
273
|
+
});
|
274
|
+
if (!optOl.isPresent()) {
|
275
|
+
break;
|
276
|
+
}
|
277
|
+
ObjectListing ol = optOl.get();
|
246
278
|
for (S3ObjectSummary s : ol.getObjectSummaries()) {
|
247
279
|
if (s.getStorageClass().equals(StorageClass.Glacier.toString())) {
|
248
280
|
if (skipGlacierObjects) {
|
@@ -262,7 +294,7 @@ public abstract class AbstractS3FileInputPlugin
|
|
262
294
|
}
|
263
295
|
}
|
264
296
|
lastKey = ol.getNextMarker();
|
265
|
-
} while(lastKey != null);
|
297
|
+
} while (lastKey != null);
|
266
298
|
}
|
267
299
|
|
268
300
|
@Override
|
@@ -294,45 +326,46 @@ public abstract class AbstractS3FileInputPlugin
|
|
294
326
|
{
|
295
327
|
try {
|
296
328
|
return retryExecutor()
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
@Override
|
302
|
-
public InputStream call() throws InterruptedIOException
|
329
|
+
.withRetryLimit(3)
|
330
|
+
.withInitialRetryWait(500)
|
331
|
+
.withMaxRetryWait(30 * 1000)
|
332
|
+
.runInterruptible(new Retryable<InputStream>()
|
303
333
|
{
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
{
|
312
|
-
return true; // TODO
|
313
|
-
}
|
334
|
+
@Override
|
335
|
+
public InputStream call() throws InterruptedIOException
|
336
|
+
{
|
337
|
+
log.warn(String.format("S3 read failed. Retrying GET request with %,d bytes offset", offset), closedCause);
|
338
|
+
request.setRange(offset, contentLength - 1); // [first, last]
|
339
|
+
return client.getObject(request).getObjectContent();
|
340
|
+
}
|
314
341
|
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
String message = String.format("S3 GET request failed. Retrying %d/%d after %d seconds. Message: %s",
|
320
|
-
retryCount, retryLimit, retryWait / 1000, exception.getMessage());
|
321
|
-
if (retryCount % 3 == 0) {
|
322
|
-
log.warn(message, exception);
|
342
|
+
@Override
|
343
|
+
public boolean isRetryableException(Exception exception)
|
344
|
+
{
|
345
|
+
return true; // TODO
|
323
346
|
}
|
324
|
-
|
325
|
-
|
347
|
+
|
348
|
+
@Override
|
349
|
+
public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait)
|
350
|
+
throws RetryGiveupException
|
351
|
+
{
|
352
|
+
String message = String.format("S3 GET request failed. Retrying %d/%d after %d seconds. Message: %s",
|
353
|
+
retryCount, retryLimit, retryWait / 1000, exception.getMessage());
|
354
|
+
if (retryCount % 3 == 0) {
|
355
|
+
log.warn(message, exception);
|
356
|
+
}
|
357
|
+
else {
|
358
|
+
log.warn(message);
|
359
|
+
}
|
326
360
|
}
|
327
|
-
}
|
328
361
|
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
362
|
+
@Override
|
363
|
+
public void onGiveup(Exception firstException, Exception lastException)
|
364
|
+
throws RetryGiveupException
|
365
|
+
{
|
366
|
+
log.error("Giving up retry, first exception is [{}], last exception is [{}]", firstException.getMessage(), lastException.getMessage());
|
367
|
+
}
|
368
|
+
});
|
336
369
|
}
|
337
370
|
catch (RetryGiveupException ex) {
|
338
371
|
Throwables.propagateIfInstanceOf(ex.getCause(), IOException.class);
|
@@ -0,0 +1,51 @@
|
|
1
|
+
package org.embulk.input.s3;
|
2
|
+
|
3
|
+
import org.embulk.spi.Exec;
|
4
|
+
import org.embulk.spi.util.RetryExecutor;
|
5
|
+
import org.slf4j.Logger;
|
6
|
+
|
7
|
+
/**
|
8
|
+
* Utility class for S3 File Input.
|
9
|
+
*/
|
10
|
+
public final class S3FileInputUtils
|
11
|
+
{
|
12
|
+
private S3FileInputUtils()
|
13
|
+
{
|
14
|
+
}
|
15
|
+
|
16
|
+
public static final <T> T executeWithRetry(int maximumRetries, int initialRetryIntervalMillis, int maximumRetryIntervalMillis, AlwaysRetryRetryable<T> alwaysRetryRetryable)
|
17
|
+
throws RetryExecutor.RetryGiveupException, InterruptedException
|
18
|
+
{
|
19
|
+
return RetryExecutor.retryExecutor()
|
20
|
+
.withRetryLimit(maximumRetries)
|
21
|
+
.withInitialRetryWait(initialRetryIntervalMillis)
|
22
|
+
.withMaxRetryWait(maximumRetryIntervalMillis)
|
23
|
+
.runInterruptible(alwaysRetryRetryable);
|
24
|
+
}
|
25
|
+
|
26
|
+
public abstract static class AlwaysRetryRetryable<T> implements RetryExecutor.Retryable<T>
|
27
|
+
{
|
28
|
+
private static final Logger LOGGER = Exec.getLogger(AlwaysRetryRetryable.class);
|
29
|
+
|
30
|
+
@Override
|
31
|
+
public abstract T call() throws Exception;
|
32
|
+
|
33
|
+
@Override
|
34
|
+
public boolean isRetryableException(Exception exception)
|
35
|
+
{
|
36
|
+
return true;
|
37
|
+
}
|
38
|
+
|
39
|
+
@Override
|
40
|
+
public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait) throws RetryExecutor.RetryGiveupException
|
41
|
+
{
|
42
|
+
LOGGER.info("Retry [{}]/[{}] with retryWait [{}] on exception {}", retryCount, retryLimit, retryWait, exception.getMessage());
|
43
|
+
}
|
44
|
+
|
45
|
+
@Override
|
46
|
+
public void onGiveup(Exception firstException, Exception lastException) throws RetryExecutor.RetryGiveupException
|
47
|
+
{
|
48
|
+
LOGGER.error("Giving up retry, first exception is [{}], last exception is [{}]", firstException.getMessage(), lastException.getMessage());
|
49
|
+
}
|
50
|
+
}
|
51
|
+
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-s3
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.15
|
4
|
+
version: 0.2.16
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-06-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -51,6 +51,7 @@ files:
|
|
51
51
|
- src/main/java/org/embulk/input/s3/FileList.java
|
52
52
|
- src/main/java/org/embulk/input/s3/HttpProxy.java
|
53
53
|
- src/main/java/org/embulk/input/s3/S3FileInputPlugin.java
|
54
|
+
- src/main/java/org/embulk/input/s3/S3FileInputUtils.java
|
54
55
|
- src/test/java/org/embulk/input/s3/TestAwsCredentials.java
|
55
56
|
- src/test/java/org/embulk/input/s3/TestFileList.java
|
56
57
|
- src/test/java/org/embulk/input/s3/TestHttpProxy.java
|
@@ -61,8 +62,8 @@ files:
|
|
61
62
|
- classpath/aws-java-sdk-kms-1.11.253.jar
|
62
63
|
- classpath/aws-java-sdk-s3-1.11.253.jar
|
63
64
|
- classpath/commons-codec-1.9.jar
|
64
|
-
- classpath/embulk-input-s3-0.2.15.jar
|
65
|
-
- classpath/embulk-util-aws-credentials-0.2.15.jar
|
65
|
+
- classpath/embulk-input-s3-0.2.16.jar
|
66
|
+
- classpath/embulk-util-aws-credentials-0.2.16.jar
|
66
67
|
- classpath/httpclient-4.5.2.jar
|
67
68
|
- classpath/httpcore-4.4.4.jar
|
68
69
|
- classpath/ion-java-1.0.2.jar
|
Binary file
|