embulk-input-s3 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ea85a2598851ff4ca60a0bc98385fa908db356b1
|
4
|
+
data.tar.gz: 76f750ed20b19f1bc7a1d1dd56f362ad8e1bc15d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 61c3385b21b589b29f8b69b2f26cd16522a0ce2057cf8bcb53f1a3cf7814612f5115326b6929d2886693692cae6841ae9e881d2d2220f48073fd498d4a9a948c
|
7
|
+
data.tar.gz: 88ca3e86cea694e4927d85615de0d68e03ebad6b05a95029726bd93851b88ebd1b1ea90a529d36658f59cbd628a381f8d7ba28001ab3a3c92e226447260a1618
|
data/ChangeLog
CHANGED
data/build.gradle
CHANGED
@@ -0,0 +1,130 @@
|
|
1
|
+
package org.embulk.input.s3;
|
2
|
+
|
3
|
+
import java.util.concurrent.Callable;
|
4
|
+
import java.util.concurrent.ExecutionException;
|
5
|
+
|
6
|
+
/**
 * Runs a {@link Retryable} operation and retries it with exponential back-off
 * when it fails.
 *
 * <p>Instances are immutable; the {@code with...} methods return a reconfigured copy.
 * The wait before the first retry is {@code initialRetryWait} milliseconds and is
 * doubled after every retry, capped at {@code maxRetryWait} milliseconds.
 */
public class RetryExecutor
{
    /**
     * Creates an executor with the built-in defaults: 3 retries, 500 ms initial
     * wait and a 30 minute cap on the wait.
     */
    public static RetryExecutor retryExecutor()
    {
        // TODO default configuration
        return new RetryExecutor(3, 500, 30*60*1000);
    }

    /**
     * Thrown when the operation is not retried any more, either because the
     * failure is not retryable or because the retry limit was reached.
     * The cause is always an {@link Exception}.
     */
    public static class RetryGiveupException
            extends ExecutionException
    {
        /**
         * @param message description of why retrying stopped
         * @param cause the exception that made the executor give up
         */
        public RetryGiveupException(String message, Exception cause)
        {
            // The message used to be dropped here (super(cause) only).
            super(message, cause);
        }

        /**
         * @param cause the exception that made the executor give up
         */
        public RetryGiveupException(Exception cause)
        {
            super(cause);
        }

        /** Returns the cause narrowed to {@link Exception}; both constructors only accept exceptions. */
        @Override
        public Exception getCause()
        {
            return (Exception) super.getCause();
        }
    }

    /**
     * An operation that can be retried, plus the callbacks that steer the retry loop.
     */
    public static interface Retryable<T>
            extends Callable<T>
    {
        /** Performs one attempt of the operation. */
        public T call()
            throws Exception;

        /** Returns true if a failure with {@code exception} should be retried. */
        public boolean isRetryableException(Exception exception);

        /**
         * Called before the executor sleeps for a retry.
         *
         * @param exception the failure of the attempt that just finished
         * @param retryCount 1-based number of the upcoming retry
         * @param retryLimit maximum number of retries
         * @param retryWait milliseconds the executor is about to wait
         */
        public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait)
            throws RetryGiveupException;

        /**
         * Called once right before the executor gives up.
         *
         * @param firstException failure of the very first attempt
         * @param lastException failure of the last attempt
         */
        public void onGiveup(Exception firstException, Exception lastException)
            throws RetryGiveupException;
    }

    private final int retryLimit;
    private final int initialRetryWait;
    private final int maxRetryWait;

    private RetryExecutor(int retryLimit, int initialRetryWait, int maxRetryWait)
    {
        this.retryLimit = retryLimit;
        this.initialRetryWait = initialRetryWait;
        this.maxRetryWait = maxRetryWait;
    }

    /** Returns a copy of this executor that retries at most {@code count} times. */
    public RetryExecutor withRetryLimit(int count)
    {
        return new RetryExecutor(count, initialRetryWait, maxRetryWait);
    }

    /** Returns a copy of this executor whose first wait is {@code msec} milliseconds. */
    public RetryExecutor withInitialRetryWait(int msec)
    {
        return new RetryExecutor(retryLimit, msec, maxRetryWait);
    }

    /** Returns a copy of this executor whose wait is capped at {@code msec} milliseconds. */
    public RetryExecutor withMaxRetryWait(int msec)
    {
        return new RetryExecutor(retryLimit, initialRetryWait, msec);
    }

    /**
     * Runs {@code op}, aborting with {@link InterruptedException} if the thread is
     * interrupted while waiting between attempts.
     *
     * @throws RetryGiveupException if the operation finally failed; the cause is the first failure
     */
    public <T> T runInterruptible(Retryable<T> op)
            throws InterruptedException, RetryGiveupException
    {
        return run(op, true);
    }

    /**
     * Runs {@code op} and keeps retrying even if the thread is interrupted while
     * waiting. The interrupt status is restored before this method returns.
     *
     * @throws RetryGiveupException if the operation finally failed; the cause is the first failure
     */
    public <T> T run(Retryable<T> op)
            throws RetryGiveupException
    {
        try {
            return run(op, false);
        } catch (InterruptedException ex) {
            throw new RetryGiveupException("Unexpected interruption", ex);
        }
    }

    private <T> T run(Retryable<T> op, boolean interruptible)
            throws InterruptedException, RetryGiveupException
    {
        int retryWait = initialRetryWait;
        int retryCount = 0;
        Exception firstException = null;
        // Remembers an interrupt swallowed in non-interruptible mode so that it can
        // be re-asserted on exit instead of being silently lost.
        boolean interrupted = false;

        try {
            while (true) {
                try {
                    return op.call();
                } catch (Exception exception) {
                    if (firstException == null) {
                        firstException = exception;
                    }
                    if (!op.isRetryableException(exception) || retryCount >= retryLimit) {
                        op.onGiveup(firstException, exception);
                        throw new RetryGiveupException(firstException);
                    }

                    retryCount++;
                    op.onRetry(exception, retryCount, retryLimit, retryWait);

                    try {
                        Thread.sleep(retryWait);
                    } catch (InterruptedException ex) {
                        if (interruptible) {
                            throw ex;
                        }
                        // Restoring the flag right here would make every later sleep
                        // fail immediately, so it is deferred to the finally block.
                        interrupted = true;
                    }

                    // exponential back-off with hard limit; doubled in long so that a
                    // large wait cannot overflow int before the cap is applied
                    retryWait = (int) Math.min((long) retryWait * 2, (long) maxRetryWait);
                }
            }
        } finally {
            if (interrupted) {
                Thread.currentThread().interrupt();
            }
        }
    }
}
|
130
|
+
|
@@ -0,0 +1,128 @@
|
|
1
|
+
package org.embulk.input.s3;
|
2
|
+
|
3
|
+
import java.io.InputStream;
|
4
|
+
import java.io.IOException;
|
5
|
+
|
6
|
+
/**
 * An {@link InputStream} wrapper that transparently re-opens the underlying
 * stream when a read or skip fails.
 *
 * <p>The wrapper tracks how many bytes have been consumed so far and hands that
 * offset to the {@link Opener}, which is expected to return a stream positioned
 * at that offset. Reads are retried until they succeed or the opener itself
 * throws, which ends the retry loop.
 *
 * <p>NOTE(review): {@code mark}/{@code reset} are delegated to the current
 * underlying stream only; a mark set before a reopen is not carried over to the
 * replacement stream — confirm whether callers rely on mark/reset.
 */
public class RetryableInputStream
        extends InputStream
{
    /** Factory for the replacement stream used after a failure. */
    public interface Opener
    {
        /**
         * Opens a stream positioned at {@code offset}.
         *
         * @param offset number of bytes already consumed from the logical stream
         * @param exception the failure that triggered the reopen, or null for the initial open
         * @throws IOException if the stream cannot be opened; this aborts the retrying read
         */
        public InputStream open(long offset, Exception exception) throws IOException;
    }

    private final Opener opener;
    protected InputStream in;
    private long offset;
    private long markedOffset;

    /**
     * @param initialInputStream already opened stream positioned at offset 0
     * @param reopener used to obtain a replacement stream after a failure
     */
    public RetryableInputStream(InputStream initialInputStream, Opener reopener)
    {
        this.opener = reopener;
        this.in = initialInputStream;
        this.offset = 0L;
        this.markedOffset = 0L;
    }

    /** Opens the initial stream through {@code opener} at offset 0. */
    public RetryableInputStream(Opener opener) throws IOException
    {
        this(opener.open(0, null), opener);
    }

    /** Discards the current stream and asks the opener for one positioned at {@code offset}. */
    private void reopen(Exception exception) throws IOException
    {
        if (in != null) {
            try {
                in.close();
            } catch (IOException | RuntimeException ignored) {
                // The stream has already failed; a failing close must not prevent
                // the replacement stream from being opened.
            }
            in = null;
        }
        in = opener.open(offset, exception);
    }

    @Override
    public int read() throws IOException
    {
        while (true) {
            try {
                int v = in.read();
                // -1 signals end of stream and consumes nothing.
                if (v >= 0) {
                    offset += 1;
                }
                return v;
            } catch (IOException | RuntimeException ex) {
                reopen(ex);
            }
        }
    }

    @Override
    public int read(byte[] b) throws IOException
    {
        // Reject caller errors up front; inside the loop they would be mistaken
        // for stream failures and retried forever.
        if (b == null) {
            throw new NullPointerException("b");
        }
        while (true) {
            try {
                int r = in.read(b);
                // r is -1 at end of stream; only real progress moves the offset.
                if (r > 0) {
                    offset += r;
                }
                return r;
            } catch (IOException | RuntimeException ex) {
                reopen(ex);
            }
        }
    }

    @Override
    public int read(byte[] b, int off, int len) throws IOException
    {
        // Reject caller errors up front; inside the loop they would be mistaken
        // for stream failures and retried forever.
        if (b == null) {
            throw new NullPointerException("b");
        }
        if (off < 0 || len < 0 || len > b.length - off) {
            throw new IndexOutOfBoundsException(
                    "off=" + off + ", len=" + len + ", b.length=" + b.length);
        }
        while (true) {
            try {
                int r = in.read(b, off, len);
                // r is -1 at end of stream; only real progress moves the offset.
                if (r > 0) {
                    offset += r;
                }
                return r;
            } catch (IOException | RuntimeException ex) {
                reopen(ex);
            }
        }
    }

    @Override
    public long skip(long n) throws IOException
    {
        while (true) {
            try {
                long r = in.skip(n);
                if (r > 0) {
                    offset += r;
                }
                return r;
            } catch (IOException | RuntimeException ex) {
                reopen(ex);
            }
        }
    }

    @Override
    public int available() throws IOException
    {
        return in.available();
    }

    @Override
    public void close() throws IOException
    {
        // in is null if a previous reopen attempt failed.
        if (in != null) {
            in.close();
        }
    }

    @Override
    public void mark(int readlimit)
    {
        in.mark(readlimit);
        markedOffset = offset;
    }

    @Override
    public void reset() throws IOException
    {
        in.reset();
        offset = markedOffset;
    }

    @Override
    public boolean markSupported()
    {
        return in.markSupported();
    }
}
|
@@ -4,9 +4,11 @@ import java.util.List;
|
|
4
4
|
import java.util.ArrayList;
|
5
5
|
import java.util.Collections;
|
6
6
|
import java.io.IOException;
|
7
|
+
import java.io.InterruptedIOException;
|
7
8
|
import java.io.InputStream;
|
8
9
|
import com.google.common.collect.ImmutableList;
|
9
10
|
import com.google.common.base.Optional;
|
11
|
+
import com.google.common.base.Throwables;
|
10
12
|
import org.slf4j.Logger;
|
11
13
|
import com.amazonaws.auth.AWSCredentials;
|
12
14
|
import com.amazonaws.auth.AWSCredentialsProvider;
|
@@ -32,6 +34,9 @@ import org.embulk.spi.Exec;
|
|
32
34
|
import org.embulk.spi.FileInputPlugin;
|
33
35
|
import org.embulk.spi.TransactionalFileInput;
|
34
36
|
import org.embulk.spi.util.InputStreamFileInput;
|
37
|
+
import org.embulk.input.s3.RetryExecutor.Retryable;
|
38
|
+
import org.embulk.input.s3.RetryExecutor.RetryGiveupException;
|
39
|
+
import static org.embulk.input.s3.RetryExecutor.retryExecutor;
|
35
40
|
|
36
41
|
public class S3FileInputPlugin
|
37
42
|
implements FileInputPlugin
|
@@ -189,6 +194,73 @@ public class S3FileInputPlugin
|
|
189
194
|
return new S3FileInput(task, taskIndex);
|
190
195
|
}
|
191
196
|
|
197
|
+
private static class S3RetryableOpener
|
198
|
+
implements RetryableInputStream.Opener
|
199
|
+
{
|
200
|
+
private final Logger log = Exec.getLogger(S3FileInputPlugin.class);
|
201
|
+
|
202
|
+
private final AmazonS3Client client;
|
203
|
+
private final GetObjectRequest request;
|
204
|
+
private final long contentLength;
|
205
|
+
|
206
|
+
public S3RetryableOpener(AmazonS3Client client, GetObjectRequest request, long contentLength)
|
207
|
+
{
|
208
|
+
this.client = client;
|
209
|
+
this.request = request;
|
210
|
+
this.contentLength = contentLength;
|
211
|
+
}
|
212
|
+
|
213
|
+
@Override
|
214
|
+
public InputStream open(final long offset, final Exception exception) throws IOException
|
215
|
+
{
|
216
|
+
try {
|
217
|
+
return retryExecutor()
|
218
|
+
.withRetryLimit(3)
|
219
|
+
.withInitialRetryWait(500)
|
220
|
+
.withMaxRetryWait(30*1000)
|
221
|
+
.runInterruptible(new Retryable<InputStream>() {
|
222
|
+
@Override
|
223
|
+
public InputStream call() throws InterruptedIOException
|
224
|
+
{
|
225
|
+
log.warn(String.format("S3 read failed. Retrying GET request with %,d bytes offset", offset), exception);
|
226
|
+
request.setRange(offset, contentLength - 1); // [first, last]
|
227
|
+
return client.getObject(request).getObjectContent();
|
228
|
+
}
|
229
|
+
|
230
|
+
@Override
|
231
|
+
public boolean isRetryableException(Exception exception)
|
232
|
+
{
|
233
|
+
return true; // TODO
|
234
|
+
}
|
235
|
+
|
236
|
+
@Override
|
237
|
+
public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait)
|
238
|
+
throws RetryGiveupException
|
239
|
+
{
|
240
|
+
String message = String.format("S3 GET request failed. Retrying %d/%d after %d seconds. Message: %s",
|
241
|
+
retryCount, retryLimit, retryWait/1000, exception.getMessage());
|
242
|
+
if (retryCount % 3 == 0) {
|
243
|
+
log.warn(message, exception);
|
244
|
+
} else {
|
245
|
+
log.warn(message);
|
246
|
+
}
|
247
|
+
}
|
248
|
+
|
249
|
+
@Override
|
250
|
+
public void onGiveup(Exception firstException, Exception lastException)
|
251
|
+
throws RetryGiveupException
|
252
|
+
{
|
253
|
+
}
|
254
|
+
});
|
255
|
+
} catch (RetryGiveupException ex) {
|
256
|
+
Throwables.propagateIfInstanceOf(ex.getCause(), IOException.class);
|
257
|
+
throw Throwables.propagate(ex.getCause());
|
258
|
+
} catch (InterruptedException ex) {
|
259
|
+
throw new InterruptedIOException();
|
260
|
+
}
|
261
|
+
}
|
262
|
+
}
|
263
|
+
|
192
264
|
public static class S3FileInput
|
193
265
|
extends InputStreamFileInput
|
194
266
|
implements TransactionalFileInput
|
@@ -217,15 +289,8 @@ public class S3FileInputPlugin
|
|
217
289
|
}
|
218
290
|
opened = true;
|
219
291
|
GetObjectRequest request = new GetObjectRequest(bucket, key);
|
220
|
-
//if (pos > 0) {
|
221
|
-
// request.setRange(pos, contentLength);
|
222
|
-
//}
|
223
292
|
S3Object obj = client.getObject(request);
|
224
|
-
|
225
|
-
// // first call
|
226
|
-
// contentLength = obj.getObjectMetadata().getContentLength();
|
227
|
-
//}
|
228
|
-
return obj.getObjectContent();
|
293
|
+
return new RetryableInputStream(obj.getObjectContent(), new S3RetryableOpener(client, request, obj.getObjectMetadata().getContentLength()));
|
229
294
|
}
|
230
295
|
|
231
296
|
@Override
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-s3
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
@@ -54,6 +54,8 @@ files:
|
|
54
54
|
- gradlew
|
55
55
|
- gradlew.bat
|
56
56
|
- lib/embulk/input/s3.rb
|
57
|
+
- src/main/java/org/embulk/input/s3/RetryExecutor.java
|
58
|
+
- src/main/java/org/embulk/input/s3/RetryableInputStream.java
|
57
59
|
- src/main/java/org/embulk/input/s3/S3FileInputPlugin.java
|
58
60
|
- src/test/java/org/embulk/input/s3/TestS3FileInputPlugin.java
|
59
61
|
- classpath/aws-java-sdk-core-1.9.22.jar
|
@@ -61,7 +63,7 @@ files:
|
|
61
63
|
- classpath/aws-java-sdk-s3-1.9.22.jar
|
62
64
|
- classpath/commons-codec-1.6.jar
|
63
65
|
- classpath/commons-logging-1.1.3.jar
|
64
|
-
- classpath/embulk-input-s3-0.1.4.jar
|
66
|
+
- classpath/embulk-input-s3-0.1.5.jar
|
65
67
|
- classpath/httpclient-4.3.4.jar
|
66
68
|
- classpath/httpcore-4.3.2.jar
|
67
69
|
homepage: https://github.com/embulk/embulk-input-s3
|