embulk-input-s3 0.1.4 → 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ea85a2598851ff4ca60a0bc98385fa908db356b1
|
4
|
+
data.tar.gz: 76f750ed20b19f1bc7a1d1dd56f362ad8e1bc15d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 61c3385b21b589b29f8b69b2f26cd16522a0ce2057cf8bcb53f1a3cf7814612f5115326b6929d2886693692cae6841ae9e881d2d2220f48073fd498d4a9a948c
|
7
|
+
data.tar.gz: 88ca3e86cea694e4927d85615de0d68e03ebad6b05a95029726bd93851b88ebd1b1ea90a529d36658f59cbd628a381f8d7ba28001ab3a3c92e226447260a1618
|
data/ChangeLog
CHANGED
data/build.gradle
CHANGED
data/src/main/java/org/embulk/input/s3/RetryExecutor.java
ADDED
@@ -0,0 +1,130 @@
|
|
1
|
+
package org.embulk.input.s3;
|
2
|
+
|
3
|
+
import java.util.concurrent.Callable;
|
4
|
+
import java.util.concurrent.ExecutionException;
|
5
|
+
|
6
|
+
/**
 * Runs a {@link Retryable} operation, retrying it with exponential back-off
 * when it throws.
 *
 * <p>Instances are immutable: each {@code with...} method returns a new
 * executor that carries the changed setting.
 */
public class RetryExecutor
{
    /**
     * Creates an executor with the built-in settings: 3 retries, 500 ms
     * initial wait, and a 30 minute cap on the wait.
     */
    public static RetryExecutor retryExecutor()
    {
        // TODO default configuration
        return new RetryExecutor(3, 500, 30*60*1000);
    }

    /**
     * Thrown when the executor stops retrying. The cause is the exception
     * that made the operation fail.
     */
    public static class RetryGiveupException
            extends ExecutionException
    {
        /**
         * @param message description of why retrying stopped
         * @param cause the exception thrown by the operation
         */
        public RetryGiveupException(String message, Exception cause)
        {
            // Pass the message through; it used to be dropped silently.
            super(message, cause);
        }

        /**
         * @param cause the exception thrown by the operation
         */
        public RetryGiveupException(Exception cause)
        {
            super(cause);
        }

        /**
         * Returns the cause narrowed to {@code Exception}; both constructors
         * only accept an {@code Exception} as the cause.
         */
        @Override
        public Exception getCause()
        {
            return (Exception) super.getCause();
        }
    }

    /**
     * An operation that can be retried, plus the callbacks the executor
     * invokes while doing so.
     */
    public static interface Retryable<T>
            extends Callable<T>
    {
        /** Performs one attempt of the operation. */
        public T call()
            throws Exception;

        /** Returns true if the operation should be attempted again after {@code exception}. */
        public boolean isRetryableException(Exception exception);

        /**
         * Called before the executor sleeps ahead of a retry.
         *
         * @param exception the exception thrown by the attempt that just failed
         * @param retryCount number of the upcoming retry, starting at 1
         * @param retryLimit maximum number of retries
         * @param retryWait milliseconds the executor is about to sleep
         */
        public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait)
            throws RetryGiveupException;

        /**
         * Called once, right before the executor throws
         * {@link RetryGiveupException}.
         *
         * @param firstException exception thrown by the first attempt
         * @param lastException exception thrown by the final attempt
         */
        public void onGiveup(Exception firstException, Exception lastException)
            throws RetryGiveupException;
    }

    private final int retryLimit;
    private final int initialRetryWait;
    private final int maxRetryWait;

    private RetryExecutor(int retryLimit, int initialRetryWait, int maxRetryWait)
    {
        this.retryLimit = retryLimit;
        this.initialRetryWait = initialRetryWait;
        this.maxRetryWait = maxRetryWait;
    }

    /** Returns a copy of this executor that retries at most {@code count} times. */
    public RetryExecutor withRetryLimit(int count)
    {
        return new RetryExecutor(count, initialRetryWait, maxRetryWait);
    }

    /** Returns a copy of this executor whose first wait is {@code msec} milliseconds. */
    public RetryExecutor withInitialRetryWait(int msec)
    {
        return new RetryExecutor(retryLimit, msec, maxRetryWait);
    }

    /** Returns a copy of this executor whose doubled waits are capped at {@code msec} milliseconds. */
    public RetryExecutor withMaxRetryWait(int msec)
    {
        return new RetryExecutor(retryLimit, initialRetryWait, msec);
    }

    /**
     * Runs {@code op} with retries. If the thread is interrupted while
     * waiting between attempts, the {@link InterruptedException} is thrown
     * to the caller.
     *
     * @throws InterruptedException if interrupted during a back-off sleep
     * @throws RetryGiveupException if the exception is not retryable or the
     *         retry limit is reached
     */
    public <T> T runInterruptible(Retryable<T> op)
            throws InterruptedException, RetryGiveupException
    {
        return run(op, true);
    }

    /**
     * Runs {@code op} with retries. An interruption during a back-off sleep
     * does not stop the retries; the thread's interrupt status is restored
     * before this method exits.
     *
     * @throws RetryGiveupException if the exception is not retryable or the
     *         retry limit is reached
     */
    public <T> T run(Retryable<T> op)
            throws RetryGiveupException
    {
        try {
            return run(op, false);
        } catch (InterruptedException ex) {
            throw new RetryGiveupException("Unexpected interruption", ex);
        }
    }

    private <T> T run(Retryable<T> op, boolean interruptible)
            throws InterruptedException, RetryGiveupException
    {
        int retryWait = initialRetryWait;
        int retryCount = 0;
        Exception firstException = null;
        // Set when an interruption was absorbed in non-interruptible mode.
        boolean interrupted = false;

        try {
            while (true) {
                try {
                    return op.call();
                } catch (Exception exception) {
                    if (firstException == null) {
                        firstException = exception;
                    }
                    if (!op.isRetryableException(exception) || retryCount >= retryLimit) {
                        op.onGiveup(firstException, exception);
                        throw new RetryGiveupException(firstException);
                    }

                    retryCount++;
                    op.onRetry(exception, retryCount, retryLimit, retryWait);

                    try {
                        Thread.sleep(retryWait);
                    } catch (InterruptedException ex) {
                        if (interruptible) {
                            throw ex;
                        }
                        // Keep retrying, but remember the interruption. The flag is
                        // restored on exit rather than here, so that later back-off
                        // sleeps are not cut short immediately.
                        interrupted = true;
                    }

                    // exponential back-off with hard limit; computed in long so the
                    // doubling cannot overflow int before it is clamped
                    retryWait = (int) Math.min(2L * retryWait, (long) maxRetryWait);
                }
            }
        } finally {
            if (interrupted) {
                Thread.currentThread().interrupt();
            }
        }
    }
}
|
130
|
+
|
data/src/main/java/org/embulk/input/s3/RetryableInputStream.java
ADDED
@@ -0,0 +1,128 @@
|
|
1
|
+
package org.embulk.input.s3;
|
2
|
+
|
3
|
+
import java.io.InputStream;
|
4
|
+
import java.io.IOException;
|
5
|
+
|
6
|
+
/**
 * An {@link InputStream} wrapper that, when a read or skip on the wrapped
 * stream throws, asks an {@link Opener} for a replacement stream positioned
 * at the number of bytes consumed so far and then repeats the call.
 *
 * <p>Retrying stops only when the opener itself throws; that exception is
 * propagated to the caller.
 */
public class RetryableInputStream
        extends InputStream
{
    /** Supplies a stream over the same content starting at a given offset. */
    public interface Opener
    {
        /**
         * @param offset number of bytes already consumed from the content
         * @param exception the failure that triggered the reopen, or null
         *        for the initial open
         */
        public InputStream open(long offset, Exception exception) throws IOException;
    }

    private final Opener opener;
    protected InputStream in;
    private long offset;
    private long markedOffset;

    /**
     * @param initialInputStream already-open stream positioned at offset 0
     * @param reopener used to obtain a replacement stream after a failure
     */
    public RetryableInputStream(InputStream initialInputStream, Opener reopener)
    {
        this.opener = reopener;
        this.in = initialInputStream;
        this.offset = 0L;
        this.markedOffset = 0L;
    }

    /** Opens the initial stream through {@code opener} at offset 0. */
    public RetryableInputStream(Opener opener) throws IOException
    {
        this(opener.open(0, null), opener);
    }

    /**
     * Discards the current stream and replaces it with one opened at the
     * current offset.
     */
    private void reopen(Exception exception) throws IOException
    {
        if (in != null) {
            InputStream broken = in;
            in = null;
            try {
                broken.close();
            } catch (IOException closeFailure) {
                // The stream has already failed; a close failure must not prevent
                // the reopen. Keep it attached to the triggering exception.
                if (exception != null && exception != closeFailure) {
                    exception.addSuppressed(closeFailure);
                }
            }
        }
        in = opener.open(offset, exception);
    }

    @Override
    public int read() throws IOException
    {
        while (true) {
            try {
                int v = in.read();
                if (v >= 0) {
                    // -1 means end of stream: nothing was consumed
                    offset += 1;
                }
                return v;
            } catch (IOException | RuntimeException ex) {
                reopen(ex);
            }
        }
    }

    @Override
    public int read(byte[] b) throws IOException
    {
        return read(b, 0, b.length);
    }

    @Override
    public int read(byte[] b, int off, int len) throws IOException
    {
        // Reject caller mistakes up front. Otherwise the wrapped stream's
        // NullPointerException / IndexOutOfBoundsException would be caught below
        // and answered with an endless series of reopens.
        if (b == null) {
            throw new NullPointerException("b");
        }
        if (off < 0 || len < 0 || len > b.length - off) {
            throw new IndexOutOfBoundsException("off=" + off + ", len=" + len + ", b.length=" + b.length);
        }
        while (true) {
            try {
                int r = in.read(b, off, len);
                if (r > 0) {
                    // r is -1 at end of stream; do not move the offset backwards
                    offset += r;
                }
                return r;
            } catch (IOException | RuntimeException ex) {
                reopen(ex);
            }
        }
    }

    @Override
    public long skip(long n) throws IOException
    {
        while (true) {
            try {
                long r = in.skip(n);
                if (r > 0) {
                    offset += r;
                }
                return r;
            } catch (IOException | RuntimeException ex) {
                reopen(ex);
            }
        }
    }

    @Override
    public int available() throws IOException
    {
        return in.available();
    }

    @Override
    public void close() throws IOException
    {
        // in is null if the last reopen attempt failed
        if (in != null) {
            in.close();
        }
    }

    // NOTE(review): mark/reset are forwarded to whichever stream is current. A mark
    // set before a reopen is not carried over to the replacement stream - confirm
    // whether callers rely on mark/reset across failures.
    @Override
    public void mark(int readlimit)
    {
        in.mark(readlimit);
        markedOffset = offset;
    }

    @Override
    public void reset() throws IOException
    {
        in.reset();
        offset = markedOffset;
    }

    @Override
    public boolean markSupported()
    {
        return in.markSupported();
    }
}
|
data/src/main/java/org/embulk/input/s3/S3FileInputPlugin.java
CHANGED
@@ -4,9 +4,11 @@ import java.util.List;
|
|
4
4
|
import java.util.ArrayList;
|
5
5
|
import java.util.Collections;
|
6
6
|
import java.io.IOException;
|
7
|
+
import java.io.InterruptedIOException;
|
7
8
|
import java.io.InputStream;
|
8
9
|
import com.google.common.collect.ImmutableList;
|
9
10
|
import com.google.common.base.Optional;
|
11
|
+
import com.google.common.base.Throwables;
|
10
12
|
import org.slf4j.Logger;
|
11
13
|
import com.amazonaws.auth.AWSCredentials;
|
12
14
|
import com.amazonaws.auth.AWSCredentialsProvider;
|
@@ -32,6 +34,9 @@ import org.embulk.spi.Exec;
|
|
32
34
|
import org.embulk.spi.FileInputPlugin;
|
33
35
|
import org.embulk.spi.TransactionalFileInput;
|
34
36
|
import org.embulk.spi.util.InputStreamFileInput;
|
37
|
+
import org.embulk.input.s3.RetryExecutor.Retryable;
|
38
|
+
import org.embulk.input.s3.RetryExecutor.RetryGiveupException;
|
39
|
+
import static org.embulk.input.s3.RetryExecutor.retryExecutor;
|
35
40
|
|
36
41
|
public class S3FileInputPlugin
|
37
42
|
implements FileInputPlugin
|
@@ -189,6 +194,73 @@ public class S3FileInputPlugin
|
|
189
194
|
return new S3FileInput(task, taskIndex);
|
190
195
|
}
|
191
196
|
|
197
|
+
private static class S3RetryableOpener
|
198
|
+
implements RetryableInputStream.Opener
|
199
|
+
{
|
200
|
+
private final Logger log = Exec.getLogger(S3FileInputPlugin.class);
|
201
|
+
|
202
|
+
private final AmazonS3Client client;
|
203
|
+
private final GetObjectRequest request;
|
204
|
+
private final long contentLength;
|
205
|
+
|
206
|
+
public S3RetryableOpener(AmazonS3Client client, GetObjectRequest request, long contentLength)
|
207
|
+
{
|
208
|
+
this.client = client;
|
209
|
+
this.request = request;
|
210
|
+
this.contentLength = contentLength;
|
211
|
+
}
|
212
|
+
|
213
|
+
@Override
|
214
|
+
public InputStream open(final long offset, final Exception exception) throws IOException
|
215
|
+
{
|
216
|
+
try {
|
217
|
+
return retryExecutor()
|
218
|
+
.withRetryLimit(3)
|
219
|
+
.withInitialRetryWait(500)
|
220
|
+
.withMaxRetryWait(30*1000)
|
221
|
+
.runInterruptible(new Retryable<InputStream>() {
|
222
|
+
@Override
|
223
|
+
public InputStream call() throws InterruptedIOException
|
224
|
+
{
|
225
|
+
log.warn(String.format("S3 read failed. Retrying GET request with %,d bytes offset", offset), exception);
|
226
|
+
request.setRange(offset, contentLength - 1); // [first, last]
|
227
|
+
return client.getObject(request).getObjectContent();
|
228
|
+
}
|
229
|
+
|
230
|
+
@Override
|
231
|
+
public boolean isRetryableException(Exception exception)
|
232
|
+
{
|
233
|
+
return true; // TODO
|
234
|
+
}
|
235
|
+
|
236
|
+
@Override
|
237
|
+
public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait)
|
238
|
+
throws RetryGiveupException
|
239
|
+
{
|
240
|
+
String message = String.format("S3 GET request failed. Retrying %d/%d after %d seconds. Message: %s",
|
241
|
+
retryCount, retryLimit, retryWait/1000, exception.getMessage());
|
242
|
+
if (retryCount % 3 == 0) {
|
243
|
+
log.warn(message, exception);
|
244
|
+
} else {
|
245
|
+
log.warn(message);
|
246
|
+
}
|
247
|
+
}
|
248
|
+
|
249
|
+
@Override
|
250
|
+
public void onGiveup(Exception firstException, Exception lastException)
|
251
|
+
throws RetryGiveupException
|
252
|
+
{
|
253
|
+
}
|
254
|
+
});
|
255
|
+
} catch (RetryGiveupException ex) {
|
256
|
+
Throwables.propagateIfInstanceOf(ex.getCause(), IOException.class);
|
257
|
+
throw Throwables.propagate(ex.getCause());
|
258
|
+
} catch (InterruptedException ex) {
|
259
|
+
throw new InterruptedIOException();
|
260
|
+
}
|
261
|
+
}
|
262
|
+
}
|
263
|
+
|
192
264
|
public static class S3FileInput
|
193
265
|
extends InputStreamFileInput
|
194
266
|
implements TransactionalFileInput
|
@@ -217,15 +289,8 @@ public class S3FileInputPlugin
|
|
217
289
|
}
|
218
290
|
opened = true;
|
219
291
|
GetObjectRequest request = new GetObjectRequest(bucket, key);
|
220
|
-
//if (pos > 0) {
|
221
|
-
// request.setRange(pos, contentLength);
|
222
|
-
//}
|
223
292
|
S3Object obj = client.getObject(request);
|
224
|
-
|
225
|
-
// // first call
|
226
|
-
// contentLength = obj.getObjectMetadata().getContentLength();
|
227
|
-
//}
|
228
|
-
return obj.getObjectContent();
|
293
|
+
return new RetryableInputStream(obj.getObjectContent(), new S3RetryableOpener(client, request, obj.getObjectMetadata().getContentLength()));
|
229
294
|
}
|
230
295
|
|
231
296
|
@Override
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-s3
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
@@ -54,6 +54,8 @@ files:
|
|
54
54
|
- gradlew
|
55
55
|
- gradlew.bat
|
56
56
|
- lib/embulk/input/s3.rb
|
57
|
+
- src/main/java/org/embulk/input/s3/RetryExecutor.java
|
58
|
+
- src/main/java/org/embulk/input/s3/RetryableInputStream.java
|
57
59
|
- src/main/java/org/embulk/input/s3/S3FileInputPlugin.java
|
58
60
|
- src/test/java/org/embulk/input/s3/TestS3FileInputPlugin.java
|
59
61
|
- classpath/aws-java-sdk-core-1.9.22.jar
|
@@ -61,7 +63,7 @@ files:
|
|
61
63
|
- classpath/aws-java-sdk-s3-1.9.22.jar
|
62
64
|
- classpath/commons-codec-1.6.jar
|
63
65
|
- classpath/commons-logging-1.1.3.jar
|
64
|
-
- classpath/embulk-input-s3-0.1.4.jar
|
66
|
+
- classpath/embulk-input-s3-0.1.5.jar
|
65
67
|
- classpath/httpclient-4.3.4.jar
|
66
68
|
- classpath/httpcore-4.3.2.jar
|
67
69
|
homepage: https://github.com/embulk/embulk-input-s3
|