embulk-input-s3 0.1.4 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6b8d337887e940e5fbb90c02ca32fe278ad21ea1
4
- data.tar.gz: a438072bd951e5b2bee43e722da2007859e5bc27
3
+ metadata.gz: ea85a2598851ff4ca60a0bc98385fa908db356b1
4
+ data.tar.gz: 76f750ed20b19f1bc7a1d1dd56f362ad8e1bc15d
5
5
  SHA512:
6
- metadata.gz: 84e08341b88f200133fa2e0a562409d1f1a6b122c0408eae14761174445641f995273f94b81340b82373d91b78c3b854986926a8ac7c7cfe1d76fd166870f169
7
- data.tar.gz: d2372bb97d642951ff2c0c67088ea5c8ca1c859a11fee050b2b7848f3f616bd8b0dd47c4f15743bb4185d52dd4b04d77bd1983ee0e671b26b010b280e82c5a4a
6
+ metadata.gz: 61c3385b21b589b29f8b69b2f26cd16522a0ce2057cf8bcb53f1a3cf7814612f5115326b6929d2886693692cae6841ae9e881d2d2220f48073fd498d4a9a948c
7
+ data.tar.gz: 88ca3e86cea694e4927d85615de0d68e03ebad6b05a95029726bd93851b88ebd1b1ea90a529d36658f59cbd628a381f8d7ba28001ab3a3c92e226447260a1618
data/ChangeLog CHANGED
@@ -1,4 +1,4 @@
1
- Release 0.1.4 - 2015-03-23
1
+ Release 0.1.5 - 2015-03-23
2
2
 
3
3
  * Implemented retrying
4
4
 
data/build.gradle CHANGED
@@ -13,7 +13,7 @@ configurations {
13
13
  provided
14
14
  }
15
15
 
16
- version = "0.1.4"
16
+ version = "0.1.5"
17
17
 
18
18
  dependencies {
19
19
  compile "org.embulk:embulk-core:0.5.3"
@@ -0,0 +1,130 @@
1
+ package org.embulk.input.s3;
2
+
3
+ import java.util.concurrent.Callable;
4
+ import java.util.concurrent.ExecutionException;
5
+
6
/**
 * Runs a {@link Retryable} operation, retrying it with exponential back-off
 * when it fails with an exception the operation reports as retryable.
 *
 * <p>Instances are immutable; the {@code with*} methods return a new executor
 * with one setting replaced.
 */
public class RetryExecutor
{
    /**
     * Creates an executor with the default settings: 3 retries, 500 ms initial
     * wait, and a 30 minute cap on the wait between attempts.
     */
    public static RetryExecutor retryExecutor()
    {
        // TODO default configuration
        return new RetryExecutor(3, 500, 30*60*1000);
    }

    /**
     * Thrown when the executor stops retrying. The cause is the first
     * exception raised by the operation.
     */
    public static class RetryGiveupException
            extends ExecutionException
    {
        public RetryGiveupException(String message, Exception cause)
        {
            // Pass the message through; it used to be silently discarded.
            super(message, cause);
        }

        public RetryGiveupException(Exception cause)
        {
            super(cause);
        }

        /** Returns the cause, which is always an {@link Exception} because both constructors require one. */
        @Override
        public Exception getCause()
        {
            return (Exception) super.getCause();
        }
    }

    /**
     * An operation that can be retried, plus the callbacks the executor uses
     * to decide whether to retry and to report progress.
     */
    public static interface Retryable<T>
            extends Callable<T>
    {
        /** Performs one attempt of the operation. */
        public T call()
            throws Exception;

        /** Returns true when a failure with {@code exception} should be retried. */
        public boolean isRetryableException(Exception exception);

        /**
         * Called before the executor sleeps ahead of a retry.
         *
         * @param exception  the failure of the attempt that just ended
         * @param retryCount 1-based number of the retry about to happen
         * @param retryLimit maximum number of retries
         * @param retryWait  milliseconds the executor will sleep before retrying
         */
        public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait)
            throws RetryGiveupException;

        /**
         * Called once right before the executor throws {@link RetryGiveupException}.
         *
         * @param firstException failure of the first attempt
         * @param lastException  failure of the final attempt
         */
        public void onGiveup(Exception firstException, Exception lastException)
            throws RetryGiveupException;
    }

    private final int retryLimit;
    private final int initialRetryWait;
    private final int maxRetryWait;

    private RetryExecutor(int retryLimit, int initialRetryWait, int maxRetryWait)
    {
        this.retryLimit = retryLimit;
        this.initialRetryWait = initialRetryWait;
        this.maxRetryWait = maxRetryWait;
    }

    /** Returns a copy of this executor that retries at most {@code count} times. */
    public RetryExecutor withRetryLimit(int count)
    {
        return new RetryExecutor(count, initialRetryWait, maxRetryWait);
    }

    /** Returns a copy of this executor whose first wait is {@code msec} milliseconds. */
    public RetryExecutor withInitialRetryWait(int msec)
    {
        return new RetryExecutor(retryLimit, msec, maxRetryWait);
    }

    /** Returns a copy of this executor whose doubled wait is capped at {@code msec} milliseconds. */
    public RetryExecutor withMaxRetryWait(int msec)
    {
        return new RetryExecutor(retryLimit, initialRetryWait, msec);
    }

    /**
     * Runs {@code op}, propagating {@link InterruptedException} if the thread
     * is interrupted while waiting between attempts.
     */
    public <T> T runInterruptible(Retryable<T> op)
            throws InterruptedException, RetryGiveupException
    {
        return run(op, true);
    }

    /**
     * Runs {@code op} without aborting on interruption. An interrupt received
     * while waiting cuts that wait short and is re-asserted on the thread
     * before this method returns or throws.
     */
    public <T> T run(Retryable<T> op)
            throws RetryGiveupException
    {
        try {
            return run(op, false);
        } catch (InterruptedException ex) {
            throw new RetryGiveupException("Unexpected interruption", ex);
        }
    }

    private <T> T run(Retryable<T> op, boolean interruptible)
            throws InterruptedException, RetryGiveupException
    {
        int retryWait = initialRetryWait;
        int retryCount = 0;

        Exception firstException = null;

        // Remembers an interrupt swallowed in non-interruptible mode so it can
        // be restored on exit instead of being lost.
        boolean interrupted = false;

        try {
            while (true) {
                try {
                    return op.call();
                } catch (Exception exception) {
                    if (firstException == null) {
                        firstException = exception;
                    }
                    if (!op.isRetryableException(exception) || retryCount >= retryLimit) {
                        op.onGiveup(firstException, exception);
                        throw new RetryGiveupException(firstException);
                    }

                    retryCount++;
                    op.onRetry(exception, retryCount, retryLimit, retryWait);

                    try {
                        Thread.sleep(retryWait);
                    } catch (InterruptedException ex) {
                        if (interruptible) {
                            throw ex;
                        }
                        // Restoring the flag right away would make every later
                        // sleep fail immediately, so defer it to the finally block.
                        interrupted = true;
                    }

                    // exponential back-off with hard limit; doubling is done in
                    // long so a large wait cannot overflow int before the cap applies
                    retryWait = (int) Math.min((long) retryWait * 2L, (long) maxRetryWait);
                }
            }
        } finally {
            if (interrupted) {
                Thread.currentThread().interrupt();
            }
        }
    }
}
130
+
@@ -0,0 +1,128 @@
1
+ package org.embulk.input.s3;
2
+
3
+ import java.io.InputStream;
4
+ import java.io.IOException;
5
+
6
/**
 * An {@link InputStream} wrapper that, when a read or skip on the underlying
 * stream fails, asks an {@link Opener} for a replacement stream positioned at
 * the number of bytes consumed so far and then repeats the operation.
 *
 * <p>Read and skip calls keep retrying for as long as the opener returns a
 * stream; they fail only when {@link Opener#open} itself throws.
 */
public class RetryableInputStream
        extends InputStream
{
    /** Supplies a stream that starts at a given byte offset of the same content. */
    public interface Opener
    {
        /**
         * @param offset    number of bytes already consumed from the content
         * @param exception the failure that triggered the reopen, or null for the initial open
         */
        public InputStream open(long offset, Exception exception) throws IOException;
    }

    private final Opener opener;
    protected InputStream in;
    private long offset;
    private long markedOffset;

    public RetryableInputStream(InputStream initialInputStream, Opener reopener)
    {
        this.opener = reopener;
        this.in = initialInputStream;
        this.offset = 0L;
        this.markedOffset = 0L;
    }

    public RetryableInputStream(Opener opener) throws IOException
    {
        this(opener.open(0, null), opener);
    }

    private void reopen(Exception exception) throws IOException
    {
        if (in != null) {
            try {
                in.close();
            } catch (IOException ignored) {
                // The stream has already failed; a failure while discarding it
                // must not prevent opening the replacement.
            } finally {
                in = null;
            }
        }
        in = opener.open(offset, exception);
    }

    @Override
    public int read() throws IOException
    {
        while (true) {
            try {
                int v = in.read();
                if (v >= 0) {
                    // -1 means end of stream: nothing was consumed
                    offset += 1;
                }
                return v;
            } catch (IOException | RuntimeException ex) {
                reopen(ex);
            }
        }
    }

    @Override
    public int read(byte[] b) throws IOException
    {
        while (true) {
            try {
                int r = in.read(b);
                if (r > 0) {
                    // skip the -1 EOF marker so the offset never moves backwards
                    offset += r;
                }
                return r;
            } catch (IOException | RuntimeException ex) {
                reopen(ex);
            }
        }
    }

    @Override
    public int read(byte[] b, int off, int len) throws IOException
    {
        while (true) {
            try {
                int r = in.read(b, off, len);
                if (r > 0) {
                    // skip the -1 EOF marker so the offset never moves backwards
                    offset += r;
                }
                return r;
            } catch (IOException | RuntimeException ex) {
                reopen(ex);
            }
        }
    }

    @Override
    public long skip(long n) throws IOException
    {
        while (true) {
            try {
                long r = in.skip(n);
                if (r > 0) {
                    offset += r;
                }
                return r;
            } catch (IOException | RuntimeException ex) {
                reopen(ex);
            }
        }
    }

    @Override
    public int available() throws IOException
    {
        return in.available();
    }

    @Override
    public void close() throws IOException
    {
        // in is null when a previous reopen closed the old stream and the
        // opener then failed; there is nothing left to close in that case.
        if (in != null) {
            in.close();
        }
    }

    // NOTE(review): mark/reset are delegated to the current stream. A stream
    // obtained from a reopen never saw the earlier mark() call, so reset()
    // after a reopen depends on how that stream handles an unset mark —
    // confirm callers do not rely on mark/reset across a reopen.
    @Override
    public void mark(int readlimit)
    {
        in.mark(readlimit);
        markedOffset = offset;
    }

    @Override
    public void reset() throws IOException
    {
        in.reset();
        offset = markedOffset;
    }

    @Override
    public boolean markSupported()
    {
        return in.markSupported();
    }
}
@@ -4,9 +4,11 @@ import java.util.List;
4
4
  import java.util.ArrayList;
5
5
  import java.util.Collections;
6
6
  import java.io.IOException;
7
+ import java.io.InterruptedIOException;
7
8
  import java.io.InputStream;
8
9
  import com.google.common.collect.ImmutableList;
9
10
  import com.google.common.base.Optional;
11
+ import com.google.common.base.Throwables;
10
12
  import org.slf4j.Logger;
11
13
  import com.amazonaws.auth.AWSCredentials;
12
14
  import com.amazonaws.auth.AWSCredentialsProvider;
@@ -32,6 +34,9 @@ import org.embulk.spi.Exec;
32
34
  import org.embulk.spi.FileInputPlugin;
33
35
  import org.embulk.spi.TransactionalFileInput;
34
36
  import org.embulk.spi.util.InputStreamFileInput;
37
+ import org.embulk.input.s3.RetryExecutor.Retryable;
38
+ import org.embulk.input.s3.RetryExecutor.RetryGiveupException;
39
+ import static org.embulk.input.s3.RetryExecutor.retryExecutor;
35
40
 
36
41
  public class S3FileInputPlugin
37
42
  implements FileInputPlugin
@@ -189,6 +194,73 @@ public class S3FileInputPlugin
189
194
  return new S3FileInput(task, taskIndex);
190
195
  }
191
196
 
197
+ private static class S3RetryableOpener
198
+ implements RetryableInputStream.Opener
199
+ {
200
+ private final Logger log = Exec.getLogger(S3FileInputPlugin.class);
201
+
202
+ private final AmazonS3Client client;
203
+ private final GetObjectRequest request;
204
+ private final long contentLength;
205
+
206
+ public S3RetryableOpener(AmazonS3Client client, GetObjectRequest request, long contentLength)
207
+ {
208
+ this.client = client;
209
+ this.request = request;
210
+ this.contentLength = contentLength;
211
+ }
212
+
213
+ @Override
214
+ public InputStream open(final long offset, final Exception exception) throws IOException
215
+ {
216
+ try {
217
+ return retryExecutor()
218
+ .withRetryLimit(3)
219
+ .withInitialRetryWait(500)
220
+ .withMaxRetryWait(30*1000)
221
+ .runInterruptible(new Retryable<InputStream>() {
222
+ @Override
223
+ public InputStream call() throws InterruptedIOException
224
+ {
225
+ log.warn(String.format("S3 read failed. Retrying GET request with %,d bytes offset", offset), exception);
226
+ request.setRange(offset, contentLength - 1); // [first, last]
227
+ return client.getObject(request).getObjectContent();
228
+ }
229
+
230
+ @Override
231
+ public boolean isRetryableException(Exception exception)
232
+ {
233
+ return true; // TODO
234
+ }
235
+
236
+ @Override
237
+ public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait)
238
+ throws RetryGiveupException
239
+ {
240
+ String message = String.format("S3 GET request failed. Retrying %d/%d after %d seconds. Message: %s",
241
+ retryCount, retryLimit, retryWait/1000, exception.getMessage());
242
+ if (retryCount % 3 == 0) {
243
+ log.warn(message, exception);
244
+ } else {
245
+ log.warn(message);
246
+ }
247
+ }
248
+
249
+ @Override
250
+ public void onGiveup(Exception firstException, Exception lastException)
251
+ throws RetryGiveupException
252
+ {
253
+ }
254
+ });
255
+ } catch (RetryGiveupException ex) {
256
+ Throwables.propagateIfInstanceOf(ex.getCause(), IOException.class);
257
+ throw Throwables.propagate(ex.getCause());
258
+ } catch (InterruptedException ex) {
259
+ throw new InterruptedIOException();
260
+ }
261
+ }
262
+ }
263
+
192
264
  public static class S3FileInput
193
265
  extends InputStreamFileInput
194
266
  implements TransactionalFileInput
@@ -217,15 +289,8 @@ public class S3FileInputPlugin
217
289
  }
218
290
  opened = true;
219
291
  GetObjectRequest request = new GetObjectRequest(bucket, key);
220
- //if (pos > 0) {
221
- // request.setRange(pos, contentLength);
222
- //}
223
292
  S3Object obj = client.getObject(request);
224
- //if (pos <= 0) {
225
- // // first call
226
- // contentLength = obj.getObjectMetadata().getContentLength();
227
- //}
228
- return obj.getObjectContent();
293
+ return new RetryableInputStream(obj.getObjectContent(), new S3RetryableOpener(client, request, obj.getObjectMetadata().getContentLength()));
229
294
  }
230
295
 
231
296
  @Override
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-s3
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
@@ -54,6 +54,8 @@ files:
54
54
  - gradlew
55
55
  - gradlew.bat
56
56
  - lib/embulk/input/s3.rb
57
+ - src/main/java/org/embulk/input/s3/RetryExecutor.java
58
+ - src/main/java/org/embulk/input/s3/RetryableInputStream.java
57
59
  - src/main/java/org/embulk/input/s3/S3FileInputPlugin.java
58
60
  - src/test/java/org/embulk/input/s3/TestS3FileInputPlugin.java
59
61
  - classpath/aws-java-sdk-core-1.9.22.jar
@@ -61,7 +63,7 @@ files:
61
63
  - classpath/aws-java-sdk-s3-1.9.22.jar
62
64
  - classpath/commons-codec-1.6.jar
63
65
  - classpath/commons-logging-1.1.3.jar
64
- - classpath/embulk-input-s3-0.1.4.jar
66
+ - classpath/embulk-input-s3-0.1.5.jar
65
67
  - classpath/httpclient-4.3.4.jar
66
68
  - classpath/httpcore-4.3.2.jar
67
69
  homepage: https://github.com/embulk/embulk-input-s3