embulk-input-s3 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6b8d337887e940e5fbb90c02ca32fe278ad21ea1
4
- data.tar.gz: a438072bd951e5b2bee43e722da2007859e5bc27
3
+ metadata.gz: ea85a2598851ff4ca60a0bc98385fa908db356b1
4
+ data.tar.gz: 76f750ed20b19f1bc7a1d1dd56f362ad8e1bc15d
5
5
  SHA512:
6
- metadata.gz: 84e08341b88f200133fa2e0a562409d1f1a6b122c0408eae14761174445641f995273f94b81340b82373d91b78c3b854986926a8ac7c7cfe1d76fd166870f169
7
- data.tar.gz: d2372bb97d642951ff2c0c67088ea5c8ca1c859a11fee050b2b7848f3f616bd8b0dd47c4f15743bb4185d52dd4b04d77bd1983ee0e671b26b010b280e82c5a4a
6
+ metadata.gz: 61c3385b21b589b29f8b69b2f26cd16522a0ce2057cf8bcb53f1a3cf7814612f5115326b6929d2886693692cae6841ae9e881d2d2220f48073fd498d4a9a948c
7
+ data.tar.gz: 88ca3e86cea694e4927d85615de0d68e03ebad6b05a95029726bd93851b88ebd1b1ea90a529d36658f59cbd628a381f8d7ba28001ab3a3c92e226447260a1618
data/ChangeLog CHANGED
@@ -1,4 +1,4 @@
1
- Release 0.1.4 - 2015-03-23
1
+ Release 0.1.5 - 2015-03-23
2
2
 
3
3
  * Implemented retrying
4
4
 
data/build.gradle CHANGED
@@ -13,7 +13,7 @@ configurations {
13
13
  provided
14
14
  }
15
15
 
16
- version = "0.1.4"
16
+ version = "0.1.5"
17
17
 
18
18
  dependencies {
19
19
  compile "org.embulk:embulk-core:0.5.3"
@@ -0,0 +1,130 @@
1
+ package org.embulk.input.s3;
2
+
3
+ import java.util.concurrent.Callable;
4
+ import java.util.concurrent.ExecutionException;
5
+
6
/**
 * Runs an operation repeatedly until it succeeds, fails with a non-retryable
 * exception, or exhausts its retry limit. The wait between attempts doubles
 * after every failure and is capped at the configured maximum.
 *
 * Instances are immutable; the {@code with*} methods return modified copies.
 */
public class RetryExecutor
{
    /**
     * Creates an executor with the default configuration: 3 retries,
     * 500 ms initial wait and a 30 minute maximum wait.
     */
    public static RetryExecutor retryExecutor()
    {
        // TODO default configuration
        return new RetryExecutor(3, 500, 30*60*1000);
    }

    /**
     * Thrown when the executor stops retrying. The cause is always an
     * {@link Exception}.
     */
    public static class RetryGiveupException
            extends ExecutionException
    {
        public RetryGiveupException(String message, Exception cause)
        {
            // Pass the message through; it used to be silently discarded.
            super(message, cause);
        }

        public RetryGiveupException(Exception cause)
        {
            super(cause);
        }

        @Override
        public Exception getCause()
        {
            // Both constructors only accept Exception, so this cast is safe.
            return (Exception) super.getCause();
        }
    }

    /**
     * An operation that can be retried, plus the callbacks that decide
     * whether to retry and observe retry/give-up events.
     */
    public static interface Retryable<T>
            extends Callable<T>
    {
        /** Performs one attempt of the operation. */
        public T call()
            throws Exception;

        /** Returns true if a failure with this exception should be retried. */
        public boolean isRetryableException(Exception exception);

        /**
         * Called before waiting for the next attempt.
         *
         * @param exception  the failure of the latest attempt
         * @param retryCount 1-based number of the upcoming retry
         * @param retryLimit configured maximum number of retries
         * @param retryWait  wait before the upcoming retry, in milliseconds
         */
        public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait)
            throws RetryGiveupException;

        /**
         * Called once when the executor gives up, right before it throws
         * {@link RetryGiveupException} wrapping {@code firstException}.
         */
        public void onGiveup(Exception firstException, Exception lastException)
            throws RetryGiveupException;
    }

    private final int retryLimit;
    private final int initialRetryWait;
    private final int maxRetryWait;

    private RetryExecutor(int retryLimit, int initialRetryWait, int maxRetryWait)
    {
        this.retryLimit = retryLimit;
        this.initialRetryWait = initialRetryWait;
        this.maxRetryWait = maxRetryWait;
    }

    /** Returns a copy that retries at most {@code count} times. */
    public RetryExecutor withRetryLimit(int count)
    {
        return new RetryExecutor(count, initialRetryWait, maxRetryWait);
    }

    /** Returns a copy whose first wait is {@code msec} milliseconds. */
    public RetryExecutor withInitialRetryWait(int msec)
    {
        return new RetryExecutor(retryLimit, msec, maxRetryWait);
    }

    /** Returns a copy whose wait never exceeds {@code msec} milliseconds. */
    public RetryExecutor withMaxRetryWait(int msec)
    {
        return new RetryExecutor(retryLimit, initialRetryWait, msec);
    }

    /**
     * Runs {@code op} with retries. An interruption during a back-off wait
     * aborts the run and is rethrown as {@link InterruptedException}.
     */
    public <T> T runInterruptible(Retryable<T> op)
            throws InterruptedException, RetryGiveupException
    {
        return run(op, true);
    }

    /**
     * Runs {@code op} with retries. An interruption during a back-off wait
     * only cuts that wait short; the thread's interrupt status is restored
     * before this method returns or throws.
     */
    public <T> T run(Retryable<T> op)
            throws RetryGiveupException
    {
        try {
            return run(op, false);
        } catch (InterruptedException ex) {
            // Not expected in non-interruptible mode; keep the flag for callers anyway.
            Thread.currentThread().interrupt();
            throw new RetryGiveupException("Unexpected interruption", ex);
        }
    }

    private <T> T run(Retryable<T> op, boolean interruptible)
            throws InterruptedException, RetryGiveupException
    {
        int retryWait = initialRetryWait;
        int retryCount = 0;
        boolean interrupted = false;

        Exception firstException = null;

        try {
            while (true) {
                try {
                    return op.call();
                } catch (Exception exception) {
                    if (firstException == null) {
                        firstException = exception;
                    }
                    if (!op.isRetryableException(exception) || retryCount >= retryLimit) {
                        op.onGiveup(firstException, exception);
                        throw new RetryGiveupException(firstException);
                    }

                    retryCount++;
                    op.onRetry(exception, retryCount, retryLimit, retryWait);

                    try {
                        Thread.sleep(retryWait);
                    } catch (InterruptedException ex) {
                        if (interruptible) {
                            throw ex;
                        }
                        // Remember the interruption; re-asserting it right now would
                        // make every following sleep return immediately.
                        interrupted = true;
                    }

                    // exponential back-off with hard limit; computed in long so that
                    // doubling a large wait cannot overflow to a negative value
                    retryWait = (int) Math.min((long) retryWait * 2, (long) maxRetryWait);
                }
            }
        } finally {
            if (interrupted) {
                Thread.currentThread().interrupt();
            }
        }
    }
}
130
+
@@ -0,0 +1,128 @@
1
+ package org.embulk.input.s3;
2
+
3
+ import java.io.InputStream;
4
+ import java.io.IOException;
5
+
6
/**
 * An {@link InputStream} wrapper that recovers from read failures by asking
 * an {@link Opener} for a replacement stream positioned at the number of
 * bytes consumed so far, then repeating the failed operation.
 *
 * Retrying ends only when the operation succeeds or the opener itself throws.
 *
 * NOTE(review): a mark set on the underlying stream is not carried over to a
 * replacement stream, so {@link #reset()} after a reopen depends on how the
 * new stream treats an unmarked reset — confirm before relying on mark/reset.
 */
public class RetryableInputStream
        extends InputStream
{
    /** Supplies a stream that starts at a given byte offset. */
    public interface Opener
    {
        /**
         * @param offset    number of bytes already consumed by the caller
         * @param exception the failure that triggered the reopen, or null on
         *                  the initial open
         */
        public InputStream open(long offset, Exception exception) throws IOException;
    }

    private final Opener opener;
    protected InputStream in;
    private long offset;
    private long markedOffset;

    public RetryableInputStream(InputStream initialInputStream, Opener reopener)
    {
        this.opener = reopener;
        this.in = initialInputStream;
        this.offset = 0L;
        this.markedOffset = 0L;
    }

    public RetryableInputStream(Opener opener) throws IOException
    {
        this(opener.open(0, null), opener);
    }

    // Discards the current stream and opens a replacement at the current offset.
    private void reopen(Exception exception) throws IOException
    {
        InputStream broken = in;
        in = null;
        if (broken != null) {
            try {
                broken.close();
            } catch (IOException | RuntimeException ignored) {
                // The stream has already failed; a close failure must not
                // prevent the replacement from being opened.
            }
        }
        in = opener.open(offset, exception);
    }

    @Override
    public int read() throws IOException
    {
        while (true) {
            try {
                int v = in.read();
                if (v >= 0) {
                    // -1 means end of stream: nothing was consumed
                    offset += 1;
                }
                return v;
            } catch (IOException | RuntimeException ex) {
                reopen(ex);
            }
        }
    }

    @Override
    public int read(byte[] b) throws IOException
    {
        return read(b, 0, b.length);
    }

    @Override
    public int read(byte[] b, int off, int len) throws IOException
    {
        // Reject caller errors up front. Otherwise the resulting RuntimeException
        // would be treated as a stream failure and retried forever.
        if (b == null) {
            throw new NullPointerException("b");
        }
        if (off < 0 || len < 0 || len > b.length - off) {
            throw new IndexOutOfBoundsException();
        }

        while (true) {
            try {
                int r = in.read(b, off, len);
                if (r > 0) {
                    // r is -1 at end of stream; adding it would move the offset backwards
                    offset += r;
                }
                return r;
            } catch (IOException | RuntimeException ex) {
                reopen(ex);
            }
        }
    }

    @Override
    public long skip(long n) throws IOException
    {
        while (true) {
            try {
                long r = in.skip(n);
                if (r > 0) {
                    offset += r;
                }
                return r;
            } catch (IOException | RuntimeException ex) {
                reopen(ex);
            }
        }
    }

    @Override
    public int available() throws IOException
    {
        return in.available();
    }

    @Override
    public void close() throws IOException
    {
        // in is null if a previous reopen attempt failed
        if (in != null) {
            in.close();
        }
    }

    @Override
    public void mark(int readlimit)
    {
        in.mark(readlimit);
        markedOffset = offset;
    }

    @Override
    public void reset() throws IOException
    {
        in.reset();
        offset = markedOffset;
    }

    @Override
    public boolean markSupported()
    {
        return in.markSupported();
    }
}
@@ -4,9 +4,11 @@ import java.util.List;
4
4
  import java.util.ArrayList;
5
5
  import java.util.Collections;
6
6
  import java.io.IOException;
7
+ import java.io.InterruptedIOException;
7
8
  import java.io.InputStream;
8
9
  import com.google.common.collect.ImmutableList;
9
10
  import com.google.common.base.Optional;
11
+ import com.google.common.base.Throwables;
10
12
  import org.slf4j.Logger;
11
13
  import com.amazonaws.auth.AWSCredentials;
12
14
  import com.amazonaws.auth.AWSCredentialsProvider;
@@ -32,6 +34,9 @@ import org.embulk.spi.Exec;
32
34
  import org.embulk.spi.FileInputPlugin;
33
35
  import org.embulk.spi.TransactionalFileInput;
34
36
  import org.embulk.spi.util.InputStreamFileInput;
37
+ import org.embulk.input.s3.RetryExecutor.Retryable;
38
+ import org.embulk.input.s3.RetryExecutor.RetryGiveupException;
39
+ import static org.embulk.input.s3.RetryExecutor.retryExecutor;
35
40
 
36
41
  public class S3FileInputPlugin
37
42
  implements FileInputPlugin
@@ -189,6 +194,73 @@ public class S3FileInputPlugin
189
194
  return new S3FileInput(task, taskIndex);
190
195
  }
191
196
 
197
+ private static class S3RetryableOpener
198
+ implements RetryableInputStream.Opener
199
+ {
200
+ private final Logger log = Exec.getLogger(S3FileInputPlugin.class);
201
+
202
+ private final AmazonS3Client client;
203
+ private final GetObjectRequest request;
204
+ private final long contentLength;
205
+
206
+ public S3RetryableOpener(AmazonS3Client client, GetObjectRequest request, long contentLength)
207
+ {
208
+ this.client = client;
209
+ this.request = request;
210
+ this.contentLength = contentLength;
211
+ }
212
+
213
+ @Override
214
+ public InputStream open(final long offset, final Exception exception) throws IOException
215
+ {
216
+ try {
217
+ return retryExecutor()
218
+ .withRetryLimit(3)
219
+ .withInitialRetryWait(500)
220
+ .withMaxRetryWait(30*1000)
221
+ .runInterruptible(new Retryable<InputStream>() {
222
+ @Override
223
+ public InputStream call() throws InterruptedIOException
224
+ {
225
+ log.warn(String.format("S3 read failed. Retrying GET request with %,d bytes offset", offset), exception);
226
+ request.setRange(offset, contentLength - 1); // [first, last]
227
+ return client.getObject(request).getObjectContent();
228
+ }
229
+
230
+ @Override
231
+ public boolean isRetryableException(Exception exception)
232
+ {
233
+ return true; // TODO
234
+ }
235
+
236
+ @Override
237
+ public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait)
238
+ throws RetryGiveupException
239
+ {
240
+ String message = String.format("S3 GET request failed. Retrying %d/%d after %d seconds. Message: %s",
241
+ retryCount, retryLimit, retryWait/1000, exception.getMessage());
242
+ if (retryCount % 3 == 0) {
243
+ log.warn(message, exception);
244
+ } else {
245
+ log.warn(message);
246
+ }
247
+ }
248
+
249
+ @Override
250
+ public void onGiveup(Exception firstException, Exception lastException)
251
+ throws RetryGiveupException
252
+ {
253
+ }
254
+ });
255
+ } catch (RetryGiveupException ex) {
256
+ Throwables.propagateIfInstanceOf(ex.getCause(), IOException.class);
257
+ throw Throwables.propagate(ex.getCause());
258
+ } catch (InterruptedException ex) {
259
+ throw new InterruptedIOException();
260
+ }
261
+ }
262
+ }
263
+
192
264
  public static class S3FileInput
193
265
  extends InputStreamFileInput
194
266
  implements TransactionalFileInput
@@ -217,15 +289,8 @@ public class S3FileInputPlugin
217
289
  }
218
290
  opened = true;
219
291
  GetObjectRequest request = new GetObjectRequest(bucket, key);
220
- //if (pos > 0) {
221
- // request.setRange(pos, contentLength);
222
- //}
223
292
  S3Object obj = client.getObject(request);
224
- //if (pos <= 0) {
225
- // // first call
226
- // contentLength = obj.getObjectMetadata().getContentLength();
227
- //}
228
- return obj.getObjectContent();
293
+ return new RetryableInputStream(obj.getObjectContent(), new S3RetryableOpener(client, request, obj.getObjectMetadata().getContentLength()));
229
294
  }
230
295
 
231
296
  @Override
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-s3
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
@@ -54,6 +54,8 @@ files:
54
54
  - gradlew
55
55
  - gradlew.bat
56
56
  - lib/embulk/input/s3.rb
57
+ - src/main/java/org/embulk/input/s3/RetryExecutor.java
58
+ - src/main/java/org/embulk/input/s3/RetryableInputStream.java
57
59
  - src/main/java/org/embulk/input/s3/S3FileInputPlugin.java
58
60
  - src/test/java/org/embulk/input/s3/TestS3FileInputPlugin.java
59
61
  - classpath/aws-java-sdk-core-1.9.22.jar
@@ -61,7 +63,7 @@ files:
61
63
  - classpath/aws-java-sdk-s3-1.9.22.jar
62
64
  - classpath/commons-codec-1.6.jar
63
65
  - classpath/commons-logging-1.1.3.jar
64
- - classpath/embulk-input-s3-0.1.4.jar
66
+ - classpath/embulk-input-s3-0.1.5.jar
65
67
  - classpath/httpclient-4.3.4.jar
66
68
  - classpath/httpcore-4.3.2.jar
67
69
  homepage: https://github.com/embulk/embulk-input-s3