embulk-input-gcs 0.1.10 → 0.1.11

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: fd9bbc407e6e93967a7d08fcd72b4d4f0c419659
4
- data.tar.gz: 86ebeaf4dace9de06146a4671d6fe3db69089b5c
3
+ metadata.gz: a7f7692d41284e6ac5299bb159a47416b4a31b5a
4
+ data.tar.gz: c970445b2be3449e3691e63cc139ca3f362333c8
5
5
  SHA512:
6
- metadata.gz: 783728ec908c8f598acfac3ec555075a55b7249943afc76d030f2fe57d9353ddbb78cbecabdba9504189d943b9574555d3ba301a4ee654f5c78b81c179abdef4
7
- data.tar.gz: 0e8815c728a060f749e8d797938235b72eed889c2155b635256646e5816432326555648cf229ef76a01e969aca6ec659eb5ce976c43f32ee64f6a3d3ce451d69
6
+ metadata.gz: 9220e5404d2edfd40193813563ce40d487fabab48e78073c256193eac383edd1cae8e84700e97eb47e7513ba0ec8002d19ae7879b63c7a57961852fec0a40348
7
+ data.tar.gz: 104febfa4425cac2622d381222c18c3c11033e965731087b7a9fa69f34907cd7f543747b84ffaac83ca4b23b3c2680cf413fc858661c9bd74cbc87d0a6be3342
data/ChangeLog CHANGED
@@ -1,3 +1,6 @@
1
+ Release 0.1.11 - 2016-01-25
2
+ * Added retry logic
3
+
1
4
  Release 0.1.10 - 2015-11-07
2
5
 
3
6
  * Fix resume download logics
@@ -16,7 +16,7 @@ configurations {
16
16
  sourceCompatibility = 1.7
17
17
  targetCompatibility = 1.7
18
18
 
19
- version = "0.1.10"
19
+ version = "0.1.11"
20
20
 
21
21
  dependencies {
22
22
  compile "org.embulk:embulk-core:0.7.5"
@@ -4,6 +4,7 @@ import java.util.List;
4
4
  import java.util.ArrayList;
5
5
  import java.util.Collections;
6
6
  import java.io.IOException;
7
+ import java.io.InterruptedIOException;
7
8
  import java.io.InputStream;
8
9
  import java.math.BigInteger;
9
10
 
@@ -14,6 +15,7 @@ import com.google.common.base.Function;
14
15
  import com.google.common.base.Throwables;
15
16
  import com.google.common.base.Charsets;
16
17
  import com.google.common.io.BaseEncoding;
18
+ import com.google.common.annotations.VisibleForTesting;
17
19
  import java.security.GeneralSecurityException;
18
20
 
19
21
  import org.embulk.config.TaskReport;
@@ -31,6 +33,10 @@ import org.embulk.spi.FileInputPlugin;
31
33
  import org.embulk.spi.TransactionalFileInput;
32
34
  import org.embulk.spi.unit.LocalFile;
33
35
  import org.embulk.spi.util.InputStreamFileInput;
36
+ import org.embulk.spi.util.ResumableInputStream;
37
+ import org.embulk.spi.util.RetryExecutor.Retryable;
38
+ import org.embulk.spi.util.RetryExecutor.RetryGiveupException;
39
+ import static org.embulk.spi.util.RetryExecutor.retryExecutor;
34
40
 
35
41
  import org.slf4j.Logger;
36
42
 
@@ -278,6 +284,73 @@ public class GcsFileInputPlugin
278
284
  return new GcsFileInput(task, taskIndex);
279
285
  }
280
286
 
287
+ @VisibleForTesting
288
+ static class GcsInputStreamReopener
289
+ implements ResumableInputStream.Reopener
290
+ {
291
+ private final Logger log = Exec.getLogger(GcsInputStreamReopener.class);
292
+ private final Storage client;
293
+ private final String bucket;
294
+ private final String key;
295
+
296
+ public GcsInputStreamReopener(Storage client, String bucket, String key)
297
+ {
298
+ this.client = client;
299
+ this.bucket = bucket;
300
+ this.key = key;
301
+ }
302
+
303
+ @Override
304
+ public InputStream reopen(final long offset, final Exception closedCause) throws IOException
305
+ {
306
+ try {
307
+ return retryExecutor()
308
+ .withRetryLimit(3)
309
+ .withInitialRetryWait(500)
310
+ .withMaxRetryWait(30 * 1000)
311
+ .runInterruptible(new Retryable<InputStream>() {
312
+ @Override
313
+ public InputStream call() throws InterruptedIOException, IOException
314
+ {
315
+ log.warn(String.format("GCS read failed. Retrying GET request with %,d bytes offset", offset), closedCause);
316
+ Storage.Objects.Get getObject = client.objects().get(bucket, key);
317
+ return getObject.executeMediaAsInputStream();
318
+ }
319
+
320
+ @Override
321
+ public boolean isRetryableException(Exception exception)
322
+ {
323
+ return true; // TODO
324
+ }
325
+
326
+ @Override
327
+ public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait)
328
+ throws RetryGiveupException
329
+ {
330
+ String message = String.format("GCS GET request failed. Retrying %d/%d after %d seconds. Message: %s",
331
+ retryCount, retryLimit, retryWait/1000, exception.getMessage());
332
+ if (retryCount % 3 == 0) {
333
+ log.warn(message, exception);
334
+ } else {
335
+ log.warn(message);
336
+ }
337
+ }
338
+
339
+ @Override
340
+ public void onGiveup(Exception firstException, Exception lastException)
341
+ throws RetryGiveupException
342
+ {
343
+ }
344
+ });
345
+ } catch (RetryGiveupException ex) {
346
+ Throwables.propagateIfInstanceOf(ex.getCause(), IOException.class);
347
+ throw Throwables.propagate(ex.getCause());
348
+ } catch (InterruptedException ex) {
349
+ throw new InterruptedIOException();
350
+ }
351
+ }
352
+ }
353
+
281
354
  public class GcsFileInput
282
355
  extends InputStreamFileInput
283
356
  implements TransactionalFileInput
@@ -322,7 +395,7 @@ public class GcsFileInputPlugin
322
395
  opened = true;
323
396
  Storage.Objects.Get getObject = client.objects().get(bucket, key);
324
397
 
325
- return getObject.executeMediaAsInputStream();
398
+ return new ResumableInputStream(getObject.executeMediaAsInputStream(), new GcsInputStreamReopener(client, bucket, key));
326
399
  }
327
400
 
328
401
  @Override
@@ -7,6 +7,9 @@ import com.google.common.collect.ImmutableMap;
7
7
  import com.google.common.base.Optional;
8
8
  import com.google.common.collect.ImmutableList;
9
9
  import com.google.common.collect.Lists;
10
+ import java.io.BufferedReader;
11
+ import java.io.InputStream;
12
+ import java.io.InputStreamReader;
10
13
  import java.io.IOException;
11
14
  import java.security.GeneralSecurityException;
12
15
 
@@ -199,7 +202,7 @@ public class TestGcsFileInputPlugin
199
202
  .set("json_keyfile", GCP_JSON_KEYFILE)
200
203
  .set("parser", parserConfig(schemaConfig()));
201
204
 
202
- PluginTask task = config().loadConfig(PluginTask.class);
205
+ PluginTask task = config.loadConfig(PluginTask.class);
203
206
  runner.transaction(config, new Control());
204
207
 
205
208
  Method method = GcsFileInputPlugin.class.getDeclaredMethod("newGcsAuth", PluginTask.class);
@@ -292,6 +295,34 @@ public class TestGcsFileInputPlugin
292
295
  assertRecords(config, output);
293
296
  }
294
297
 
298
+ @Test
299
+ public void testGcsFileInputByReopen()
300
+ throws NoSuchMethodException, IllegalAccessException, InvocationTargetException, IOException
301
+ {
302
+ ConfigSource config = Exec.newConfigSource()
303
+ .set("bucket", GCP_BUCKET)
304
+ .set("path_prefix", GCP_PATH_PREFIX)
305
+ .set("auth_method", "json_key")
306
+ .set("service_account_email", GCP_EMAIL)
307
+ .set("json_keyfile", GCP_JSON_KEYFILE)
308
+ .set("parser", parserConfig(schemaConfig()));
309
+
310
+ PluginTask task = config.loadConfig(PluginTask.class);
311
+ runner.transaction(config, new Control());
312
+
313
+ Method method = GcsFileInputPlugin.class.getDeclaredMethod("newGcsAuth", PluginTask.class);
314
+ method.setAccessible(true);
315
+ Storage client = plugin.newGcsClient(task, (GcsAuthentication) method.invoke(plugin, task));
316
+ task.setFiles(plugin.listFiles(task, client));
317
+
318
+ String key = GCP_BUCKET_DIRECTORY + "sample_01.csv";
319
+ GcsFileInputPlugin.GcsInputStreamReopener opener = new GcsFileInputPlugin.GcsInputStreamReopener(client, GCP_BUCKET, key);
320
+ try (InputStream in = opener.reopen(0, new RuntimeException())) {
321
+ BufferedReader r = new BufferedReader(new InputStreamReader(in));
322
+ assertEquals("id,account,time,purchase,comment", r.readLine());
323
+ }
324
+ }
325
+
295
326
  @Test
296
327
  public void testBase64()
297
328
  throws NoSuchMethodException, IllegalAccessException, InvocationTargetException
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-gcs
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.10
4
+ version: 0.1.11
5
5
  platform: ruby
6
6
  authors:
7
7
  - Satoshi Akama
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-11-07 00:00:00.000000000 Z
11
+ date: 2016-01-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -63,7 +63,7 @@ files:
63
63
  - src/test/resources/sample_02.csv
64
64
  - classpath/commons-codec-1.3.jar
65
65
  - classpath/commons-logging-1.1.1.jar
66
- - classpath/embulk-input-gcs-0.1.10.jar
66
+ - classpath/embulk-input-gcs-0.1.11.jar
67
67
  - classpath/google-api-client-1.19.1.jar
68
68
  - classpath/google-api-services-storage-v1-rev27-1.19.1.jar
69
69
  - classpath/google-http-client-1.19.0.jar