embulk-input-gcs 0.2.6 → 0.2.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 04cb5b37d8fb8c70e1c9c4c306cf792ac1ad1ec9
4
- data.tar.gz: 44ec9518fc188320a19ffbe2ce7db0b07cda30f3
3
+ metadata.gz: 5c7a8f8dcd33afc9c58b25bc3604dae90a407fb4
4
+ data.tar.gz: bd5afe31028d098c805e7096a6939e877bc2633e
5
5
  SHA512:
6
- metadata.gz: 3d2a59336002f07d48bcf8b5b22b3626e9c59df7aad93a4f91721667f6f7e1552bd1db72ebd1323e19ce1dc67f32da64b9ad88bc6d6785591904b8d87059249f
7
- data.tar.gz: 1533eccae86b7355303ab91bc5ab08cf49c816438fef3541ad1318fc24f0eab948667132952746173a2dd3dabea41bc06cfc2493b8b4701c1908d2e07ff9b635
6
+ metadata.gz: 9e761bc69cbb53342b4f7cc943bff476e8678a6dc4f22306039d2269fd91d4651c797d51d6891580af148d53d4cc54fb3a7a45710c9cada571dd59db88b12842
7
+ data.tar.gz: 2e03a2ca66c9daae76190f70c5dda99c437cf783e77ff5dd6ed1fe15dbe6633e4644350f53d303eb7e0aa06af84540d4bd6d5f6ffb65299819e508a397b5f9d6
@@ -1,5 +1,8 @@
1
+ ## 0.2.7 - 2018-03-22
2
+ * [maintenance] Fix retry logic to avoid the IOException that happens during IOUtils.copy() [#33](https://github.com/embulk/embulk-input-gcs/pull/33)
3
+
1
4
  ## 0.2.6 - 2018-03-05
2
- * [maintenance] Support "path_match_pattern" option [#32](https://github.com/embulk/embulk-input-gcs/pull/32)
5
+ * [new feature] Support "path_match_pattern" option [#32](https://github.com/embulk/embulk-input-gcs/pull/32)
3
6
 
4
7
  ## 0.2.5 - 2017-05-19
5
8
  * [maintenance] Fix InputStream handling to prevent the plugin from getting fewer records than expected [#27](https://github.com/embulk/embulk-input-gcs/pull/27)
@@ -17,7 +17,7 @@ configurations {
17
17
  sourceCompatibility = 1.7
18
18
  targetCompatibility = 1.7
19
19
 
20
- version = "0.2.6"
20
+ version = "0.2.7"
21
21
 
22
22
  dependencies {
23
23
  compile "org.embulk:embulk-core:0.8.2"
@@ -2,11 +2,9 @@ package org.embulk.input.gcs;
2
2
 
3
3
  import com.google.api.client.util.IOUtils;
4
4
  import com.google.api.services.storage.Storage;
5
- import com.google.common.annotations.VisibleForTesting;
6
5
  import com.google.common.base.Throwables;
7
6
  import org.embulk.spi.Exec;
8
7
  import org.embulk.spi.util.InputStreamFileInput;
9
- import org.embulk.spi.util.ResumableInputStream;
10
8
  import org.embulk.spi.util.RetryExecutor;
11
9
  import org.slf4j.Logger;
12
10
 
@@ -17,7 +15,6 @@ import java.io.FileInputStream;
17
15
  import java.io.FileOutputStream;
18
16
  import java.io.IOException;
19
17
  import java.io.InputStream;
20
- import java.io.InterruptedIOException;
21
18
  import java.util.Iterator;
22
19
 
23
20
  import static org.embulk.spi.util.RetryExecutor.retryExecutor;
@@ -30,6 +27,7 @@ public class SingleFileProvider
30
27
  private final Iterator<String> iterator;
31
28
  private final int maxConnectionRetry;
32
29
  private boolean opened = false;
30
+ private final Logger log = Exec.getLogger(SingleFileProvider.class);
33
31
 
34
32
  public SingleFileProvider(PluginTask task, int taskIndex)
35
33
  {
@@ -50,12 +48,9 @@ public class SingleFileProvider
50
48
  return null;
51
49
  }
52
50
  String key = iterator.next();
53
- Storage.Objects.Get getObject = client.objects().get(bucket, key);
54
51
  File tempFile = Exec.getTempFileSpace().createTempFile();
55
- try (BufferedOutputStream outputStream = new BufferedOutputStream(new FileOutputStream(tempFile))) {
56
- IOUtils.copy(getObject.executeMediaAsInputStream(), outputStream);
57
- }
58
- return new ResumableInputStream(new BufferedInputStream(new FileInputStream(tempFile)), new GcsInputStreamReopener(tempFile, client, bucket, key, maxConnectionRetry));
52
+ getRemoteContentsWithRetry(tempFile, client, bucket, key, maxConnectionRetry);
53
+ return new BufferedInputStream(new FileInputStream(tempFile));
59
54
  }
60
55
 
61
56
  @Override
@@ -63,81 +58,53 @@ public class SingleFileProvider
63
58
  {
64
59
  }
65
60
 
66
- @VisibleForTesting
67
- static class GcsInputStreamReopener
68
- implements ResumableInputStream.Reopener
61
+ private Void getRemoteContentsWithRetry(final File tempFile, final Storage client, final String bucket, final String key, int maxConnectionRetry)
69
62
  {
70
- private final Logger log = Exec.getLogger(GcsInputStreamReopener.class);
71
- private final File tempFile;
72
- private final Storage client;
73
- private final String bucket;
74
- private final String key;
75
- private final int maxConnectionRetry;
76
-
77
- public GcsInputStreamReopener(File tempFile, Storage client, String bucket, String key, int maxConnectionRetry)
78
- {
79
- this.tempFile = tempFile;
80
- this.client = client;
81
- this.bucket = bucket;
82
- this.key = key;
83
- this.maxConnectionRetry = maxConnectionRetry;
84
- }
85
-
86
- @Override
87
- public InputStream reopen(final long offset, final Exception closedCause) throws IOException
88
- {
89
- try {
90
- return retryExecutor()
91
- .withRetryLimit(maxConnectionRetry)
92
- .withInitialRetryWait(500)
93
- .withMaxRetryWait(30 * 1000)
94
- .runInterruptible(new RetryExecutor.Retryable<InputStream>() {
95
- @Override
96
- public InputStream call() throws IOException
97
- {
98
- log.warn(String.format("GCS read failed. Retrying GET request with %,d bytes offset", offset), closedCause);
99
- Storage.Objects.Get getObject = client.objects().get(bucket, key);
100
-
101
- try (BufferedOutputStream outputStream = new BufferedOutputStream(new FileOutputStream(tempFile))) {
102
- IOUtils.copy(getObject.executeMediaAsInputStream(), outputStream);
103
- }
104
- return new BufferedInputStream(new FileInputStream(tempFile));
63
+ try {
64
+ return retryExecutor()
65
+ .withRetryLimit(maxConnectionRetry)
66
+ .withInitialRetryWait(500)
67
+ .withMaxRetryWait(30 * 1000)
68
+ .runInterruptible(new RetryExecutor.Retryable<Void>() {
69
+ @Override
70
+ public Void call() throws IOException
71
+ {
72
+ Storage.Objects.Get getObject = client.objects().get(bucket, key);
73
+ try (BufferedOutputStream outputStream = new BufferedOutputStream(new FileOutputStream(tempFile))) {
74
+ IOUtils.copy(getObject.executeMediaAsInputStream(), outputStream);
105
75
  }
76
+ return null;
77
+ }
106
78
 
107
- @Override
108
- public boolean isRetryableException(Exception exception)
109
- {
110
- return true; // TODO
111
- }
79
+ @Override
80
+ public boolean isRetryableException(Exception exception)
81
+ {
82
+ return true; // TODO
83
+ }
112
84
 
113
- @Override
114
- public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait)
115
- throws RetryExecutor.RetryGiveupException
116
- {
117
- String message = String.format("GCS GET request failed. Retrying %d/%d after %d seconds. Message: %s",
118
- retryCount, retryLimit, retryWait / 1000, exception.getMessage());
119
- if (retryCount % 3 == 0) {
120
- log.warn(message, exception);
121
- }
122
- else {
123
- log.warn(message);
124
- }
85
+ @Override
86
+ public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait)
87
+ throws RetryExecutor.RetryGiveupException
88
+ {
89
+ String message = String.format("GCS GET request failed. Retrying %d/%d after %d seconds. Message: %s",
90
+ retryCount, retryLimit, retryWait / 1000, exception.getMessage());
91
+ if (retryCount % 3 == 0) {
92
+ log.warn(message, exception);
125
93
  }
126
-
127
- @Override
128
- public void onGiveup(Exception firstException, Exception lastException)
129
- throws RetryExecutor.RetryGiveupException
130
- {
94
+ else {
95
+ log.warn(message);
131
96
  }
132
- });
133
- }
134
- catch (RetryExecutor.RetryGiveupException ex) {
135
- Throwables.propagateIfInstanceOf(ex.getCause(), IOException.class);
136
- throw Throwables.propagate(ex.getCause());
137
- }
138
- catch (InterruptedException ex) {
139
- throw new InterruptedIOException();
140
- }
97
+ }
98
+
99
+ @Override
100
+ public void onGiveup(Exception firstException, Exception lastException)
101
+ throws RetryExecutor.RetryGiveupException
102
+ {
103
+ }
104
+ });
105
+ }
106
+ catch (RetryExecutor.RetryGiveupException | InterruptedException ex) {
107
+ throw Throwables.propagate(ex.getCause());
141
108
  }
142
109
  }
143
110
  }
@@ -24,12 +24,7 @@ import org.junit.BeforeClass;
24
24
  import org.junit.Rule;
25
25
  import org.junit.Test;
26
26
 
27
- import java.io.BufferedReader;
28
-
29
- import java.io.File;
30
27
  import java.io.IOException;
31
- import java.io.InputStream;
32
- import java.io.InputStreamReader;
33
28
  import java.security.GeneralSecurityException;
34
29
  import java.util.ArrayList;
35
30
  import java.util.Arrays;
@@ -420,35 +415,6 @@ public class TestGcsFileInputPlugin
420
415
  assertRecords(config, output);
421
416
  }
422
417
 
423
- @Test
424
- public void testGcsFileInputByReopen()
425
- throws NoSuchMethodException, IllegalAccessException, InvocationTargetException, IOException
426
- {
427
- ConfigSource config = Exec.newConfigSource()
428
- .set("bucket", GCP_BUCKET)
429
- .set("path_prefix", GCP_PATH_PREFIX)
430
- .set("auth_method", "json_key")
431
- .set("service_account_email", GCP_EMAIL)
432
- .set("json_keyfile", GCP_JSON_KEYFILE)
433
- .set("parser", parserConfig(schemaConfig()));
434
-
435
- PluginTask task = config.loadConfig(PluginTask.class);
436
- runner.transaction(config, new Control());
437
-
438
- Method method = GcsFileInput.class.getDeclaredMethod("newGcsAuth", PluginTask.class);
439
- method.setAccessible(true);
440
- Storage client = GcsFileInput.newGcsClient(task, (GcsAuthentication) method.invoke(plugin, task));
441
- File tempFile = Exec.getTempFileSpace().createTempFile();
442
- task.setFiles(GcsFileInput.listFiles(task, client));
443
-
444
- String key = GCP_BUCKET_DIRECTORY + "sample_01.csv";
445
- SingleFileProvider.GcsInputStreamReopener opener = new SingleFileProvider.GcsInputStreamReopener(tempFile, client, GCP_BUCKET, key, MAX_CONNECTION_RETRY);
446
- try (InputStream in = opener.reopen(0, new RuntimeException())) {
447
- BufferedReader r = new BufferedReader(new InputStreamReader(in));
448
- assertEquals("id,account,time,purchase,comment", r.readLine());
449
- }
450
- }
451
-
452
418
  @Test
453
419
  public void testBase64()
454
420
  throws NoSuchMethodException, IllegalAccessException, InvocationTargetException
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-gcs
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.6
4
+ version: 0.2.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Satoshi Akama
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-03-05 00:00:00.000000000 Z
11
+ date: 2018-03-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -71,7 +71,7 @@ files:
71
71
  - src/test/resources/secretkeys.tar.enc
72
72
  - classpath/commons-codec-1.3.jar
73
73
  - classpath/commons-logging-1.1.1.jar
74
- - classpath/embulk-input-gcs-0.2.6.jar
74
+ - classpath/embulk-input-gcs-0.2.7.jar
75
75
  - classpath/google-api-client-1.21.0.jar
76
76
  - classpath/google-api-services-storage-v1-rev59-1.21.0.jar
77
77
  - classpath/google-http-client-1.21.0.jar