embulk-input-gcs 0.2.6 → 0.2.7
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5c7a8f8dcd33afc9c58b25bc3604dae90a407fb4
|
4
|
+
data.tar.gz: bd5afe31028d098c805e7096a6939e877bc2633e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9e761bc69cbb53342b4f7cc943bff476e8678a6dc4f22306039d2269fd91d4651c797d51d6891580af148d53d4cc54fb3a7a45710c9cada571dd59db88b12842
|
7
|
+
data.tar.gz: 2e03a2ca66c9daae76190f70c5dda99c437cf783e77ff5dd6ed1fe15dbe6633e4644350f53d303eb7e0aa06af84540d4bd6d5f6ffb65299819e508a397b5f9d6
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,8 @@
|
|
1
|
+
## 0.2.7 - 2018-03-22
|
2
|
+
* [maintenance] Fix retry logic to avoid an IOException that happens during IOUtils.copy() [#33](https://github.com/embulk/embulk-input-gcs/pull/33)
|
3
|
+
|
1
4
|
## 0.2.6 - 2018-03-05
|
2
|
-
* [
|
5
|
+
* [new feature] Support "path_match_pattern" option [#32](https://github.com/embulk/embulk-input-gcs/pull/32)
|
3
6
|
|
4
7
|
## 0.2.5 - 2017-05-19
|
5
8
|
* [maintenance] Fix InputStream handling so that the plugin does not get fewer records than expected [#27](https://github.com/embulk/embulk-input-gcs/pull/27)
|
data/build.gradle
CHANGED
@@ -2,11 +2,9 @@ package org.embulk.input.gcs;
|
|
2
2
|
|
3
3
|
import com.google.api.client.util.IOUtils;
|
4
4
|
import com.google.api.services.storage.Storage;
|
5
|
-
import com.google.common.annotations.VisibleForTesting;
|
6
5
|
import com.google.common.base.Throwables;
|
7
6
|
import org.embulk.spi.Exec;
|
8
7
|
import org.embulk.spi.util.InputStreamFileInput;
|
9
|
-
import org.embulk.spi.util.ResumableInputStream;
|
10
8
|
import org.embulk.spi.util.RetryExecutor;
|
11
9
|
import org.slf4j.Logger;
|
12
10
|
|
@@ -17,7 +15,6 @@ import java.io.FileInputStream;
|
|
17
15
|
import java.io.FileOutputStream;
|
18
16
|
import java.io.IOException;
|
19
17
|
import java.io.InputStream;
|
20
|
-
import java.io.InterruptedIOException;
|
21
18
|
import java.util.Iterator;
|
22
19
|
|
23
20
|
import static org.embulk.spi.util.RetryExecutor.retryExecutor;
|
@@ -30,6 +27,7 @@ public class SingleFileProvider
|
|
30
27
|
private final Iterator<String> iterator;
|
31
28
|
private final int maxConnectionRetry;
|
32
29
|
private boolean opened = false;
|
30
|
+
private final Logger log = Exec.getLogger(SingleFileProvider.class);
|
33
31
|
|
34
32
|
public SingleFileProvider(PluginTask task, int taskIndex)
|
35
33
|
{
|
@@ -50,12 +48,9 @@ public class SingleFileProvider
|
|
50
48
|
return null;
|
51
49
|
}
|
52
50
|
String key = iterator.next();
|
53
|
-
Storage.Objects.Get getObject = client.objects().get(bucket, key);
|
54
51
|
File tempFile = Exec.getTempFileSpace().createTempFile();
|
55
|
-
|
56
|
-
|
57
|
-
}
|
58
|
-
return new ResumableInputStream(new BufferedInputStream(new FileInputStream(tempFile)), new GcsInputStreamReopener(tempFile, client, bucket, key, maxConnectionRetry));
|
52
|
+
getRemoteContentsWithRetry(tempFile, client, bucket, key, maxConnectionRetry);
|
53
|
+
return new BufferedInputStream(new FileInputStream(tempFile));
|
59
54
|
}
|
60
55
|
|
61
56
|
@Override
|
@@ -63,81 +58,53 @@ public class SingleFileProvider
|
|
63
58
|
{
|
64
59
|
}
|
65
60
|
|
66
|
-
|
67
|
-
static class GcsInputStreamReopener
|
68
|
-
implements ResumableInputStream.Reopener
|
61
|
+
private Void getRemoteContentsWithRetry(final File tempFile, final Storage client, final String bucket, final String key, int maxConnectionRetry)
|
69
62
|
{
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
this.key = key;
|
83
|
-
this.maxConnectionRetry = maxConnectionRetry;
|
84
|
-
}
|
85
|
-
|
86
|
-
@Override
|
87
|
-
public InputStream reopen(final long offset, final Exception closedCause) throws IOException
|
88
|
-
{
|
89
|
-
try {
|
90
|
-
return retryExecutor()
|
91
|
-
.withRetryLimit(maxConnectionRetry)
|
92
|
-
.withInitialRetryWait(500)
|
93
|
-
.withMaxRetryWait(30 * 1000)
|
94
|
-
.runInterruptible(new RetryExecutor.Retryable<InputStream>() {
|
95
|
-
@Override
|
96
|
-
public InputStream call() throws IOException
|
97
|
-
{
|
98
|
-
log.warn(String.format("GCS read failed. Retrying GET request with %,d bytes offset", offset), closedCause);
|
99
|
-
Storage.Objects.Get getObject = client.objects().get(bucket, key);
|
100
|
-
|
101
|
-
try (BufferedOutputStream outputStream = new BufferedOutputStream(new FileOutputStream(tempFile))) {
|
102
|
-
IOUtils.copy(getObject.executeMediaAsInputStream(), outputStream);
|
103
|
-
}
|
104
|
-
return new BufferedInputStream(new FileInputStream(tempFile));
|
63
|
+
try {
|
64
|
+
return retryExecutor()
|
65
|
+
.withRetryLimit(maxConnectionRetry)
|
66
|
+
.withInitialRetryWait(500)
|
67
|
+
.withMaxRetryWait(30 * 1000)
|
68
|
+
.runInterruptible(new RetryExecutor.Retryable<Void>() {
|
69
|
+
@Override
|
70
|
+
public Void call() throws IOException
|
71
|
+
{
|
72
|
+
Storage.Objects.Get getObject = client.objects().get(bucket, key);
|
73
|
+
try (BufferedOutputStream outputStream = new BufferedOutputStream(new FileOutputStream(tempFile))) {
|
74
|
+
IOUtils.copy(getObject.executeMediaAsInputStream(), outputStream);
|
105
75
|
}
|
76
|
+
return null;
|
77
|
+
}
|
106
78
|
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
79
|
+
@Override
|
80
|
+
public boolean isRetryableException(Exception exception)
|
81
|
+
{
|
82
|
+
return true; // TODO
|
83
|
+
}
|
112
84
|
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
}
|
122
|
-
else {
|
123
|
-
log.warn(message);
|
124
|
-
}
|
85
|
+
@Override
|
86
|
+
public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait)
|
87
|
+
throws RetryExecutor.RetryGiveupException
|
88
|
+
{
|
89
|
+
String message = String.format("GCS GET request failed. Retrying %d/%d after %d seconds. Message: %s",
|
90
|
+
retryCount, retryLimit, retryWait / 1000, exception.getMessage());
|
91
|
+
if (retryCount % 3 == 0) {
|
92
|
+
log.warn(message, exception);
|
125
93
|
}
|
126
|
-
|
127
|
-
|
128
|
-
public void onGiveup(Exception firstException, Exception lastException)
|
129
|
-
throws RetryExecutor.RetryGiveupException
|
130
|
-
{
|
94
|
+
else {
|
95
|
+
log.warn(message);
|
131
96
|
}
|
132
|
-
}
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
97
|
+
}
|
98
|
+
|
99
|
+
@Override
|
100
|
+
public void onGiveup(Exception firstException, Exception lastException)
|
101
|
+
throws RetryExecutor.RetryGiveupException
|
102
|
+
{
|
103
|
+
}
|
104
|
+
});
|
105
|
+
}
|
106
|
+
catch (RetryExecutor.RetryGiveupException | InterruptedException ex) {
|
107
|
+
throw Throwables.propagate(ex.getCause());
|
141
108
|
}
|
142
109
|
}
|
143
110
|
}
|
@@ -24,12 +24,7 @@ import org.junit.BeforeClass;
|
|
24
24
|
import org.junit.Rule;
|
25
25
|
import org.junit.Test;
|
26
26
|
|
27
|
-
import java.io.BufferedReader;
|
28
|
-
|
29
|
-
import java.io.File;
|
30
27
|
import java.io.IOException;
|
31
|
-
import java.io.InputStream;
|
32
|
-
import java.io.InputStreamReader;
|
33
28
|
import java.security.GeneralSecurityException;
|
34
29
|
import java.util.ArrayList;
|
35
30
|
import java.util.Arrays;
|
@@ -420,35 +415,6 @@ public class TestGcsFileInputPlugin
|
|
420
415
|
assertRecords(config, output);
|
421
416
|
}
|
422
417
|
|
423
|
-
@Test
|
424
|
-
public void testGcsFileInputByReopen()
|
425
|
-
throws NoSuchMethodException, IllegalAccessException, InvocationTargetException, IOException
|
426
|
-
{
|
427
|
-
ConfigSource config = Exec.newConfigSource()
|
428
|
-
.set("bucket", GCP_BUCKET)
|
429
|
-
.set("path_prefix", GCP_PATH_PREFIX)
|
430
|
-
.set("auth_method", "json_key")
|
431
|
-
.set("service_account_email", GCP_EMAIL)
|
432
|
-
.set("json_keyfile", GCP_JSON_KEYFILE)
|
433
|
-
.set("parser", parserConfig(schemaConfig()));
|
434
|
-
|
435
|
-
PluginTask task = config.loadConfig(PluginTask.class);
|
436
|
-
runner.transaction(config, new Control());
|
437
|
-
|
438
|
-
Method method = GcsFileInput.class.getDeclaredMethod("newGcsAuth", PluginTask.class);
|
439
|
-
method.setAccessible(true);
|
440
|
-
Storage client = GcsFileInput.newGcsClient(task, (GcsAuthentication) method.invoke(plugin, task));
|
441
|
-
File tempFile = Exec.getTempFileSpace().createTempFile();
|
442
|
-
task.setFiles(GcsFileInput.listFiles(task, client));
|
443
|
-
|
444
|
-
String key = GCP_BUCKET_DIRECTORY + "sample_01.csv";
|
445
|
-
SingleFileProvider.GcsInputStreamReopener opener = new SingleFileProvider.GcsInputStreamReopener(tempFile, client, GCP_BUCKET, key, MAX_CONNECTION_RETRY);
|
446
|
-
try (InputStream in = opener.reopen(0, new RuntimeException())) {
|
447
|
-
BufferedReader r = new BufferedReader(new InputStreamReader(in));
|
448
|
-
assertEquals("id,account,time,purchase,comment", r.readLine());
|
449
|
-
}
|
450
|
-
}
|
451
|
-
|
452
418
|
@Test
|
453
419
|
public void testBase64()
|
454
420
|
throws NoSuchMethodException, IllegalAccessException, InvocationTargetException
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-gcs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.6
|
4
|
+
version: 0.2.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Satoshi Akama
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-03-
|
11
|
+
date: 2018-03-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -71,7 +71,7 @@ files:
|
|
71
71
|
- src/test/resources/secretkeys.tar.enc
|
72
72
|
- classpath/commons-codec-1.3.jar
|
73
73
|
- classpath/commons-logging-1.1.1.jar
|
74
|
-
- classpath/embulk-input-gcs-0.2.6.jar
|
74
|
+
- classpath/embulk-input-gcs-0.2.7.jar
|
75
75
|
- classpath/google-api-client-1.21.0.jar
|
76
76
|
- classpath/google-api-services-storage-v1-rev59-1.21.0.jar
|
77
77
|
- classpath/google-http-client-1.21.0.jar
|