embulk-input-gcs 0.2.6 → 0.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5c7a8f8dcd33afc9c58b25bc3604dae90a407fb4
|
4
|
+
data.tar.gz: bd5afe31028d098c805e7096a6939e877bc2633e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9e761bc69cbb53342b4f7cc943bff476e8678a6dc4f22306039d2269fd91d4651c797d51d6891580af148d53d4cc54fb3a7a45710c9cada571dd59db88b12842
|
7
|
+
data.tar.gz: 2e03a2ca66c9daae76190f70c5dda99c437cf783e77ff5dd6ed1fe15dbe6633e4644350f53d303eb7e0aa06af84540d4bd6d5f6ffb65299819e508a397b5f9d6
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,8 @@
|
|
1
|
+
## 0.2.7 - 2018-03-22
|
2
|
+
* [maintenance] Fix retry logic to avoid IOException happens while IOUtils.copy() [#33](https://github.com/embulk/embulk-input-gcs/pull/33)
|
3
|
+
|
1
4
|
## 0.2.6 - 2018-03-05
|
2
|
-
* [
|
5
|
+
* [new feature] Support "path_match_pattern" option [#32](https://github.com/embulk/embulk-input-gcs/pull/32)
|
3
6
|
|
4
7
|
## 0.2.5 - 2017-05-19
|
5
8
|
* [maintenance] Fix InputStream handling to avoid plugin get less records than expected [#27](https://github.com/embulk/embulk-input-gcs/pull/27)
|
data/build.gradle
CHANGED
@@ -2,11 +2,9 @@ package org.embulk.input.gcs;
|
|
2
2
|
|
3
3
|
import com.google.api.client.util.IOUtils;
|
4
4
|
import com.google.api.services.storage.Storage;
|
5
|
-
import com.google.common.annotations.VisibleForTesting;
|
6
5
|
import com.google.common.base.Throwables;
|
7
6
|
import org.embulk.spi.Exec;
|
8
7
|
import org.embulk.spi.util.InputStreamFileInput;
|
9
|
-
import org.embulk.spi.util.ResumableInputStream;
|
10
8
|
import org.embulk.spi.util.RetryExecutor;
|
11
9
|
import org.slf4j.Logger;
|
12
10
|
|
@@ -17,7 +15,6 @@ import java.io.FileInputStream;
|
|
17
15
|
import java.io.FileOutputStream;
|
18
16
|
import java.io.IOException;
|
19
17
|
import java.io.InputStream;
|
20
|
-
import java.io.InterruptedIOException;
|
21
18
|
import java.util.Iterator;
|
22
19
|
|
23
20
|
import static org.embulk.spi.util.RetryExecutor.retryExecutor;
|
@@ -30,6 +27,7 @@ public class SingleFileProvider
|
|
30
27
|
private final Iterator<String> iterator;
|
31
28
|
private final int maxConnectionRetry;
|
32
29
|
private boolean opened = false;
|
30
|
+
private final Logger log = Exec.getLogger(SingleFileProvider.class);
|
33
31
|
|
34
32
|
public SingleFileProvider(PluginTask task, int taskIndex)
|
35
33
|
{
|
@@ -50,12 +48,9 @@ public class SingleFileProvider
|
|
50
48
|
return null;
|
51
49
|
}
|
52
50
|
String key = iterator.next();
|
53
|
-
Storage.Objects.Get getObject = client.objects().get(bucket, key);
|
54
51
|
File tempFile = Exec.getTempFileSpace().createTempFile();
|
55
|
-
|
56
|
-
|
57
|
-
}
|
58
|
-
return new ResumableInputStream(new BufferedInputStream(new FileInputStream(tempFile)), new GcsInputStreamReopener(tempFile, client, bucket, key, maxConnectionRetry));
|
52
|
+
getRemoteContentsWithRetry(tempFile, client, bucket, key, maxConnectionRetry);
|
53
|
+
return new BufferedInputStream(new FileInputStream(tempFile));
|
59
54
|
}
|
60
55
|
|
61
56
|
@Override
|
@@ -63,81 +58,53 @@ public class SingleFileProvider
|
|
63
58
|
{
|
64
59
|
}
|
65
60
|
|
66
|
-
|
67
|
-
static class GcsInputStreamReopener
|
68
|
-
implements ResumableInputStream.Reopener
|
61
|
+
private Void getRemoteContentsWithRetry(final File tempFile, final Storage client, final String bucket, final String key, int maxConnectionRetry)
|
69
62
|
{
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
this.key = key;
|
83
|
-
this.maxConnectionRetry = maxConnectionRetry;
|
84
|
-
}
|
85
|
-
|
86
|
-
@Override
|
87
|
-
public InputStream reopen(final long offset, final Exception closedCause) throws IOException
|
88
|
-
{
|
89
|
-
try {
|
90
|
-
return retryExecutor()
|
91
|
-
.withRetryLimit(maxConnectionRetry)
|
92
|
-
.withInitialRetryWait(500)
|
93
|
-
.withMaxRetryWait(30 * 1000)
|
94
|
-
.runInterruptible(new RetryExecutor.Retryable<InputStream>() {
|
95
|
-
@Override
|
96
|
-
public InputStream call() throws IOException
|
97
|
-
{
|
98
|
-
log.warn(String.format("GCS read failed. Retrying GET request with %,d bytes offset", offset), closedCause);
|
99
|
-
Storage.Objects.Get getObject = client.objects().get(bucket, key);
|
100
|
-
|
101
|
-
try (BufferedOutputStream outputStream = new BufferedOutputStream(new FileOutputStream(tempFile))) {
|
102
|
-
IOUtils.copy(getObject.executeMediaAsInputStream(), outputStream);
|
103
|
-
}
|
104
|
-
return new BufferedInputStream(new FileInputStream(tempFile));
|
63
|
+
try {
|
64
|
+
return retryExecutor()
|
65
|
+
.withRetryLimit(maxConnectionRetry)
|
66
|
+
.withInitialRetryWait(500)
|
67
|
+
.withMaxRetryWait(30 * 1000)
|
68
|
+
.runInterruptible(new RetryExecutor.Retryable<Void>() {
|
69
|
+
@Override
|
70
|
+
public Void call() throws IOException
|
71
|
+
{
|
72
|
+
Storage.Objects.Get getObject = client.objects().get(bucket, key);
|
73
|
+
try (BufferedOutputStream outputStream = new BufferedOutputStream(new FileOutputStream(tempFile))) {
|
74
|
+
IOUtils.copy(getObject.executeMediaAsInputStream(), outputStream);
|
105
75
|
}
|
76
|
+
return null;
|
77
|
+
}
|
106
78
|
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
79
|
+
@Override
|
80
|
+
public boolean isRetryableException(Exception exception)
|
81
|
+
{
|
82
|
+
return true; // TODO
|
83
|
+
}
|
112
84
|
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
}
|
122
|
-
else {
|
123
|
-
log.warn(message);
|
124
|
-
}
|
85
|
+
@Override
|
86
|
+
public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait)
|
87
|
+
throws RetryExecutor.RetryGiveupException
|
88
|
+
{
|
89
|
+
String message = String.format("GCS GET request failed. Retrying %d/%d after %d seconds. Message: %s",
|
90
|
+
retryCount, retryLimit, retryWait / 1000, exception.getMessage());
|
91
|
+
if (retryCount % 3 == 0) {
|
92
|
+
log.warn(message, exception);
|
125
93
|
}
|
126
|
-
|
127
|
-
|
128
|
-
public void onGiveup(Exception firstException, Exception lastException)
|
129
|
-
throws RetryExecutor.RetryGiveupException
|
130
|
-
{
|
94
|
+
else {
|
95
|
+
log.warn(message);
|
131
96
|
}
|
132
|
-
}
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
97
|
+
}
|
98
|
+
|
99
|
+
@Override
|
100
|
+
public void onGiveup(Exception firstException, Exception lastException)
|
101
|
+
throws RetryExecutor.RetryGiveupException
|
102
|
+
{
|
103
|
+
}
|
104
|
+
});
|
105
|
+
}
|
106
|
+
catch (RetryExecutor.RetryGiveupException | InterruptedException ex) {
|
107
|
+
throw Throwables.propagate(ex.getCause());
|
141
108
|
}
|
142
109
|
}
|
143
110
|
}
|
@@ -24,12 +24,7 @@ import org.junit.BeforeClass;
|
|
24
24
|
import org.junit.Rule;
|
25
25
|
import org.junit.Test;
|
26
26
|
|
27
|
-
import java.io.BufferedReader;
|
28
|
-
|
29
|
-
import java.io.File;
|
30
27
|
import java.io.IOException;
|
31
|
-
import java.io.InputStream;
|
32
|
-
import java.io.InputStreamReader;
|
33
28
|
import java.security.GeneralSecurityException;
|
34
29
|
import java.util.ArrayList;
|
35
30
|
import java.util.Arrays;
|
@@ -420,35 +415,6 @@ public class TestGcsFileInputPlugin
|
|
420
415
|
assertRecords(config, output);
|
421
416
|
}
|
422
417
|
|
423
|
-
@Test
|
424
|
-
public void testGcsFileInputByReopen()
|
425
|
-
throws NoSuchMethodException, IllegalAccessException, InvocationTargetException, IOException
|
426
|
-
{
|
427
|
-
ConfigSource config = Exec.newConfigSource()
|
428
|
-
.set("bucket", GCP_BUCKET)
|
429
|
-
.set("path_prefix", GCP_PATH_PREFIX)
|
430
|
-
.set("auth_method", "json_key")
|
431
|
-
.set("service_account_email", GCP_EMAIL)
|
432
|
-
.set("json_keyfile", GCP_JSON_KEYFILE)
|
433
|
-
.set("parser", parserConfig(schemaConfig()));
|
434
|
-
|
435
|
-
PluginTask task = config.loadConfig(PluginTask.class);
|
436
|
-
runner.transaction(config, new Control());
|
437
|
-
|
438
|
-
Method method = GcsFileInput.class.getDeclaredMethod("newGcsAuth", PluginTask.class);
|
439
|
-
method.setAccessible(true);
|
440
|
-
Storage client = GcsFileInput.newGcsClient(task, (GcsAuthentication) method.invoke(plugin, task));
|
441
|
-
File tempFile = Exec.getTempFileSpace().createTempFile();
|
442
|
-
task.setFiles(GcsFileInput.listFiles(task, client));
|
443
|
-
|
444
|
-
String key = GCP_BUCKET_DIRECTORY + "sample_01.csv";
|
445
|
-
SingleFileProvider.GcsInputStreamReopener opener = new SingleFileProvider.GcsInputStreamReopener(tempFile, client, GCP_BUCKET, key, MAX_CONNECTION_RETRY);
|
446
|
-
try (InputStream in = opener.reopen(0, new RuntimeException())) {
|
447
|
-
BufferedReader r = new BufferedReader(new InputStreamReader(in));
|
448
|
-
assertEquals("id,account,time,purchase,comment", r.readLine());
|
449
|
-
}
|
450
|
-
}
|
451
|
-
|
452
418
|
@Test
|
453
419
|
public void testBase64()
|
454
420
|
throws NoSuchMethodException, IllegalAccessException, InvocationTargetException
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-gcs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.6
|
4
|
+
version: 0.2.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Satoshi Akama
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-03-05 00:00:00.000000000 Z
|
11
|
+
date: 2018-03-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -71,7 +71,7 @@ files:
|
|
71
71
|
- src/test/resources/secretkeys.tar.enc
|
72
72
|
- classpath/commons-codec-1.3.jar
|
73
73
|
- classpath/commons-logging-1.1.1.jar
|
74
|
-
- classpath/embulk-input-gcs-0.2.6.jar
|
74
|
+
- classpath/embulk-input-gcs-0.2.7.jar
|
75
75
|
- classpath/google-api-client-1.21.0.jar
|
76
76
|
- classpath/google-api-services-storage-v1-rev59-1.21.0.jar
|
77
77
|
- classpath/google-http-client-1.21.0.jar
|