embulk-input-gcs 0.1.10 → 0.1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a7f7692d41284e6ac5299bb159a47416b4a31b5a
|
4
|
+
data.tar.gz: c970445b2be3449e3691e63cc139ca3f362333c8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9220e5404d2edfd40193813563ce40d487fabab48e78073c256193eac383edd1cae8e84700e97eb47e7513ba0ec8002d19ae7879b63c7a57961852fec0a40348
|
7
|
+
data.tar.gz: 104febfa4425cac2622d381222c18c3c11033e965731087b7a9fa69f34907cd7f543747b84ffaac83ca4b23b3c2680cf413fc858661c9bd74cbc87d0a6be3342
|
data/ChangeLog
CHANGED
data/build.gradle
CHANGED
@@ -4,6 +4,7 @@ import java.util.List;
|
|
4
4
|
import java.util.ArrayList;
|
5
5
|
import java.util.Collections;
|
6
6
|
import java.io.IOException;
|
7
|
+
import java.io.InterruptedIOException;
|
7
8
|
import java.io.InputStream;
|
8
9
|
import java.math.BigInteger;
|
9
10
|
|
@@ -14,6 +15,7 @@ import com.google.common.base.Function;
|
|
14
15
|
import com.google.common.base.Throwables;
|
15
16
|
import com.google.common.base.Charsets;
|
16
17
|
import com.google.common.io.BaseEncoding;
|
18
|
+
import com.google.common.annotations.VisibleForTesting;
|
17
19
|
import java.security.GeneralSecurityException;
|
18
20
|
|
19
21
|
import org.embulk.config.TaskReport;
|
@@ -31,6 +33,10 @@ import org.embulk.spi.FileInputPlugin;
|
|
31
33
|
import org.embulk.spi.TransactionalFileInput;
|
32
34
|
import org.embulk.spi.unit.LocalFile;
|
33
35
|
import org.embulk.spi.util.InputStreamFileInput;
|
36
|
+
import org.embulk.spi.util.ResumableInputStream;
|
37
|
+
import org.embulk.spi.util.RetryExecutor.Retryable;
|
38
|
+
import org.embulk.spi.util.RetryExecutor.RetryGiveupException;
|
39
|
+
import static org.embulk.spi.util.RetryExecutor.retryExecutor;
|
34
40
|
|
35
41
|
import org.slf4j.Logger;
|
36
42
|
|
@@ -278,6 +284,73 @@ public class GcsFileInputPlugin
|
|
278
284
|
return new GcsFileInput(task, taskIndex);
|
279
285
|
}
|
280
286
|
|
287
|
+
@VisibleForTesting
|
288
|
+
static class GcsInputStreamReopener
|
289
|
+
implements ResumableInputStream.Reopener
|
290
|
+
{
|
291
|
+
private final Logger log = Exec.getLogger(GcsInputStreamReopener.class);
|
292
|
+
private final Storage client;
|
293
|
+
private final String bucket;
|
294
|
+
private final String key;
|
295
|
+
|
296
|
+
public GcsInputStreamReopener(Storage client, String bucket, String key)
|
297
|
+
{
|
298
|
+
this.client = client;
|
299
|
+
this.bucket = bucket;
|
300
|
+
this.key = key;
|
301
|
+
}
|
302
|
+
|
303
|
+
@Override
|
304
|
+
public InputStream reopen(final long offset, final Exception closedCause) throws IOException
|
305
|
+
{
|
306
|
+
try {
|
307
|
+
return retryExecutor()
|
308
|
+
.withRetryLimit(3)
|
309
|
+
.withInitialRetryWait(500)
|
310
|
+
.withMaxRetryWait(30 * 1000)
|
311
|
+
.runInterruptible(new Retryable<InputStream>() {
|
312
|
+
@Override
|
313
|
+
public InputStream call() throws InterruptedIOException, IOException
|
314
|
+
{
|
315
|
+
log.warn(String.format("GCS read failed. Retrying GET request with %,d bytes offset", offset), closedCause);
|
316
|
+
Storage.Objects.Get getObject = client.objects().get(bucket, key);
|
317
|
+
return getObject.executeMediaAsInputStream();
|
318
|
+
}
|
319
|
+
|
320
|
+
@Override
|
321
|
+
public boolean isRetryableException(Exception exception)
|
322
|
+
{
|
323
|
+
return true; // TODO
|
324
|
+
}
|
325
|
+
|
326
|
+
@Override
|
327
|
+
public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait)
|
328
|
+
throws RetryGiveupException
|
329
|
+
{
|
330
|
+
String message = String.format("GCS GET request failed. Retrying %d/%d after %d seconds. Message: %s",
|
331
|
+
retryCount, retryLimit, retryWait/1000, exception.getMessage());
|
332
|
+
if (retryCount % 3 == 0) {
|
333
|
+
log.warn(message, exception);
|
334
|
+
} else {
|
335
|
+
log.warn(message);
|
336
|
+
}
|
337
|
+
}
|
338
|
+
|
339
|
+
@Override
|
340
|
+
public void onGiveup(Exception firstException, Exception lastException)
|
341
|
+
throws RetryGiveupException
|
342
|
+
{
|
343
|
+
}
|
344
|
+
});
|
345
|
+
} catch (RetryGiveupException ex) {
|
346
|
+
Throwables.propagateIfInstanceOf(ex.getCause(), IOException.class);
|
347
|
+
throw Throwables.propagate(ex.getCause());
|
348
|
+
} catch (InterruptedException ex) {
|
349
|
+
throw new InterruptedIOException();
|
350
|
+
}
|
351
|
+
}
|
352
|
+
}
|
353
|
+
|
281
354
|
public class GcsFileInput
|
282
355
|
extends InputStreamFileInput
|
283
356
|
implements TransactionalFileInput
|
@@ -322,7 +395,7 @@ public class GcsFileInputPlugin
|
|
322
395
|
opened = true;
|
323
396
|
Storage.Objects.Get getObject = client.objects().get(bucket, key);
|
324
397
|
|
325
|
-
return getObject.executeMediaAsInputStream();
|
398
|
+
return new ResumableInputStream(getObject.executeMediaAsInputStream(), new GcsInputStreamReopener(client, bucket, key));
|
326
399
|
}
|
327
400
|
|
328
401
|
@Override
|
@@ -7,6 +7,9 @@ import com.google.common.collect.ImmutableMap;
|
|
7
7
|
import com.google.common.base.Optional;
|
8
8
|
import com.google.common.collect.ImmutableList;
|
9
9
|
import com.google.common.collect.Lists;
|
10
|
+
import java.io.BufferedReader;
|
11
|
+
import java.io.InputStream;
|
12
|
+
import java.io.InputStreamReader;
|
10
13
|
import java.io.IOException;
|
11
14
|
import java.security.GeneralSecurityException;
|
12
15
|
|
@@ -199,7 +202,7 @@ public class TestGcsFileInputPlugin
|
|
199
202
|
.set("json_keyfile", GCP_JSON_KEYFILE)
|
200
203
|
.set("parser", parserConfig(schemaConfig()));
|
201
204
|
|
202
|
-
PluginTask task = config
|
205
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
203
206
|
runner.transaction(config, new Control());
|
204
207
|
|
205
208
|
Method method = GcsFileInputPlugin.class.getDeclaredMethod("newGcsAuth", PluginTask.class);
|
@@ -292,6 +295,34 @@ public class TestGcsFileInputPlugin
|
|
292
295
|
assertRecords(config, output);
|
293
296
|
}
|
294
297
|
|
298
|
+
@Test
|
299
|
+
public void testGcsFileInputByReopen()
|
300
|
+
throws NoSuchMethodException, IllegalAccessException, InvocationTargetException, IOException
|
301
|
+
{
|
302
|
+
ConfigSource config = Exec.newConfigSource()
|
303
|
+
.set("bucket", GCP_BUCKET)
|
304
|
+
.set("path_prefix", GCP_PATH_PREFIX)
|
305
|
+
.set("auth_method", "json_key")
|
306
|
+
.set("service_account_email", GCP_EMAIL)
|
307
|
+
.set("json_keyfile", GCP_JSON_KEYFILE)
|
308
|
+
.set("parser", parserConfig(schemaConfig()));
|
309
|
+
|
310
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
311
|
+
runner.transaction(config, new Control());
|
312
|
+
|
313
|
+
Method method = GcsFileInputPlugin.class.getDeclaredMethod("newGcsAuth", PluginTask.class);
|
314
|
+
method.setAccessible(true);
|
315
|
+
Storage client = plugin.newGcsClient(task, (GcsAuthentication) method.invoke(plugin, task));
|
316
|
+
task.setFiles(plugin.listFiles(task, client));
|
317
|
+
|
318
|
+
String key = GCP_BUCKET_DIRECTORY + "sample_01.csv";
|
319
|
+
GcsFileInputPlugin.GcsInputStreamReopener opener = new GcsFileInputPlugin.GcsInputStreamReopener(client, GCP_BUCKET, key);
|
320
|
+
try (InputStream in = opener.reopen(0, new RuntimeException())) {
|
321
|
+
BufferedReader r = new BufferedReader(new InputStreamReader(in));
|
322
|
+
assertEquals("id,account,time,purchase,comment", r.readLine());
|
323
|
+
}
|
324
|
+
}
|
325
|
+
|
295
326
|
@Test
|
296
327
|
public void testBase64()
|
297
328
|
throws NoSuchMethodException, IllegalAccessException, InvocationTargetException
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-gcs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.10
|
4
|
+
version: 0.1.11
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Satoshi Akama
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-01-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -63,7 +63,7 @@ files:
|
|
63
63
|
- src/test/resources/sample_02.csv
|
64
64
|
- classpath/commons-codec-1.3.jar
|
65
65
|
- classpath/commons-logging-1.1.1.jar
|
66
|
-
- classpath/embulk-input-gcs-0.1.10.jar
|
66
|
+
- classpath/embulk-input-gcs-0.1.11.jar
|
67
67
|
- classpath/google-api-client-1.19.1.jar
|
68
68
|
- classpath/google-api-services-storage-v1-rev27-1.19.1.jar
|
69
69
|
- classpath/google-http-client-1.19.0.jar
|