embulk-input-gcs 0.2.5 → 0.2.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +5 -0
- data/CHANGELOG.md +3 -0
- data/README.md +16 -0
- data/build.gradle +1 -1
- data/classpath/embulk-input-gcs-0.2.6.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +2 -2
- data/src/main/java/org/embulk/input/gcs/FileList.java +335 -0
- data/src/main/java/org/embulk/input/gcs/GcsFileInput.java +195 -0
- data/src/main/java/org/embulk/input/gcs/GcsFileInputPlugin.java +10 -362
- data/src/main/java/org/embulk/input/gcs/PluginTask.java +71 -0
- data/src/main/java/org/embulk/input/gcs/SingleFileProvider.java +143 -0
- data/src/test/java/org/embulk/input/gcs/TestGcsFileInputPlugin.java +54 -21
- metadata +7 -3
- data/classpath/embulk-input-gcs-0.2.5.jar +0 -0
@@ -0,0 +1,143 @@
|
|
1
|
+
package org.embulk.input.gcs;
|
2
|
+
|
3
|
+
import com.google.api.client.util.IOUtils;
|
4
|
+
import com.google.api.services.storage.Storage;
|
5
|
+
import com.google.common.annotations.VisibleForTesting;
|
6
|
+
import com.google.common.base.Throwables;
|
7
|
+
import org.embulk.spi.Exec;
|
8
|
+
import org.embulk.spi.util.InputStreamFileInput;
|
9
|
+
import org.embulk.spi.util.ResumableInputStream;
|
10
|
+
import org.embulk.spi.util.RetryExecutor;
|
11
|
+
import org.slf4j.Logger;
|
12
|
+
|
13
|
+
import java.io.BufferedInputStream;
|
14
|
+
import java.io.BufferedOutputStream;
|
15
|
+
import java.io.File;
|
16
|
+
import java.io.FileInputStream;
|
17
|
+
import java.io.FileOutputStream;
|
18
|
+
import java.io.IOException;
|
19
|
+
import java.io.InputStream;
|
20
|
+
import java.io.InterruptedIOException;
|
21
|
+
import java.util.Iterator;
|
22
|
+
|
23
|
+
import static org.embulk.spi.util.RetryExecutor.retryExecutor;
|
24
|
+
|
25
|
+
public class SingleFileProvider
|
26
|
+
implements InputStreamFileInput.Provider
|
27
|
+
{
|
28
|
+
private final Storage client;
|
29
|
+
private final String bucket;
|
30
|
+
private final Iterator<String> iterator;
|
31
|
+
private final int maxConnectionRetry;
|
32
|
+
private boolean opened = false;
|
33
|
+
|
34
|
+
public SingleFileProvider(PluginTask task, int taskIndex)
|
35
|
+
{
|
36
|
+
this.client = GcsFileInput.newGcsClient(task, GcsFileInput.newGcsAuth(task));
|
37
|
+
this.bucket = task.getBucket();
|
38
|
+
this.iterator = task.getFiles().get(taskIndex).iterator();
|
39
|
+
this.maxConnectionRetry = task.getMaxConnectionRetry();
|
40
|
+
}
|
41
|
+
|
42
|
+
@Override
|
43
|
+
public InputStream openNext() throws IOException
|
44
|
+
{
|
45
|
+
if (opened) {
|
46
|
+
return null;
|
47
|
+
}
|
48
|
+
opened = true;
|
49
|
+
if (!iterator.hasNext()) {
|
50
|
+
return null;
|
51
|
+
}
|
52
|
+
String key = iterator.next();
|
53
|
+
Storage.Objects.Get getObject = client.objects().get(bucket, key);
|
54
|
+
File tempFile = Exec.getTempFileSpace().createTempFile();
|
55
|
+
try (BufferedOutputStream outputStream = new BufferedOutputStream(new FileOutputStream(tempFile))) {
|
56
|
+
IOUtils.copy(getObject.executeMediaAsInputStream(), outputStream);
|
57
|
+
}
|
58
|
+
return new ResumableInputStream(new BufferedInputStream(new FileInputStream(tempFile)), new GcsInputStreamReopener(tempFile, client, bucket, key, maxConnectionRetry));
|
59
|
+
}
|
60
|
+
|
61
|
+
@Override
|
62
|
+
public void close()
|
63
|
+
{
|
64
|
+
}
|
65
|
+
|
66
|
+
@VisibleForTesting
|
67
|
+
static class GcsInputStreamReopener
|
68
|
+
implements ResumableInputStream.Reopener
|
69
|
+
{
|
70
|
+
private final Logger log = Exec.getLogger(GcsInputStreamReopener.class);
|
71
|
+
private final File tempFile;
|
72
|
+
private final Storage client;
|
73
|
+
private final String bucket;
|
74
|
+
private final String key;
|
75
|
+
private final int maxConnectionRetry;
|
76
|
+
|
77
|
+
public GcsInputStreamReopener(File tempFile, Storage client, String bucket, String key, int maxConnectionRetry)
|
78
|
+
{
|
79
|
+
this.tempFile = tempFile;
|
80
|
+
this.client = client;
|
81
|
+
this.bucket = bucket;
|
82
|
+
this.key = key;
|
83
|
+
this.maxConnectionRetry = maxConnectionRetry;
|
84
|
+
}
|
85
|
+
|
86
|
+
@Override
|
87
|
+
public InputStream reopen(final long offset, final Exception closedCause) throws IOException
|
88
|
+
{
|
89
|
+
try {
|
90
|
+
return retryExecutor()
|
91
|
+
.withRetryLimit(maxConnectionRetry)
|
92
|
+
.withInitialRetryWait(500)
|
93
|
+
.withMaxRetryWait(30 * 1000)
|
94
|
+
.runInterruptible(new RetryExecutor.Retryable<InputStream>() {
|
95
|
+
@Override
|
96
|
+
public InputStream call() throws IOException
|
97
|
+
{
|
98
|
+
log.warn(String.format("GCS read failed. Retrying GET request with %,d bytes offset", offset), closedCause);
|
99
|
+
Storage.Objects.Get getObject = client.objects().get(bucket, key);
|
100
|
+
|
101
|
+
try (BufferedOutputStream outputStream = new BufferedOutputStream(new FileOutputStream(tempFile))) {
|
102
|
+
IOUtils.copy(getObject.executeMediaAsInputStream(), outputStream);
|
103
|
+
}
|
104
|
+
return new BufferedInputStream(new FileInputStream(tempFile));
|
105
|
+
}
|
106
|
+
|
107
|
+
@Override
|
108
|
+
public boolean isRetryableException(Exception exception)
|
109
|
+
{
|
110
|
+
return true; // TODO
|
111
|
+
}
|
112
|
+
|
113
|
+
@Override
|
114
|
+
public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait)
|
115
|
+
throws RetryExecutor.RetryGiveupException
|
116
|
+
{
|
117
|
+
String message = String.format("GCS GET request failed. Retrying %d/%d after %d seconds. Message: %s",
|
118
|
+
retryCount, retryLimit, retryWait / 1000, exception.getMessage());
|
119
|
+
if (retryCount % 3 == 0) {
|
120
|
+
log.warn(message, exception);
|
121
|
+
}
|
122
|
+
else {
|
123
|
+
log.warn(message);
|
124
|
+
}
|
125
|
+
}
|
126
|
+
|
127
|
+
@Override
|
128
|
+
public void onGiveup(Exception firstException, Exception lastException)
|
129
|
+
throws RetryExecutor.RetryGiveupException
|
130
|
+
{
|
131
|
+
}
|
132
|
+
});
|
133
|
+
}
|
134
|
+
catch (RetryExecutor.RetryGiveupException ex) {
|
135
|
+
Throwables.propagateIfInstanceOf(ex.getCause(), IOException.class);
|
136
|
+
throw Throwables.propagate(ex.getCause());
|
137
|
+
}
|
138
|
+
catch (InterruptedException ex) {
|
139
|
+
throw new InterruptedIOException();
|
140
|
+
}
|
141
|
+
}
|
142
|
+
}
|
143
|
+
}
|
@@ -11,7 +11,6 @@ import org.embulk.config.ConfigException;
|
|
11
11
|
import org.embulk.config.ConfigSource;
|
12
12
|
import org.embulk.config.TaskReport;
|
13
13
|
import org.embulk.config.TaskSource;
|
14
|
-
import org.embulk.input.gcs.GcsFileInputPlugin.PluginTask;
|
15
14
|
import org.embulk.spi.Exec;
|
16
15
|
import org.embulk.spi.FileInputPlugin;
|
17
16
|
import org.embulk.spi.FileInputRunner;
|
@@ -37,7 +36,6 @@ import java.util.Arrays;
|
|
37
36
|
import java.util.List;
|
38
37
|
|
39
38
|
import static org.junit.Assert.assertEquals;
|
40
|
-
import static org.junit.Assert.assertFalse;
|
41
39
|
import static org.junit.Assume.assumeNotNull;
|
42
40
|
|
43
41
|
import java.lang.reflect.InvocationTargetException;
|
@@ -105,7 +103,7 @@ public class TestGcsFileInputPlugin
|
|
105
103
|
.set("bucket", GCP_BUCKET)
|
106
104
|
.set("path_prefix", "my-prefix");
|
107
105
|
|
108
|
-
|
106
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
109
107
|
assertEquals(true, task.getIncremental());
|
110
108
|
assertEquals("private_key", task.getAuthMethod().toString());
|
111
109
|
assertEquals("Embulk GCS input plugin", task.getApplicationName());
|
@@ -124,8 +122,8 @@ public class TestGcsFileInputPlugin
|
|
124
122
|
.set("p12_keyfile_fullpath", GCP_P12_KEYFILE)
|
125
123
|
.set("parser", parserConfig(schemaConfig()));
|
126
124
|
|
127
|
-
|
128
|
-
|
125
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
126
|
+
assertEquals(2, task.getPathFiles().size());
|
129
127
|
}
|
130
128
|
|
131
129
|
// both path_prefix and paths are not set
|
@@ -230,7 +228,7 @@ public class TestGcsFileInputPlugin
|
|
230
228
|
|
231
229
|
Method method = GcsFileInputPlugin.class.getDeclaredMethod("newGcsAuth", PluginTask.class);
|
232
230
|
method.setAccessible(true);
|
233
|
-
|
231
|
+
GcsFileInput.newGcsClient(task, (GcsAuthentication) method.invoke(plugin, task)); // no errors happens
|
234
232
|
}
|
235
233
|
|
236
234
|
@Test(expected = ConfigException.class)
|
@@ -251,14 +249,16 @@ public class TestGcsFileInputPlugin
|
|
251
249
|
|
252
250
|
Method method = GcsFileInputPlugin.class.getDeclaredMethod("newGcsAuth", PluginTask.class);
|
253
251
|
method.setAccessible(true);
|
254
|
-
|
252
|
+
GcsFileInput.newGcsClient(task, (GcsAuthentication) method.invoke(plugin, task));
|
255
253
|
}
|
256
254
|
|
257
255
|
@Test
|
258
256
|
public void testResume()
|
259
257
|
{
|
260
258
|
PluginTask task = config.loadConfig(PluginTask.class);
|
261
|
-
|
259
|
+
FileList.Builder builder = new FileList.Builder(config);
|
260
|
+
builder.add("in/aa/a", 1);
|
261
|
+
task.setFiles(builder.build());
|
262
262
|
ConfigDiff configDiff = plugin.resume(task.dump(), 0, new FileInputPlugin.Control()
|
263
263
|
{
|
264
264
|
@Override
|
@@ -298,12 +298,44 @@ public class TestGcsFileInputPlugin
|
|
298
298
|
|
299
299
|
Method method = GcsFileInputPlugin.class.getDeclaredMethod("newGcsAuth", PluginTask.class);
|
300
300
|
method.setAccessible(true);
|
301
|
-
Storage client =
|
302
|
-
|
303
|
-
|
301
|
+
Storage client = GcsFileInput.newGcsClient(task, (GcsAuthentication) method.invoke(plugin, task));
|
302
|
+
FileList.Builder builder = new FileList.Builder(config);
|
303
|
+
GcsFileInput.listGcsFilesByPrefix(builder, client, GCP_BUCKET, GCP_PATH_PREFIX, Optional.<String>absent());
|
304
|
+
FileList fileList = builder.build();
|
305
|
+
assertEquals(expected.get(0), fileList.get(0).get(0));
|
306
|
+
assertEquals(expected.get(1), fileList.get(1).get(0));
|
304
307
|
assertEquals(GCP_BUCKET_DIRECTORY + "sample_02.csv", configDiff.get(String.class, "last_path"));
|
305
308
|
}
|
306
309
|
|
310
|
+
@Test
|
311
|
+
public void testListFilesByPrefixWithPattern()
|
312
|
+
throws NoSuchMethodException, IllegalAccessException, InvocationTargetException
|
313
|
+
{
|
314
|
+
List<String> expected = Arrays.asList(
|
315
|
+
GCP_BUCKET_DIRECTORY + "sample_01.csv"
|
316
|
+
);
|
317
|
+
|
318
|
+
ConfigSource configWithPattern = config.deepCopy().set("path_match_pattern", "1");
|
319
|
+
PluginTask task = configWithPattern.loadConfig(PluginTask.class);
|
320
|
+
ConfigDiff configDiff = plugin.transaction(configWithPattern, new FileInputPlugin.Control() {
|
321
|
+
@Override
|
322
|
+
public List<TaskReport> run(TaskSource taskSource, int taskCount)
|
323
|
+
{
|
324
|
+
assertEquals(1, taskCount);
|
325
|
+
return emptyTaskReports(taskCount);
|
326
|
+
}
|
327
|
+
});
|
328
|
+
|
329
|
+
Method method = GcsFileInputPlugin.class.getDeclaredMethod("newGcsAuth", PluginTask.class);
|
330
|
+
method.setAccessible(true);
|
331
|
+
Storage client = GcsFileInput.newGcsClient(task, (GcsAuthentication) method.invoke(plugin, task));
|
332
|
+
FileList.Builder builder = new FileList.Builder(configWithPattern);
|
333
|
+
GcsFileInput.listGcsFilesByPrefix(builder, client, GCP_BUCKET, GCP_PATH_PREFIX, Optional.<String>absent());
|
334
|
+
FileList fileList = builder.build();
|
335
|
+
assertEquals(expected.get(0), fileList.get(0).get(0));
|
336
|
+
assertEquals(GCP_BUCKET_DIRECTORY + "sample_01.csv", configDiff.get(String.class, "last_path"));
|
337
|
+
}
|
338
|
+
|
307
339
|
@Test
|
308
340
|
public void testListFilesByPrefixIncrementalFalse() throws Exception
|
309
341
|
{
|
@@ -324,8 +356,9 @@ public class TestGcsFileInputPlugin
|
|
324
356
|
|
325
357
|
Method method = GcsFileInputPlugin.class.getDeclaredMethod("newGcsAuth", PluginTask.class);
|
326
358
|
method.setAccessible(true);
|
327
|
-
Storage client =
|
328
|
-
|
359
|
+
Storage client = GcsFileInput.newGcsClient(task, (GcsAuthentication) method.invoke(plugin, task));
|
360
|
+
FileList.Builder builder = new FileList.Builder(config);
|
361
|
+
GcsFileInput.listGcsFilesByPrefix(builder, client, "non-exists-bucket", "prefix", Optional.<String>absent()); // no errors happens
|
329
362
|
}
|
330
363
|
|
331
364
|
@Test
|
@@ -379,10 +412,10 @@ public class TestGcsFileInputPlugin
|
|
379
412
|
PluginTask task = config.loadConfig(PluginTask.class);
|
380
413
|
runner.transaction(config, new Control());
|
381
414
|
|
382
|
-
Method method =
|
415
|
+
Method method = GcsFileInput.class.getDeclaredMethod("newGcsAuth", PluginTask.class);
|
383
416
|
method.setAccessible(true);
|
384
|
-
Storage client =
|
385
|
-
task.setFiles(
|
417
|
+
Storage client = GcsFileInput.newGcsClient(task, (GcsAuthentication) method.invoke(plugin, task));
|
418
|
+
task.setFiles(GcsFileInput.listFiles(task, client));
|
386
419
|
|
387
420
|
assertRecords(config, output);
|
388
421
|
}
|
@@ -402,14 +435,14 @@ public class TestGcsFileInputPlugin
|
|
402
435
|
PluginTask task = config.loadConfig(PluginTask.class);
|
403
436
|
runner.transaction(config, new Control());
|
404
437
|
|
405
|
-
Method method =
|
438
|
+
Method method = GcsFileInput.class.getDeclaredMethod("newGcsAuth", PluginTask.class);
|
406
439
|
method.setAccessible(true);
|
407
|
-
Storage client =
|
440
|
+
Storage client = GcsFileInput.newGcsClient(task, (GcsAuthentication) method.invoke(plugin, task));
|
408
441
|
File tempFile = Exec.getTempFileSpace().createTempFile();
|
409
|
-
task.setFiles(
|
442
|
+
task.setFiles(GcsFileInput.listFiles(task, client));
|
410
443
|
|
411
444
|
String key = GCP_BUCKET_DIRECTORY + "sample_01.csv";
|
412
|
-
|
445
|
+
SingleFileProvider.GcsInputStreamReopener opener = new SingleFileProvider.GcsInputStreamReopener(tempFile, client, GCP_BUCKET, key, MAX_CONNECTION_RETRY);
|
413
446
|
try (InputStream in = opener.reopen(0, new RuntimeException())) {
|
414
447
|
BufferedReader r = new BufferedReader(new InputStreamReader(in));
|
415
448
|
assertEquals("id,account,time,purchase,comment", r.readLine());
|
@@ -420,7 +453,7 @@ public class TestGcsFileInputPlugin
|
|
420
453
|
public void testBase64()
|
421
454
|
throws NoSuchMethodException, IllegalAccessException, InvocationTargetException
|
422
455
|
{
|
423
|
-
Method method =
|
456
|
+
Method method = GcsFileInput.class.getDeclaredMethod("base64Encode", String.class);
|
424
457
|
method.setAccessible(true);
|
425
458
|
|
426
459
|
assertEquals("CgFj", method.invoke(plugin, "c"));
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-gcs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.5
|
4
|
+
version: 0.2.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Satoshi Akama
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-03-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -58,8 +58,12 @@ files:
|
|
58
58
|
- gradlew.bat
|
59
59
|
- lib/embulk/input/gcs.rb
|
60
60
|
- settings.gradle
|
61
|
+
- src/main/java/org/embulk/input/gcs/FileList.java
|
61
62
|
- src/main/java/org/embulk/input/gcs/GcsAuthentication.java
|
63
|
+
- src/main/java/org/embulk/input/gcs/GcsFileInput.java
|
62
64
|
- src/main/java/org/embulk/input/gcs/GcsFileInputPlugin.java
|
65
|
+
- src/main/java/org/embulk/input/gcs/PluginTask.java
|
66
|
+
- src/main/java/org/embulk/input/gcs/SingleFileProvider.java
|
63
67
|
- src/test/java/org/embulk/input/gcs/TestGcsAuthentication.java
|
64
68
|
- src/test/java/org/embulk/input/gcs/TestGcsFileInputPlugin.java
|
65
69
|
- src/test/resources/sample_01.csv
|
@@ -67,7 +71,7 @@ files:
|
|
67
71
|
- src/test/resources/secretkeys.tar.enc
|
68
72
|
- classpath/commons-codec-1.3.jar
|
69
73
|
- classpath/commons-logging-1.1.1.jar
|
70
|
-
- classpath/embulk-input-gcs-0.2.5.jar
|
74
|
+
- classpath/embulk-input-gcs-0.2.6.jar
|
71
75
|
- classpath/google-api-client-1.21.0.jar
|
72
76
|
- classpath/google-api-services-storage-v1-rev59-1.21.0.jar
|
73
77
|
- classpath/google-http-client-1.21.0.jar
|
Binary file
|