embulk-input-gcs 0.2.5 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +5 -0
- data/CHANGELOG.md +3 -0
- data/README.md +16 -0
- data/build.gradle +1 -1
- data/classpath/embulk-input-gcs-0.2.6.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +2 -2
- data/src/main/java/org/embulk/input/gcs/FileList.java +335 -0
- data/src/main/java/org/embulk/input/gcs/GcsFileInput.java +195 -0
- data/src/main/java/org/embulk/input/gcs/GcsFileInputPlugin.java +10 -362
- data/src/main/java/org/embulk/input/gcs/PluginTask.java +71 -0
- data/src/main/java/org/embulk/input/gcs/SingleFileProvider.java +143 -0
- data/src/test/java/org/embulk/input/gcs/TestGcsFileInputPlugin.java +54 -21
- metadata +7 -3
- data/classpath/embulk-input-gcs-0.2.5.jar +0 -0
@@ -0,0 +1,143 @@
|
|
1
|
+
package org.embulk.input.gcs;
|
2
|
+
|
3
|
+
import com.google.api.client.util.IOUtils;
|
4
|
+
import com.google.api.services.storage.Storage;
|
5
|
+
import com.google.common.annotations.VisibleForTesting;
|
6
|
+
import com.google.common.base.Throwables;
|
7
|
+
import org.embulk.spi.Exec;
|
8
|
+
import org.embulk.spi.util.InputStreamFileInput;
|
9
|
+
import org.embulk.spi.util.ResumableInputStream;
|
10
|
+
import org.embulk.spi.util.RetryExecutor;
|
11
|
+
import org.slf4j.Logger;
|
12
|
+
|
13
|
+
import java.io.BufferedInputStream;
|
14
|
+
import java.io.BufferedOutputStream;
|
15
|
+
import java.io.File;
|
16
|
+
import java.io.FileInputStream;
|
17
|
+
import java.io.FileOutputStream;
|
18
|
+
import java.io.IOException;
|
19
|
+
import java.io.InputStream;
|
20
|
+
import java.io.InterruptedIOException;
|
21
|
+
import java.util.Iterator;
|
22
|
+
|
23
|
+
import static org.embulk.spi.util.RetryExecutor.retryExecutor;
|
24
|
+
|
25
|
+
public class SingleFileProvider
|
26
|
+
implements InputStreamFileInput.Provider
|
27
|
+
{
|
28
|
+
private final Storage client;
|
29
|
+
private final String bucket;
|
30
|
+
private final Iterator<String> iterator;
|
31
|
+
private final int maxConnectionRetry;
|
32
|
+
private boolean opened = false;
|
33
|
+
|
34
|
+
public SingleFileProvider(PluginTask task, int taskIndex)
|
35
|
+
{
|
36
|
+
this.client = GcsFileInput.newGcsClient(task, GcsFileInput.newGcsAuth(task));
|
37
|
+
this.bucket = task.getBucket();
|
38
|
+
this.iterator = task.getFiles().get(taskIndex).iterator();
|
39
|
+
this.maxConnectionRetry = task.getMaxConnectionRetry();
|
40
|
+
}
|
41
|
+
|
42
|
+
@Override
|
43
|
+
public InputStream openNext() throws IOException
|
44
|
+
{
|
45
|
+
if (opened) {
|
46
|
+
return null;
|
47
|
+
}
|
48
|
+
opened = true;
|
49
|
+
if (!iterator.hasNext()) {
|
50
|
+
return null;
|
51
|
+
}
|
52
|
+
String key = iterator.next();
|
53
|
+
Storage.Objects.Get getObject = client.objects().get(bucket, key);
|
54
|
+
File tempFile = Exec.getTempFileSpace().createTempFile();
|
55
|
+
try (BufferedOutputStream outputStream = new BufferedOutputStream(new FileOutputStream(tempFile))) {
|
56
|
+
IOUtils.copy(getObject.executeMediaAsInputStream(), outputStream);
|
57
|
+
}
|
58
|
+
return new ResumableInputStream(new BufferedInputStream(new FileInputStream(tempFile)), new GcsInputStreamReopener(tempFile, client, bucket, key, maxConnectionRetry));
|
59
|
+
}
|
60
|
+
|
61
|
+
@Override
|
62
|
+
public void close()
|
63
|
+
{
|
64
|
+
}
|
65
|
+
|
66
|
+
@VisibleForTesting
|
67
|
+
static class GcsInputStreamReopener
|
68
|
+
implements ResumableInputStream.Reopener
|
69
|
+
{
|
70
|
+
private final Logger log = Exec.getLogger(GcsInputStreamReopener.class);
|
71
|
+
private final File tempFile;
|
72
|
+
private final Storage client;
|
73
|
+
private final String bucket;
|
74
|
+
private final String key;
|
75
|
+
private final int maxConnectionRetry;
|
76
|
+
|
77
|
+
public GcsInputStreamReopener(File tempFile, Storage client, String bucket, String key, int maxConnectionRetry)
|
78
|
+
{
|
79
|
+
this.tempFile = tempFile;
|
80
|
+
this.client = client;
|
81
|
+
this.bucket = bucket;
|
82
|
+
this.key = key;
|
83
|
+
this.maxConnectionRetry = maxConnectionRetry;
|
84
|
+
}
|
85
|
+
|
86
|
+
@Override
|
87
|
+
public InputStream reopen(final long offset, final Exception closedCause) throws IOException
|
88
|
+
{
|
89
|
+
try {
|
90
|
+
return retryExecutor()
|
91
|
+
.withRetryLimit(maxConnectionRetry)
|
92
|
+
.withInitialRetryWait(500)
|
93
|
+
.withMaxRetryWait(30 * 1000)
|
94
|
+
.runInterruptible(new RetryExecutor.Retryable<InputStream>() {
|
95
|
+
@Override
|
96
|
+
public InputStream call() throws IOException
|
97
|
+
{
|
98
|
+
log.warn(String.format("GCS read failed. Retrying GET request with %,d bytes offset", offset), closedCause);
|
99
|
+
Storage.Objects.Get getObject = client.objects().get(bucket, key);
|
100
|
+
|
101
|
+
try (BufferedOutputStream outputStream = new BufferedOutputStream(new FileOutputStream(tempFile))) {
|
102
|
+
IOUtils.copy(getObject.executeMediaAsInputStream(), outputStream);
|
103
|
+
}
|
104
|
+
return new BufferedInputStream(new FileInputStream(tempFile));
|
105
|
+
}
|
106
|
+
|
107
|
+
@Override
|
108
|
+
public boolean isRetryableException(Exception exception)
|
109
|
+
{
|
110
|
+
return true; // TODO
|
111
|
+
}
|
112
|
+
|
113
|
+
@Override
|
114
|
+
public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait)
|
115
|
+
throws RetryExecutor.RetryGiveupException
|
116
|
+
{
|
117
|
+
String message = String.format("GCS GET request failed. Retrying %d/%d after %d seconds. Message: %s",
|
118
|
+
retryCount, retryLimit, retryWait / 1000, exception.getMessage());
|
119
|
+
if (retryCount % 3 == 0) {
|
120
|
+
log.warn(message, exception);
|
121
|
+
}
|
122
|
+
else {
|
123
|
+
log.warn(message);
|
124
|
+
}
|
125
|
+
}
|
126
|
+
|
127
|
+
@Override
|
128
|
+
public void onGiveup(Exception firstException, Exception lastException)
|
129
|
+
throws RetryExecutor.RetryGiveupException
|
130
|
+
{
|
131
|
+
}
|
132
|
+
});
|
133
|
+
}
|
134
|
+
catch (RetryExecutor.RetryGiveupException ex) {
|
135
|
+
Throwables.propagateIfInstanceOf(ex.getCause(), IOException.class);
|
136
|
+
throw Throwables.propagate(ex.getCause());
|
137
|
+
}
|
138
|
+
catch (InterruptedException ex) {
|
139
|
+
throw new InterruptedIOException();
|
140
|
+
}
|
141
|
+
}
|
142
|
+
}
|
143
|
+
}
|
@@ -11,7 +11,6 @@ import org.embulk.config.ConfigException;
|
|
11
11
|
import org.embulk.config.ConfigSource;
|
12
12
|
import org.embulk.config.TaskReport;
|
13
13
|
import org.embulk.config.TaskSource;
|
14
|
-
import org.embulk.input.gcs.GcsFileInputPlugin.PluginTask;
|
15
14
|
import org.embulk.spi.Exec;
|
16
15
|
import org.embulk.spi.FileInputPlugin;
|
17
16
|
import org.embulk.spi.FileInputRunner;
|
@@ -37,7 +36,6 @@ import java.util.Arrays;
|
|
37
36
|
import java.util.List;
|
38
37
|
|
39
38
|
import static org.junit.Assert.assertEquals;
|
40
|
-
import static org.junit.Assert.assertFalse;
|
41
39
|
import static org.junit.Assume.assumeNotNull;
|
42
40
|
|
43
41
|
import java.lang.reflect.InvocationTargetException;
|
@@ -105,7 +103,7 @@ public class TestGcsFileInputPlugin
|
|
105
103
|
.set("bucket", GCP_BUCKET)
|
106
104
|
.set("path_prefix", "my-prefix");
|
107
105
|
|
108
|
-
|
106
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
109
107
|
assertEquals(true, task.getIncremental());
|
110
108
|
assertEquals("private_key", task.getAuthMethod().toString());
|
111
109
|
assertEquals("Embulk GCS input plugin", task.getApplicationName());
|
@@ -124,8 +122,8 @@ public class TestGcsFileInputPlugin
|
|
124
122
|
.set("p12_keyfile_fullpath", GCP_P12_KEYFILE)
|
125
123
|
.set("parser", parserConfig(schemaConfig()));
|
126
124
|
|
127
|
-
|
128
|
-
|
125
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
126
|
+
assertEquals(2, task.getPathFiles().size());
|
129
127
|
}
|
130
128
|
|
131
129
|
// both path_prefix and paths are not set
|
@@ -230,7 +228,7 @@ public class TestGcsFileInputPlugin
|
|
230
228
|
|
231
229
|
Method method = GcsFileInputPlugin.class.getDeclaredMethod("newGcsAuth", PluginTask.class);
|
232
230
|
method.setAccessible(true);
|
233
|
-
|
231
|
+
GcsFileInput.newGcsClient(task, (GcsAuthentication) method.invoke(plugin, task)); // no errors happens
|
234
232
|
}
|
235
233
|
|
236
234
|
@Test(expected = ConfigException.class)
|
@@ -251,14 +249,16 @@ public class TestGcsFileInputPlugin
|
|
251
249
|
|
252
250
|
Method method = GcsFileInputPlugin.class.getDeclaredMethod("newGcsAuth", PluginTask.class);
|
253
251
|
method.setAccessible(true);
|
254
|
-
|
252
|
+
GcsFileInput.newGcsClient(task, (GcsAuthentication) method.invoke(plugin, task));
|
255
253
|
}
|
256
254
|
|
257
255
|
@Test
|
258
256
|
public void testResume()
|
259
257
|
{
|
260
258
|
PluginTask task = config.loadConfig(PluginTask.class);
|
261
|
-
|
259
|
+
FileList.Builder builder = new FileList.Builder(config);
|
260
|
+
builder.add("in/aa/a", 1);
|
261
|
+
task.setFiles(builder.build());
|
262
262
|
ConfigDiff configDiff = plugin.resume(task.dump(), 0, new FileInputPlugin.Control()
|
263
263
|
{
|
264
264
|
@Override
|
@@ -298,12 +298,44 @@ public class TestGcsFileInputPlugin
|
|
298
298
|
|
299
299
|
Method method = GcsFileInputPlugin.class.getDeclaredMethod("newGcsAuth", PluginTask.class);
|
300
300
|
method.setAccessible(true);
|
301
|
-
Storage client =
|
302
|
-
|
303
|
-
|
301
|
+
Storage client = GcsFileInput.newGcsClient(task, (GcsAuthentication) method.invoke(plugin, task));
|
302
|
+
FileList.Builder builder = new FileList.Builder(config);
|
303
|
+
GcsFileInput.listGcsFilesByPrefix(builder, client, GCP_BUCKET, GCP_PATH_PREFIX, Optional.<String>absent());
|
304
|
+
FileList fileList = builder.build();
|
305
|
+
assertEquals(expected.get(0), fileList.get(0).get(0));
|
306
|
+
assertEquals(expected.get(1), fileList.get(1).get(0));
|
304
307
|
assertEquals(GCP_BUCKET_DIRECTORY + "sample_02.csv", configDiff.get(String.class, "last_path"));
|
305
308
|
}
|
306
309
|
|
310
|
+
@Test
|
311
|
+
public void testListFilesByPrefixWithPattern()
|
312
|
+
throws NoSuchMethodException, IllegalAccessException, InvocationTargetException
|
313
|
+
{
|
314
|
+
List<String> expected = Arrays.asList(
|
315
|
+
GCP_BUCKET_DIRECTORY + "sample_01.csv"
|
316
|
+
);
|
317
|
+
|
318
|
+
ConfigSource configWithPattern = config.deepCopy().set("path_match_pattern", "1");
|
319
|
+
PluginTask task = configWithPattern.loadConfig(PluginTask.class);
|
320
|
+
ConfigDiff configDiff = plugin.transaction(configWithPattern, new FileInputPlugin.Control() {
|
321
|
+
@Override
|
322
|
+
public List<TaskReport> run(TaskSource taskSource, int taskCount)
|
323
|
+
{
|
324
|
+
assertEquals(1, taskCount);
|
325
|
+
return emptyTaskReports(taskCount);
|
326
|
+
}
|
327
|
+
});
|
328
|
+
|
329
|
+
Method method = GcsFileInputPlugin.class.getDeclaredMethod("newGcsAuth", PluginTask.class);
|
330
|
+
method.setAccessible(true);
|
331
|
+
Storage client = GcsFileInput.newGcsClient(task, (GcsAuthentication) method.invoke(plugin, task));
|
332
|
+
FileList.Builder builder = new FileList.Builder(configWithPattern);
|
333
|
+
GcsFileInput.listGcsFilesByPrefix(builder, client, GCP_BUCKET, GCP_PATH_PREFIX, Optional.<String>absent());
|
334
|
+
FileList fileList = builder.build();
|
335
|
+
assertEquals(expected.get(0), fileList.get(0).get(0));
|
336
|
+
assertEquals(GCP_BUCKET_DIRECTORY + "sample_01.csv", configDiff.get(String.class, "last_path"));
|
337
|
+
}
|
338
|
+
|
307
339
|
@Test
|
308
340
|
public void testListFilesByPrefixIncrementalFalse() throws Exception
|
309
341
|
{
|
@@ -324,8 +356,9 @@ public class TestGcsFileInputPlugin
|
|
324
356
|
|
325
357
|
Method method = GcsFileInputPlugin.class.getDeclaredMethod("newGcsAuth", PluginTask.class);
|
326
358
|
method.setAccessible(true);
|
327
|
-
Storage client =
|
328
|
-
|
359
|
+
Storage client = GcsFileInput.newGcsClient(task, (GcsAuthentication) method.invoke(plugin, task));
|
360
|
+
FileList.Builder builder = new FileList.Builder(config);
|
361
|
+
GcsFileInput.listGcsFilesByPrefix(builder, client, "non-exists-bucket", "prefix", Optional.<String>absent()); // no errors happens
|
329
362
|
}
|
330
363
|
|
331
364
|
@Test
|
@@ -379,10 +412,10 @@ public class TestGcsFileInputPlugin
|
|
379
412
|
PluginTask task = config.loadConfig(PluginTask.class);
|
380
413
|
runner.transaction(config, new Control());
|
381
414
|
|
382
|
-
Method method =
|
415
|
+
Method method = GcsFileInput.class.getDeclaredMethod("newGcsAuth", PluginTask.class);
|
383
416
|
method.setAccessible(true);
|
384
|
-
Storage client =
|
385
|
-
task.setFiles(
|
417
|
+
Storage client = GcsFileInput.newGcsClient(task, (GcsAuthentication) method.invoke(plugin, task));
|
418
|
+
task.setFiles(GcsFileInput.listFiles(task, client));
|
386
419
|
|
387
420
|
assertRecords(config, output);
|
388
421
|
}
|
@@ -402,14 +435,14 @@ public class TestGcsFileInputPlugin
|
|
402
435
|
PluginTask task = config.loadConfig(PluginTask.class);
|
403
436
|
runner.transaction(config, new Control());
|
404
437
|
|
405
|
-
Method method =
|
438
|
+
Method method = GcsFileInput.class.getDeclaredMethod("newGcsAuth", PluginTask.class);
|
406
439
|
method.setAccessible(true);
|
407
|
-
Storage client =
|
440
|
+
Storage client = GcsFileInput.newGcsClient(task, (GcsAuthentication) method.invoke(plugin, task));
|
408
441
|
File tempFile = Exec.getTempFileSpace().createTempFile();
|
409
|
-
task.setFiles(
|
442
|
+
task.setFiles(GcsFileInput.listFiles(task, client));
|
410
443
|
|
411
444
|
String key = GCP_BUCKET_DIRECTORY + "sample_01.csv";
|
412
|
-
|
445
|
+
SingleFileProvider.GcsInputStreamReopener opener = new SingleFileProvider.GcsInputStreamReopener(tempFile, client, GCP_BUCKET, key, MAX_CONNECTION_RETRY);
|
413
446
|
try (InputStream in = opener.reopen(0, new RuntimeException())) {
|
414
447
|
BufferedReader r = new BufferedReader(new InputStreamReader(in));
|
415
448
|
assertEquals("id,account,time,purchase,comment", r.readLine());
|
@@ -420,7 +453,7 @@ public class TestGcsFileInputPlugin
|
|
420
453
|
public void testBase64()
|
421
454
|
throws NoSuchMethodException, IllegalAccessException, InvocationTargetException
|
422
455
|
{
|
423
|
-
Method method =
|
456
|
+
Method method = GcsFileInput.class.getDeclaredMethod("base64Encode", String.class);
|
424
457
|
method.setAccessible(true);
|
425
458
|
|
426
459
|
assertEquals("CgFj", method.invoke(plugin, "c"));
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-gcs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.5
|
4
|
+
version: 0.2.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Satoshi Akama
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-03-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -58,8 +58,12 @@ files:
|
|
58
58
|
- gradlew.bat
|
59
59
|
- lib/embulk/input/gcs.rb
|
60
60
|
- settings.gradle
|
61
|
+
- src/main/java/org/embulk/input/gcs/FileList.java
|
61
62
|
- src/main/java/org/embulk/input/gcs/GcsAuthentication.java
|
63
|
+
- src/main/java/org/embulk/input/gcs/GcsFileInput.java
|
62
64
|
- src/main/java/org/embulk/input/gcs/GcsFileInputPlugin.java
|
65
|
+
- src/main/java/org/embulk/input/gcs/PluginTask.java
|
66
|
+
- src/main/java/org/embulk/input/gcs/SingleFileProvider.java
|
63
67
|
- src/test/java/org/embulk/input/gcs/TestGcsAuthentication.java
|
64
68
|
- src/test/java/org/embulk/input/gcs/TestGcsFileInputPlugin.java
|
65
69
|
- src/test/resources/sample_01.csv
|
@@ -67,7 +71,7 @@ files:
|
|
67
71
|
- src/test/resources/secretkeys.tar.enc
|
68
72
|
- classpath/commons-codec-1.3.jar
|
69
73
|
- classpath/commons-logging-1.1.1.jar
|
70
|
-
- classpath/embulk-input-gcs-0.2.5.jar
|
74
|
+
- classpath/embulk-input-gcs-0.2.6.jar
|
71
75
|
- classpath/google-api-client-1.21.0.jar
|
72
76
|
- classpath/google-api-services-storage-v1-rev59-1.21.0.jar
|
73
77
|
- classpath/google-http-client-1.21.0.jar
|
Binary file
|