embulk-input-gcs 0.2.5 → 0.2.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,143 @@
1
+ package org.embulk.input.gcs;
2
+
3
+ import com.google.api.client.util.IOUtils;
4
+ import com.google.api.services.storage.Storage;
5
+ import com.google.common.annotations.VisibleForTesting;
6
+ import com.google.common.base.Throwables;
7
+ import org.embulk.spi.Exec;
8
+ import org.embulk.spi.util.InputStreamFileInput;
9
+ import org.embulk.spi.util.ResumableInputStream;
10
+ import org.embulk.spi.util.RetryExecutor;
11
+ import org.slf4j.Logger;
12
+
13
+ import java.io.BufferedInputStream;
14
+ import java.io.BufferedOutputStream;
15
+ import java.io.File;
16
+ import java.io.FileInputStream;
17
+ import java.io.FileOutputStream;
18
+ import java.io.IOException;
19
+ import java.io.InputStream;
20
+ import java.io.InterruptedIOException;
21
+ import java.util.Iterator;
22
+
23
+ import static org.embulk.spi.util.RetryExecutor.retryExecutor;
24
+
25
+ public class SingleFileProvider
26
+ implements InputStreamFileInput.Provider
27
+ {
28
+ private final Storage client;
29
+ private final String bucket;
30
+ private final Iterator<String> iterator;
31
+ private final int maxConnectionRetry;
32
+ private boolean opened = false;
33
+
34
+ public SingleFileProvider(PluginTask task, int taskIndex)
35
+ {
36
+ this.client = GcsFileInput.newGcsClient(task, GcsFileInput.newGcsAuth(task));
37
+ this.bucket = task.getBucket();
38
+ this.iterator = task.getFiles().get(taskIndex).iterator();
39
+ this.maxConnectionRetry = task.getMaxConnectionRetry();
40
+ }
41
+
42
+ @Override
43
+ public InputStream openNext() throws IOException
44
+ {
45
+ if (opened) {
46
+ return null;
47
+ }
48
+ opened = true;
49
+ if (!iterator.hasNext()) {
50
+ return null;
51
+ }
52
+ String key = iterator.next();
53
+ Storage.Objects.Get getObject = client.objects().get(bucket, key);
54
+ File tempFile = Exec.getTempFileSpace().createTempFile();
55
+ try (BufferedOutputStream outputStream = new BufferedOutputStream(new FileOutputStream(tempFile))) {
56
+ IOUtils.copy(getObject.executeMediaAsInputStream(), outputStream);
57
+ }
58
+ return new ResumableInputStream(new BufferedInputStream(new FileInputStream(tempFile)), new GcsInputStreamReopener(tempFile, client, bucket, key, maxConnectionRetry));
59
+ }
60
+
61
+ @Override
62
+ public void close()
63
+ {
64
+ }
65
+
66
+ @VisibleForTesting
67
+ static class GcsInputStreamReopener
68
+ implements ResumableInputStream.Reopener
69
+ {
70
+ private final Logger log = Exec.getLogger(GcsInputStreamReopener.class);
71
+ private final File tempFile;
72
+ private final Storage client;
73
+ private final String bucket;
74
+ private final String key;
75
+ private final int maxConnectionRetry;
76
+
77
+ public GcsInputStreamReopener(File tempFile, Storage client, String bucket, String key, int maxConnectionRetry)
78
+ {
79
+ this.tempFile = tempFile;
80
+ this.client = client;
81
+ this.bucket = bucket;
82
+ this.key = key;
83
+ this.maxConnectionRetry = maxConnectionRetry;
84
+ }
85
+
86
+ @Override
87
+ public InputStream reopen(final long offset, final Exception closedCause) throws IOException
88
+ {
89
+ try {
90
+ return retryExecutor()
91
+ .withRetryLimit(maxConnectionRetry)
92
+ .withInitialRetryWait(500)
93
+ .withMaxRetryWait(30 * 1000)
94
+ .runInterruptible(new RetryExecutor.Retryable<InputStream>() {
95
+ @Override
96
+ public InputStream call() throws IOException
97
+ {
98
+ log.warn(String.format("GCS read failed. Retrying GET request with %,d bytes offset", offset), closedCause);
99
+ Storage.Objects.Get getObject = client.objects().get(bucket, key);
100
+
101
+ try (BufferedOutputStream outputStream = new BufferedOutputStream(new FileOutputStream(tempFile))) {
102
+ IOUtils.copy(getObject.executeMediaAsInputStream(), outputStream);
103
+ }
104
+ return new BufferedInputStream(new FileInputStream(tempFile));
105
+ }
106
+
107
+ @Override
108
+ public boolean isRetryableException(Exception exception)
109
+ {
110
+ return true; // TODO
111
+ }
112
+
113
+ @Override
114
+ public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait)
115
+ throws RetryExecutor.RetryGiveupException
116
+ {
117
+ String message = String.format("GCS GET request failed. Retrying %d/%d after %d seconds. Message: %s",
118
+ retryCount, retryLimit, retryWait / 1000, exception.getMessage());
119
+ if (retryCount % 3 == 0) {
120
+ log.warn(message, exception);
121
+ }
122
+ else {
123
+ log.warn(message);
124
+ }
125
+ }
126
+
127
+ @Override
128
+ public void onGiveup(Exception firstException, Exception lastException)
129
+ throws RetryExecutor.RetryGiveupException
130
+ {
131
+ }
132
+ });
133
+ }
134
+ catch (RetryExecutor.RetryGiveupException ex) {
135
+ Throwables.propagateIfInstanceOf(ex.getCause(), IOException.class);
136
+ throw Throwables.propagate(ex.getCause());
137
+ }
138
+ catch (InterruptedException ex) {
139
+ throw new InterruptedIOException();
140
+ }
141
+ }
142
+ }
143
+ }
@@ -11,7 +11,6 @@ import org.embulk.config.ConfigException;
11
11
  import org.embulk.config.ConfigSource;
12
12
  import org.embulk.config.TaskReport;
13
13
  import org.embulk.config.TaskSource;
14
- import org.embulk.input.gcs.GcsFileInputPlugin.PluginTask;
15
14
  import org.embulk.spi.Exec;
16
15
  import org.embulk.spi.FileInputPlugin;
17
16
  import org.embulk.spi.FileInputRunner;
@@ -37,7 +36,6 @@ import java.util.Arrays;
37
36
  import java.util.List;
38
37
 
39
38
  import static org.junit.Assert.assertEquals;
40
- import static org.junit.Assert.assertFalse;
41
39
  import static org.junit.Assume.assumeNotNull;
42
40
 
43
41
  import java.lang.reflect.InvocationTargetException;
@@ -105,7 +103,7 @@ public class TestGcsFileInputPlugin
105
103
  .set("bucket", GCP_BUCKET)
106
104
  .set("path_prefix", "my-prefix");
107
105
 
108
- GcsFileInputPlugin.PluginTask task = config.loadConfig(PluginTask.class);
106
+ PluginTask task = config.loadConfig(PluginTask.class);
109
107
  assertEquals(true, task.getIncremental());
110
108
  assertEquals("private_key", task.getAuthMethod().toString());
111
109
  assertEquals("Embulk GCS input plugin", task.getApplicationName());
@@ -124,8 +122,8 @@ public class TestGcsFileInputPlugin
124
122
  .set("p12_keyfile_fullpath", GCP_P12_KEYFILE)
125
123
  .set("parser", parserConfig(schemaConfig()));
126
124
 
127
- GcsFileInputPlugin.PluginTask task = config.loadConfig(PluginTask.class);
128
- assertFalse(task.getFiles().isEmpty());
125
+ PluginTask task = config.loadConfig(PluginTask.class);
126
+ assertEquals(2, task.getPathFiles().size());
129
127
  }
130
128
 
131
129
  // both path_prefix and paths are not set
@@ -230,7 +228,7 @@ public class TestGcsFileInputPlugin
230
228
 
231
229
  Method method = GcsFileInputPlugin.class.getDeclaredMethod("newGcsAuth", PluginTask.class);
232
230
  method.setAccessible(true);
233
- plugin.newGcsClient(task, (GcsAuthentication) method.invoke(plugin, task)); // no errors happens
231
+ GcsFileInput.newGcsClient(task, (GcsAuthentication) method.invoke(plugin, task)); // no errors happens
234
232
  }
235
233
 
236
234
  @Test(expected = ConfigException.class)
@@ -251,14 +249,16 @@ public class TestGcsFileInputPlugin
251
249
 
252
250
  Method method = GcsFileInputPlugin.class.getDeclaredMethod("newGcsAuth", PluginTask.class);
253
251
  method.setAccessible(true);
254
- plugin.newGcsClient(task, (GcsAuthentication) method.invoke(plugin, task));
252
+ GcsFileInput.newGcsClient(task, (GcsAuthentication) method.invoke(plugin, task));
255
253
  }
256
254
 
257
255
  @Test
258
256
  public void testResume()
259
257
  {
260
258
  PluginTask task = config.loadConfig(PluginTask.class);
261
- task.setFiles(Arrays.asList(new String[]{"in/aa/a"}));
259
+ FileList.Builder builder = new FileList.Builder(config);
260
+ builder.add("in/aa/a", 1);
261
+ task.setFiles(builder.build());
262
262
  ConfigDiff configDiff = plugin.resume(task.dump(), 0, new FileInputPlugin.Control()
263
263
  {
264
264
  @Override
@@ -298,12 +298,44 @@ public class TestGcsFileInputPlugin
298
298
 
299
299
  Method method = GcsFileInputPlugin.class.getDeclaredMethod("newGcsAuth", PluginTask.class);
300
300
  method.setAccessible(true);
301
- Storage client = plugin.newGcsClient(task, (GcsAuthentication) method.invoke(plugin, task));
302
- List<String> actual = plugin.listGcsFilesByPrefix(client, GCP_BUCKET, GCP_PATH_PREFIX, Optional.<String>absent());
303
- assertEquals(expected, actual);
301
+ Storage client = GcsFileInput.newGcsClient(task, (GcsAuthentication) method.invoke(plugin, task));
302
+ FileList.Builder builder = new FileList.Builder(config);
303
+ GcsFileInput.listGcsFilesByPrefix(builder, client, GCP_BUCKET, GCP_PATH_PREFIX, Optional.<String>absent());
304
+ FileList fileList = builder.build();
305
+ assertEquals(expected.get(0), fileList.get(0).get(0));
306
+ assertEquals(expected.get(1), fileList.get(1).get(0));
304
307
  assertEquals(GCP_BUCKET_DIRECTORY + "sample_02.csv", configDiff.get(String.class, "last_path"));
305
308
  }
306
309
 
310
+ @Test
311
+ public void testListFilesByPrefixWithPattern()
312
+ throws NoSuchMethodException, IllegalAccessException, InvocationTargetException
313
+ {
314
+ List<String> expected = Arrays.asList(
315
+ GCP_BUCKET_DIRECTORY + "sample_01.csv"
316
+ );
317
+
318
+ ConfigSource configWithPattern = config.deepCopy().set("path_match_pattern", "1");
319
+ PluginTask task = configWithPattern.loadConfig(PluginTask.class);
320
+ ConfigDiff configDiff = plugin.transaction(configWithPattern, new FileInputPlugin.Control() {
321
+ @Override
322
+ public List<TaskReport> run(TaskSource taskSource, int taskCount)
323
+ {
324
+ assertEquals(1, taskCount);
325
+ return emptyTaskReports(taskCount);
326
+ }
327
+ });
328
+
329
+ Method method = GcsFileInputPlugin.class.getDeclaredMethod("newGcsAuth", PluginTask.class);
330
+ method.setAccessible(true);
331
+ Storage client = GcsFileInput.newGcsClient(task, (GcsAuthentication) method.invoke(plugin, task));
332
+ FileList.Builder builder = new FileList.Builder(configWithPattern);
333
+ GcsFileInput.listGcsFilesByPrefix(builder, client, GCP_BUCKET, GCP_PATH_PREFIX, Optional.<String>absent());
334
+ FileList fileList = builder.build();
335
+ assertEquals(expected.get(0), fileList.get(0).get(0));
336
+ assertEquals(GCP_BUCKET_DIRECTORY + "sample_01.csv", configDiff.get(String.class, "last_path"));
337
+ }
338
+
307
339
  @Test
308
340
  public void testListFilesByPrefixIncrementalFalse() throws Exception
309
341
  {
@@ -324,8 +356,9 @@ public class TestGcsFileInputPlugin
324
356
 
325
357
  Method method = GcsFileInputPlugin.class.getDeclaredMethod("newGcsAuth", PluginTask.class);
326
358
  method.setAccessible(true);
327
- Storage client = plugin.newGcsClient(task, (GcsAuthentication) method.invoke(plugin, task));
328
- plugin.listGcsFilesByPrefix(client, "non-exists-bucket", "prefix", Optional.<String>absent()); // no errors happens
359
+ Storage client = GcsFileInput.newGcsClient(task, (GcsAuthentication) method.invoke(plugin, task));
360
+ FileList.Builder builder = new FileList.Builder(config);
361
+ GcsFileInput.listGcsFilesByPrefix(builder, client, "non-exists-bucket", "prefix", Optional.<String>absent()); // no errors happens
329
362
  }
330
363
 
331
364
  @Test
@@ -379,10 +412,10 @@ public class TestGcsFileInputPlugin
379
412
  PluginTask task = config.loadConfig(PluginTask.class);
380
413
  runner.transaction(config, new Control());
381
414
 
382
- Method method = GcsFileInputPlugin.class.getDeclaredMethod("newGcsAuth", PluginTask.class);
415
+ Method method = GcsFileInput.class.getDeclaredMethod("newGcsAuth", PluginTask.class);
383
416
  method.setAccessible(true);
384
- Storage client = plugin.newGcsClient(task, (GcsAuthentication) method.invoke(plugin, task));
385
- task.setFiles(plugin.listFiles(task, client));
417
+ Storage client = GcsFileInput.newGcsClient(task, (GcsAuthentication) method.invoke(plugin, task));
418
+ task.setFiles(GcsFileInput.listFiles(task, client));
386
419
 
387
420
  assertRecords(config, output);
388
421
  }
@@ -402,14 +435,14 @@ public class TestGcsFileInputPlugin
402
435
  PluginTask task = config.loadConfig(PluginTask.class);
403
436
  runner.transaction(config, new Control());
404
437
 
405
- Method method = GcsFileInputPlugin.class.getDeclaredMethod("newGcsAuth", PluginTask.class);
438
+ Method method = GcsFileInput.class.getDeclaredMethod("newGcsAuth", PluginTask.class);
406
439
  method.setAccessible(true);
407
- Storage client = plugin.newGcsClient(task, (GcsAuthentication) method.invoke(plugin, task));
440
+ Storage client = GcsFileInput.newGcsClient(task, (GcsAuthentication) method.invoke(plugin, task));
408
441
  File tempFile = Exec.getTempFileSpace().createTempFile();
409
- task.setFiles(plugin.listFiles(task, client));
442
+ task.setFiles(GcsFileInput.listFiles(task, client));
410
443
 
411
444
  String key = GCP_BUCKET_DIRECTORY + "sample_01.csv";
412
- GcsFileInputPlugin.GcsInputStreamReopener opener = new GcsFileInputPlugin.GcsInputStreamReopener(tempFile, client, GCP_BUCKET, key, MAX_CONNECTION_RETRY);
445
+ SingleFileProvider.GcsInputStreamReopener opener = new SingleFileProvider.GcsInputStreamReopener(tempFile, client, GCP_BUCKET, key, MAX_CONNECTION_RETRY);
413
446
  try (InputStream in = opener.reopen(0, new RuntimeException())) {
414
447
  BufferedReader r = new BufferedReader(new InputStreamReader(in));
415
448
  assertEquals("id,account,time,purchase,comment", r.readLine());
@@ -420,7 +453,7 @@ public class TestGcsFileInputPlugin
420
453
  public void testBase64()
421
454
  throws NoSuchMethodException, IllegalAccessException, InvocationTargetException
422
455
  {
423
- Method method = GcsFileInputPlugin.class.getDeclaredMethod("base64Encode", String.class);
456
+ Method method = GcsFileInput.class.getDeclaredMethod("base64Encode", String.class);
424
457
  method.setAccessible(true);
425
458
 
426
459
  assertEquals("CgFj", method.invoke(plugin, "c"));
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-gcs
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.5
4
+ version: 0.2.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Satoshi Akama
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-05-19 00:00:00.000000000 Z
11
+ date: 2018-03-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -58,8 +58,12 @@ files:
58
58
  - gradlew.bat
59
59
  - lib/embulk/input/gcs.rb
60
60
  - settings.gradle
61
+ - src/main/java/org/embulk/input/gcs/FileList.java
61
62
  - src/main/java/org/embulk/input/gcs/GcsAuthentication.java
63
+ - src/main/java/org/embulk/input/gcs/GcsFileInput.java
62
64
  - src/main/java/org/embulk/input/gcs/GcsFileInputPlugin.java
65
+ - src/main/java/org/embulk/input/gcs/PluginTask.java
66
+ - src/main/java/org/embulk/input/gcs/SingleFileProvider.java
63
67
  - src/test/java/org/embulk/input/gcs/TestGcsAuthentication.java
64
68
  - src/test/java/org/embulk/input/gcs/TestGcsFileInputPlugin.java
65
69
  - src/test/resources/sample_01.csv
@@ -67,7 +71,7 @@ files:
67
71
  - src/test/resources/secretkeys.tar.enc
68
72
  - classpath/commons-codec-1.3.jar
69
73
  - classpath/commons-logging-1.1.1.jar
70
- - classpath/embulk-input-gcs-0.2.5.jar
74
+ - classpath/embulk-input-gcs-0.2.6.jar
71
75
  - classpath/google-api-client-1.21.0.jar
72
76
  - classpath/google-api-services-storage-v1-rev59-1.21.0.jar
73
77
  - classpath/google-http-client-1.21.0.jar
Binary file