embulk-input-gcs 0.2.4 → 0.2.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ed0415e9a36b04db434dd09b655ac159ce062b2a
4
- data.tar.gz: 868028f5e59ddaf23771a962b635ecc34c344f28
3
+ metadata.gz: 01e9c70bf2d1f4c9a25784ab64828913c056078d
4
+ data.tar.gz: 22d6b84e12d965045719b49026924f5247a6609a
5
5
  SHA512:
6
- metadata.gz: b1d8f6ee7e69eddaad96547196bfd0b193c5c8141323aad8b84945f197fbe2f464feb273f20f7a6da6d978559ff8a3ce0e683e622984e76188a77d0f4ac82a91
7
- data.tar.gz: f2c7f5db67082badf48838b6fc4b1af372cc525ca70b5668656e5d9faf8eadf0dbd346f010433a824ad220a8394ee86a5cbc2758f3ef70082e8c6b578340026a
6
+ metadata.gz: d090628aec5d9512976ceff29215f99e04eb939e4e927106bca4bee47fe95cef7e636fc74b239cde7222cc71f5b259b43905b36ad90d3c515f8b97f8b6c9c220
7
+ data.tar.gz: 0ad764f0f9f85818b5924b3477bfe7d65806113ca56e689b45e919d3e8c0b156bfbb39cf60e9f349bb1005264655eaf0ad4446115f0254c554c5e425edac746a
data/CHANGELOG.md CHANGED
@@ -1,3 +1,6 @@
1
+ ## 0.2.5 - 2017-05-19
2
+ * [maintenance] Fix InputStream handling so that the plugin no longer reads fewer records than expected [#27](https://github.com/embulk/embulk-input-gcs/pull/27)
3
+
1
4
  ## 0.2.4 - 2017-03-16
2
5
  * [maintenance] Fix a possible NPE thrown during authentication [#25](https://github.com/embulk/embulk-input-gcs/pull/25)
3
6
 
data/build.gradle CHANGED
@@ -17,7 +17,7 @@ configurations {
17
17
  sourceCompatibility = 1.7
18
18
  targetCompatibility = 1.7
19
19
 
20
- version = "0.2.4"
20
+ version = "0.2.5"
21
21
 
22
22
  dependencies {
23
23
  compile "org.embulk:embulk-core:0.8.2"
@@ -1,6 +1,7 @@
1
1
  package org.embulk.input.gcs;
2
2
 
3
3
  import com.google.api.client.http.HttpResponseException;
4
+ import com.google.api.client.util.IOUtils;
4
5
  import com.google.api.services.storage.Storage;
5
6
  import com.google.api.services.storage.model.Bucket;
6
7
  import com.google.api.services.storage.model.Objects;
@@ -33,6 +34,11 @@ import org.embulk.spi.util.RetryExecutor.Retryable;
33
34
  import org.slf4j.Logger;
34
35
  import static org.embulk.spi.util.RetryExecutor.retryExecutor;
35
36
 
37
+ import java.io.BufferedInputStream;
38
+ import java.io.BufferedOutputStream;
39
+ import java.io.File;
40
+ import java.io.FileInputStream;
41
+ import java.io.FileOutputStream;
36
42
  import java.io.IOException;
37
43
  import java.io.InputStream;
38
44
  import java.io.InterruptedIOException;
@@ -250,19 +256,21 @@ public class GcsFileInputPlugin
250
256
  String lastKey = lastPath.isPresent() ? base64Encode(lastPath.get()) : null;
251
257
 
252
258
  // @see https://cloud.google.com/storage/docs/json_api/v1/objects#resource
253
- try {
254
- Storage.Buckets.Get getBucket = client.buckets().get(bucket);
255
- getBucket.setProjection("full");
256
- Bucket bk = getBucket.execute();
257
-
258
- log.debug("bucket name: " + bucket);
259
- log.debug("bucket location: " + bk.getLocation());
260
- log.debug("bucket timeCreated: " + bk.getTimeCreated());
261
- log.debug("bucket owner: " + bk.getOwner());
262
- }
263
- catch (IOException e) {
264
- log.warn("Could not access to bucket:" + bucket);
265
- log.warn(e.getMessage());
259
+ if (log.isDebugEnabled()) {
260
+ try {
261
+ Storage.Buckets.Get getBucket = client.buckets().get(bucket);
262
+ getBucket.setProjection("full");
263
+ Bucket bk = getBucket.execute();
264
+
265
+ log.debug("bucket name: " + bucket);
266
+ log.debug("bucket location: " + bk.getLocation());
267
+ log.debug("bucket timeCreated: " + bk.getTimeCreated());
268
+ log.debug("bucket owner: " + bk.getOwner());
269
+ }
270
+ catch (IOException e) {
271
+ log.warn("Could not access to bucket:" + bucket);
272
+ log.warn(e.getMessage());
273
+ }
266
274
  }
267
275
 
268
276
  try {
@@ -312,13 +320,15 @@ public class GcsFileInputPlugin
312
320
  implements ResumableInputStream.Reopener
313
321
  {
314
322
  private final Logger log = Exec.getLogger(GcsInputStreamReopener.class);
323
+ private final File tempFile;
315
324
  private final Storage client;
316
325
  private final String bucket;
317
326
  private final String key;
318
327
  private final int maxConnectionRetry;
319
328
 
320
- public GcsInputStreamReopener(Storage client, String bucket, String key, int maxConnectionRetry)
329
+ public GcsInputStreamReopener(File tempFile, Storage client, String bucket, String key, int maxConnectionRetry)
321
330
  {
331
+ this.tempFile = tempFile;
322
332
  this.client = client;
323
333
  this.bucket = bucket;
324
334
  this.key = key;
@@ -335,11 +345,15 @@ public class GcsFileInputPlugin
335
345
  .withMaxRetryWait(30 * 1000)
336
346
  .runInterruptible(new Retryable<InputStream>() {
337
347
  @Override
338
- public InputStream call() throws InterruptedIOException, IOException
348
+ public InputStream call() throws IOException
339
349
  {
340
350
  log.warn(String.format("GCS read failed. Retrying GET request with %,d bytes offset", offset), closedCause);
341
351
  Storage.Objects.Get getObject = client.objects().get(bucket, key);
342
- return getObject.executeMediaAsInputStream();
352
+
353
+ try (BufferedOutputStream outputStream = new BufferedOutputStream(new FileOutputStream(tempFile))) {
354
+ IOUtils.copy(getObject.executeMediaAsInputStream(), outputStream);
355
+ }
356
+ return new BufferedInputStream(new FileInputStream(tempFile));
343
357
  }
344
358
 
345
359
  @Override
@@ -428,8 +442,11 @@ public class GcsFileInputPlugin
428
442
  }
429
443
  opened = true;
430
444
  Storage.Objects.Get getObject = client.objects().get(bucket, key);
431
-
432
- return new ResumableInputStream(getObject.executeMediaAsInputStream(), new GcsInputStreamReopener(client, bucket, key, maxConnectionRetry));
445
+ File tempFile = Exec.getTempFileSpace().createTempFile();
446
+ try (BufferedOutputStream outputStream = new BufferedOutputStream(new FileOutputStream(tempFile))) {
447
+ IOUtils.copy(getObject.executeMediaAsInputStream(), outputStream);
448
+ }
449
+ return new ResumableInputStream(new BufferedInputStream(new FileInputStream(tempFile)), new GcsInputStreamReopener(tempFile, client, bucket, key, maxConnectionRetry));
433
450
  }
434
451
 
435
452
  @Override
@@ -27,6 +27,7 @@ import org.junit.Test;
27
27
 
28
28
  import java.io.BufferedReader;
29
29
 
30
+ import java.io.File;
30
31
  import java.io.IOException;
31
32
  import java.io.InputStream;
32
33
  import java.io.InputStreamReader;
@@ -404,10 +405,11 @@ public class TestGcsFileInputPlugin
404
405
  Method method = GcsFileInputPlugin.class.getDeclaredMethod("newGcsAuth", PluginTask.class);
405
406
  method.setAccessible(true);
406
407
  Storage client = plugin.newGcsClient(task, (GcsAuthentication) method.invoke(plugin, task));
408
+ File tempFile = Exec.getTempFileSpace().createTempFile();
407
409
  task.setFiles(plugin.listFiles(task, client));
408
410
 
409
411
  String key = GCP_BUCKET_DIRECTORY + "sample_01.csv";
410
- GcsFileInputPlugin.GcsInputStreamReopener opener = new GcsFileInputPlugin.GcsInputStreamReopener(client, GCP_BUCKET, key, MAX_CONNECTION_RETRY);
412
+ GcsFileInputPlugin.GcsInputStreamReopener opener = new GcsFileInputPlugin.GcsInputStreamReopener(tempFile, client, GCP_BUCKET, key, MAX_CONNECTION_RETRY);
411
413
  try (InputStream in = opener.reopen(0, new RuntimeException())) {
412
414
  BufferedReader r = new BufferedReader(new InputStreamReader(in));
413
415
  assertEquals("id,account,time,purchase,comment", r.readLine());
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-gcs
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.4
4
+ version: 0.2.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Satoshi Akama
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-03-16 00:00:00.000000000 Z
11
+ date: 2017-05-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -67,7 +67,7 @@ files:
67
67
  - src/test/resources/secretkeys.tar.enc
68
68
  - classpath/commons-codec-1.3.jar
69
69
  - classpath/commons-logging-1.1.1.jar
70
- - classpath/embulk-input-gcs-0.2.4.jar
70
+ - classpath/embulk-input-gcs-0.2.5.jar
71
71
  - classpath/google-api-client-1.21.0.jar
72
72
  - classpath/google-api-services-storage-v1-rev59-1.21.0.jar
73
73
  - classpath/google-http-client-1.21.0.jar
Binary file