embulk-input-gcs 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 84c5f3e02ea5ad9dd6b8536ad9c35bc5e893f785
4
- data.tar.gz: 6e5cc8dda9c62e0515db17cf1c486b902609b853
3
+ metadata.gz: 8e9248a8e1b730f9885caa93b24cbf47dfb8a8c0
4
+ data.tar.gz: 6405b783b5d7ea6a6b74f017946f69e9b7c5b3a6
5
5
  SHA512:
6
- metadata.gz: ad92f9ff0c884436d4e667d01f23ea088e6a68793941ec8efb87e7694d2770ecde5a0a2ad59da1e58649477be2bd1237843b729004113d447bdeba9c359f0d36
7
- data.tar.gz: 15087aa4dbd3365c88b3ab75cbf02de30cd9e0250bf3b372aeee155b2155239db68b75934d920120771e126b14825d7e4544bbca0bf5fb2dff3f483a6837eee1
6
+ metadata.gz: fbbce0c9ba052477d392ba00586d6951f00924a9d163409b3394ac393035b9b33c5acb2aeecc2111b1a3febc8d59ab0b9401a4b680ed6e578cfc5e57ab844e3d
7
+ data.tar.gz: 23604910383ac32a7aa66b0afb94144cd23bea0b26866748d24ec8189f27ea313e4a9cda0a599a1aefce8be84e0bcc3957754dbe95e6d5755dfc517666685806
data/.travis.yml ADDED
@@ -0,0 +1,23 @@
1
+ language: java
2
+
3
+ jdk:
4
+ - oraclejdk8
5
+ - oraclejdk7
6
+ - openjdk7
7
+
8
+ before_install:
9
+ - openssl aes-256-cbc -K $encrypted_79f1af2a2546_key -iv $encrypted_79f1af2a2546_iv
10
+ -in src/test/resources/secretkeys.tar.enc -out secretkeys.tar -d
11
+ - tar xvf secretkeys.tar
12
+
13
+ env:
14
+ global:
15
+ - GCP_EMAIL=unittest@embulk-input-gcs-test.iam.gserviceaccount.com
16
+ - GCP_BUCKET=embulk-input-gcs-test
17
+ - GCP_BUCKET_DIRECTORY=unittests_import
18
+ - GCP_P12_KEYFILE=./embulk-input-gcs-test-018324286daf.p12
19
+ - GCP_JSON_KEYFILE=./embulk-input-gcs-test-841948b819cf.json
20
+
21
+ script:
22
+ - ./gradlew gem
23
+ - ./gradlew --info check jacocoTestReport
data/CHANGELOG.md ADDED
@@ -0,0 +1,49 @@
1
+ ## 0.2.1 - 2016-08-04
2
+ * [maintenance] Use retry logic when generating the GCS client [#21](https://github.com/embulk/embulk-input-gcs/pull/21)
3
+
4
+ ## 0.2.0 - 2016-06-03
5
+ * [new feature] Support path option to allow specifying a list of target objects directly. @sonots thanks! [#17](https://github.com/embulk/embulk-input-gcs/pull/17)
6
+
7
+ ## 0.1.13 - 2016-02-04
8
+ * [maintenance] Upgraded embulk to v0.8.2 [#14](https://github.com/embulk/embulk-input-gcs/pull/14)
9
+ * [maintenance] Updated Google HTTP Client Library from 1.19.0 to 1.21.0 [#15](https://github.com/embulk/embulk-input-gcs/pull/15)
10
+ * [maintenance] Updated Google Cloud Storage API Client Library from v1-rev27-1.19.1 to v1-rev59-1.21.0 [#15](https://github.com/embulk/embulk-input-gcs/pull/15)
11
+
12
+ ## 0.1.11 - 2016-01-25
13
+ * [maintenance] Added retry logic [#11](https://github.com/embulk/embulk-input-gcs/pull/11)
14
+
15
+ ## 0.1.10 - 2015-11-07
16
+
17
+ * [maintenance] Fix resume download logics [#10](https://github.com/embulk/embulk-input-gcs/pull/10)
18
+ * [maintenance] Throw ConfigException when file listing fails. @muga thanks! [#9](https://github.com/embulk/embulk-input-gcs/pull/9)
19
+
20
+ ## 0.1.9 - 2015-10-30
21
+
22
+ * [maintenance] Fix GcsAuthentication object initialization for mapreduce executor. @muga thanks! [#7](https://github.com/embulk/embulk-input-gcs/pull/7)
23
+
24
+ ## 0.1.8 - 2015-10-29
25
+
26
+ * [maintenance] Added unit tests [#8](https://github.com/embulk/embulk-input-gcs/pull/8)
27
+
28
+ ## 0.1.7 - 2015-10-06
29
+
30
+ * [new feature] Added new auth method - json_keyfile of GCP(Google Cloud Platform)'s service account [#5](https://github.com/embulk/embulk-input-gcs/pull/5)
31
+ * [maintenance] Supported mapreduce-executor [#4](https://github.com/embulk/embulk-input-gcs/pull/4)
32
+
33
+ ## 0.1.6 - 2015-09-05
34
+
35
+ * [new feature] Added new auth method - pre-defined access token of GCE(Google Compute Engine) [#3](https://github.com/embulk/embulk-input-gcs/pull/3)
36
+
37
+ ## 0.1.5 - 2015-08-19
38
+
39
+ * [maintenance] Upgraded embulk version to 0.7.0
40
+ * [maintenance] Refactored
41
+
42
+ ## 0.1.4 - 2015-06-27
43
+
44
+ * [maintenance] Keep the previous last_path when the list of input files is empty. @frsyuki thanks! [#1](https://github.com/embulk/embulk-input-gcs/pull/1)
45
+ * [maintenance] Refactored error handling logics.
46
+
47
+ ## 0.1.3 - 2015-03-16
48
+
49
+ * [maintenance] Changed supported Java version from 8 to 7
data/README.md CHANGED
@@ -1,4 +1,5 @@
1
1
  # Google Cloud Storage file input plugin for Embulk
2
+ [![Build Status](https://travis-ci.org/embulk/embulk-input-gcs.svg?branch=master)](https://travis-ci.org/embulk/embulk-input-gcs)
2
3
 
3
4
  ## Overview
4
5
 
data/build.gradle CHANGED
@@ -17,7 +17,7 @@ configurations {
17
17
  sourceCompatibility = 1.7
18
18
  targetCompatibility = 1.7
19
19
 
20
- version = "0.2.0"
20
+ version = "0.2.1"
21
21
 
22
22
  dependencies {
23
23
  compile "org.embulk:embulk-core:0.8.2"
@@ -1,5 +1,6 @@
1
1
  package org.embulk.input.gcs;
2
2
 
3
+ import com.google.api.client.auth.oauth2.TokenResponseException;
3
4
  import com.google.api.client.googleapis.auth.oauth2.GoogleCredential;
4
5
  import com.google.api.client.googleapis.compute.ComputeCredential;
5
6
  import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport;
@@ -10,16 +11,21 @@ import com.google.api.client.json.JsonFactory;
10
11
  import com.google.api.client.json.jackson2.JacksonFactory;
11
12
  import com.google.api.services.storage.Storage;
12
13
  import com.google.api.services.storage.StorageScopes;
13
- import com.google.api.services.storage.model.Objects;
14
14
  import com.google.common.base.Optional;
15
+ import com.google.common.base.Throwables;
15
16
  import com.google.common.collect.ImmutableList;
17
+ import org.embulk.config.ConfigException;
16
18
  import org.embulk.spi.Exec;
19
+ import org.embulk.spi.util.RetryExecutor.RetryGiveupException;
20
+ import org.embulk.spi.util.RetryExecutor.Retryable;
17
21
  import org.slf4j.Logger;
22
+ import static org.embulk.spi.util.RetryExecutor.retryExecutor;
18
23
 
19
24
  import java.io.File;
20
25
  import java.io.FileInputStream;
21
26
 
22
27
  import java.io.IOException;
28
+ import java.io.InterruptedIOException;
23
29
  import java.security.GeneralSecurityException;
24
30
  import java.util.Collections;
25
31
 
@@ -74,13 +80,13 @@ public class GcsAuthentication
74
80
  StorageScopes.DEVSTORAGE_READ_ONLY
75
81
  )
76
82
  )
77
- .setServiceAccountPrivateKeyFromP12File(new File(p12KeyFilePath.orNull()))
83
+ .setServiceAccountPrivateKeyFromP12File(new File(p12KeyFilePath.get()))
78
84
  .build();
79
85
  }
80
86
 
81
87
  private GoogleCredential getServiceAccountCredentialFromJsonFile() throws IOException
82
88
  {
83
- FileInputStream stream = new FileInputStream(jsonKeyFilePath.orNull());
89
+ FileInputStream stream = new FileInputStream(jsonKeyFilePath.get());
84
90
 
85
91
  return GoogleCredential.fromStream(stream, httpTransport, jsonFactory)
86
92
  .createScoped(Collections.singleton(StorageScopes.DEVSTORAGE_READ_ONLY));
@@ -99,16 +105,84 @@ public class GcsAuthentication
99
105
  return credential;
100
106
  }
101
107
 
102
- public Storage getGcsClient(String bucket) throws GoogleJsonResponseException, IOException
108
+ public Storage getGcsClient(final String bucket, int maxConnectionRetry) throws ConfigException, IOException
103
109
  {
104
- Storage client = new Storage.Builder(httpTransport, jsonFactory, credentials)
105
- .setApplicationName(applicationName)
106
- .build();
110
+ try {
111
+ return retryExecutor()
112
+ .withRetryLimit(maxConnectionRetry)
113
+ .withInitialRetryWait(500)
114
+ .withMaxRetryWait(30 * 1000)
115
+ .runInterruptible(new Retryable<Storage>() {
116
+ @Override
117
+ public Storage call() throws IOException, RetryGiveupException
118
+ {
119
+ Storage client = new Storage.Builder(httpTransport, jsonFactory, credentials)
120
+ .setApplicationName(applicationName)
121
+ .build();
122
+
123
+ // For throw ConfigException when authentication is fail.
124
+ long maxResults = 1;
125
+ client.objects().list(bucket).setMaxResults(maxResults).execute();
126
+
127
+ return client;
128
+ }
129
+
130
+ @Override
131
+ public boolean isRetryableException(Exception exception)
132
+ {
133
+ if (exception instanceof GoogleJsonResponseException || exception instanceof TokenResponseException) {
134
+ int statusCode;
135
+ if (exception instanceof GoogleJsonResponseException) {
136
+ statusCode = ((GoogleJsonResponseException) exception).getDetails().getCode();
137
+ }
138
+ else {
139
+ statusCode = ((TokenResponseException) exception).getStatusCode();
140
+ }
141
+ if (statusCode / 100 == 4) {
142
+ return false;
143
+ }
144
+ }
145
+ return true;
146
+ }
107
147
 
108
- // For throw IOException when authentication is fail.
109
- long maxResults = 1;
110
- Objects objects = client.objects().list(bucket).setMaxResults(maxResults).execute();
148
+ @Override
149
+ public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait)
150
+ throws RetryGiveupException
151
+ {
152
+ String message = String.format("GCS GET request failed. Retrying %d/%d after %d seconds. Message: %s: %s",
153
+ retryCount, retryLimit, retryWait / 1000, exception.getClass(), exception.getMessage());
154
+ if (retryCount % 3 == 0) {
155
+ log.warn(message, exception);
156
+ }
157
+ else {
158
+ log.warn(message);
159
+ }
160
+ }
111
161
 
112
- return client;
162
+ @Override
163
+ public void onGiveup(Exception firstException, Exception lastException)
164
+ throws RetryGiveupException
165
+ {
166
+ }
167
+ });
168
+ }
169
+ catch (RetryGiveupException ex) {
170
+ if (ex.getCause() instanceof GoogleJsonResponseException || ex.getCause() instanceof TokenResponseException) {
171
+ int statusCode = 0;
172
+ if (ex.getCause() instanceof GoogleJsonResponseException) {
173
+ statusCode = ((GoogleJsonResponseException) ex.getCause()).getDetails().getCode();
174
+ }
175
+ else if (ex.getCause() instanceof TokenResponseException) {
176
+ statusCode = ((TokenResponseException) ex.getCause()).getStatusCode();
177
+ }
178
+ if (statusCode / 100 == 4) {
179
+ throw new ConfigException(ex);
180
+ }
181
+ }
182
+ throw Throwables.propagate(ex);
183
+ }
184
+ catch (InterruptedException ex) {
185
+ throw new InterruptedIOException();
186
+ }
113
187
  }
114
188
  }
@@ -90,6 +90,10 @@ public class GcsFileInputPlugin
90
90
  List<String> getFiles();
91
91
  void setFiles(List<String> files);
92
92
 
93
+ @Config("max_connection_retry")
94
+ @ConfigDefault("10") // 10 times retry to connect GCS server if failed.
95
+ int getMaxConnectionRetry();
96
+
93
97
  @ConfigInject
94
98
  BufferAllocator getBufferAllocator();
95
99
  }
@@ -198,7 +202,7 @@ public class GcsFileInputPlugin
198
202
  {
199
203
  Storage client = null;
200
204
  try {
201
- client = auth.getGcsClient(task.getBucket());
205
+ client = auth.getGcsClient(task.getBucket(), task.getMaxConnectionRetry());
202
206
  }
203
207
  catch (IOException ex) {
204
208
  throw new ConfigException(ex);
@@ -303,12 +307,14 @@ public class GcsFileInputPlugin
303
307
  private final Storage client;
304
308
  private final String bucket;
305
309
  private final String key;
310
+ private final int maxConnectionRetry;
306
311
 
307
- public GcsInputStreamReopener(Storage client, String bucket, String key)
312
+ public GcsInputStreamReopener(Storage client, String bucket, String key, int maxConnectionRetry)
308
313
  {
309
314
  this.client = client;
310
315
  this.bucket = bucket;
311
316
  this.key = key;
317
+ this.maxConnectionRetry = maxConnectionRetry;
312
318
  }
313
319
 
314
320
  @Override
@@ -316,7 +322,7 @@ public class GcsFileInputPlugin
316
322
  {
317
323
  try {
318
324
  return retryExecutor()
319
- .withRetryLimit(3)
325
+ .withRetryLimit(maxConnectionRetry)
320
326
  .withInitialRetryWait(500)
321
327
  .withMaxRetryWait(30 * 1000)
322
328
  .runInterruptible(new Retryable<InputStream>() {
@@ -395,6 +401,7 @@ public class GcsFileInputPlugin
395
401
  private final Storage client;
396
402
  private final String bucket;
397
403
  private final String key;
404
+ private final int maxConnectionRetry;
398
405
  private boolean opened = false;
399
406
 
400
407
  public SingleFileProvider(PluginTask task, int taskIndex)
@@ -402,6 +409,7 @@ public class GcsFileInputPlugin
402
409
  this.client = newGcsClient(task, newGcsAuth(task));
403
410
  this.bucket = task.getBucket();
404
411
  this.key = task.getFiles().get(taskIndex);
412
+ this.maxConnectionRetry = task.getMaxConnectionRetry();
405
413
  }
406
414
 
407
415
  @Override
@@ -413,7 +421,7 @@ public class GcsFileInputPlugin
413
421
  opened = true;
414
422
  Storage.Objects.Get getObject = client.objects().get(bucket, key);
415
423
 
416
- return new ResumableInputStream(getObject.executeMediaAsInputStream(), new GcsInputStreamReopener(client, bucket, key));
424
+ return new ResumableInputStream(getObject.executeMediaAsInputStream(), new GcsInputStreamReopener(client, bucket, key, maxConnectionRetry));
417
425
  }
418
426
 
419
427
  @Override
@@ -1,10 +1,10 @@
1
1
  package org.embulk.input.gcs;
2
2
 
3
3
  import com.google.api.client.googleapis.auth.oauth2.GoogleCredential;
4
- import com.google.api.client.googleapis.json.GoogleJsonResponseException;
5
4
  import com.google.api.services.storage.Storage;
6
5
  import com.google.common.base.Optional;
7
6
  import org.embulk.EmbulkTestRuntime;
7
+ import org.embulk.config.ConfigException;
8
8
  import org.junit.BeforeClass;
9
9
  import org.junit.Rule;
10
10
  import org.junit.Test;
@@ -25,6 +25,7 @@ public class TestGcsAuthentication
25
25
  private static Optional<String> GCP_JSON_KEYFILE;
26
26
  private static String GCP_BUCKET;
27
27
  private static final String GCP_APPLICATION_NAME = "embulk-input-gcs";
28
+ private static int MAX_CONNECTION_RETRY = 3;
28
29
 
29
30
  /*
30
31
  * This test case requires environment variables
@@ -97,12 +98,12 @@ public class TestGcsAuthentication
97
98
  GCP_APPLICATION_NAME
98
99
  );
99
100
 
100
- Storage client = auth.getGcsClient(GCP_BUCKET);
101
+ Storage client = auth.getGcsClient(GCP_BUCKET, MAX_CONNECTION_RETRY);
101
102
 
102
103
  assertEquals(Storage.class, client.getClass());
103
104
  }
104
105
 
105
- @Test(expected = GoogleJsonResponseException.class)
106
+ @Test(expected = ConfigException.class)
106
107
  public void testGetGcsClientUsingServiceAccountCredentialThrowJsonResponseException()
107
108
  throws NoSuchFieldException, IllegalAccessException, GeneralSecurityException, IOException
108
109
  {
@@ -114,7 +115,7 @@ public class TestGcsAuthentication
114
115
  GCP_APPLICATION_NAME
115
116
  );
116
117
 
117
- Storage client = auth.getGcsClient("non-exists-bucket");
118
+ Storage client = auth.getGcsClient("non-exists-bucket", MAX_CONNECTION_RETRY);
118
119
 
119
120
  assertEquals(Storage.class, client.getClass());
120
121
  }
@@ -162,12 +163,12 @@ public class TestGcsAuthentication
162
163
  GCP_APPLICATION_NAME
163
164
  );
164
165
 
165
- Storage client = auth.getGcsClient(GCP_BUCKET);
166
+ Storage client = auth.getGcsClient(GCP_BUCKET, MAX_CONNECTION_RETRY);
166
167
 
167
168
  assertEquals(Storage.class, client.getClass());
168
169
  }
169
170
 
170
- @Test(expected = GoogleJsonResponseException.class)
171
+ @Test(expected = ConfigException.class)
171
172
  public void testGetServiceAccountCredentialFromJsonThrowGoogleJsonResponseException()
172
173
  throws NoSuchFieldException, IllegalAccessException, GeneralSecurityException, IOException
173
174
  {
@@ -179,6 +180,6 @@ public class TestGcsAuthentication
179
180
  GCP_APPLICATION_NAME
180
181
  );
181
182
 
182
- Storage client = auth.getGcsClient("non-exists-bucket");
183
+ Storage client = auth.getGcsClient("non-exists-bucket", MAX_CONNECTION_RETRY);
183
184
  }
184
185
  }
@@ -52,6 +52,7 @@ public class TestGcsFileInputPlugin
52
52
  private static String GCP_BUCKET_DIRECTORY;
53
53
  private static String GCP_PATH_PREFIX;
54
54
  private static String GCP_APPLICATION_NAME;
55
+ private static int MAX_CONNECTION_RETRY = 3;
55
56
  private FileInputRunner runner;
56
57
  private MockPageOutput output;
57
58
 
@@ -358,7 +359,7 @@ public class TestGcsFileInputPlugin
358
359
  task.setFiles(plugin.listFiles(task, client));
359
360
 
360
361
  String key = GCP_BUCKET_DIRECTORY + "sample_01.csv";
361
- GcsFileInputPlugin.GcsInputStreamReopener opener = new GcsFileInputPlugin.GcsInputStreamReopener(client, GCP_BUCKET, key);
362
+ GcsFileInputPlugin.GcsInputStreamReopener opener = new GcsFileInputPlugin.GcsInputStreamReopener(client, GCP_BUCKET, key, MAX_CONNECTION_RETRY);
362
363
  try (InputStream in = opener.reopen(0, new RuntimeException())) {
363
364
  BufferedReader r = new BufferedReader(new InputStreamReader(in));
364
365
  assertEquals("id,account,time,purchase,comment", r.readLine());
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-gcs
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Satoshi Akama
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-06-02 00:00:00.000000000 Z
11
+ date: 2016-08-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -46,7 +46,8 @@ extensions: []
46
46
  extra_rdoc_files: []
47
47
  files:
48
48
  - .gitignore
49
- - ChangeLog
49
+ - .travis.yml
50
+ - CHANGELOG.md
50
51
  - README.md
51
52
  - build.gradle
52
53
  - config/checkstyle/checkstyle.xml
@@ -63,9 +64,10 @@ files:
63
64
  - src/test/java/org/embulk/input/gcs/TestGcsFileInputPlugin.java
64
65
  - src/test/resources/sample_01.csv
65
66
  - src/test/resources/sample_02.csv
67
+ - src/test/resources/secretkeys.tar.enc
66
68
  - classpath/commons-codec-1.3.jar
67
69
  - classpath/commons-logging-1.1.1.jar
68
- - classpath/embulk-input-gcs-0.2.0.jar
70
+ - classpath/embulk-input-gcs-0.2.1.jar
69
71
  - classpath/google-api-client-1.21.0.jar
70
72
  - classpath/google-api-services-storage-v1-rev59-1.21.0.jar
71
73
  - classpath/google-http-client-1.21.0.jar
data/ChangeLog DELETED
@@ -1,48 +0,0 @@
1
- Release 0.2.0 - 2016-06-03
2
- * Support path option to allow to specify list of target objects directly @sonots thanks! [#17](https://github.com/embulk/embulk-input-gcs/pull/17)
3
-
4
- Release 0.1.13 - 2016-02-04
5
- * Upgraded embulk to v0.8.2
6
- * Updated Google HTTP Client Library from 1.19.0 to 2.1.21.0
7
- * Updated Google Cloud Storage API Client Library from v1-rev27-1.19.1 to v1-rev59-1.21.0
8
-
9
- Release 0.1.11 - 2016-01-25
10
- * Added retry logic
11
-
12
- * Added retry logic
13
-
14
- Release 0.1.10 - 2015-11-07
15
-
16
- * Fix resume download logics
17
- * Throw ConfigException when files listing failed. @muga thanks!
18
-
19
- Release 0.1.9 - 2015-10-30
20
-
21
- * Fix GcsAuthentication object initialization for mapreduce executor. @muga thanks!
22
-
23
- Release 0.1.8 - 2015-10-29
24
-
25
- * Added unit tests
26
-
27
- Release 0.1.7 - 2015-10-06
28
-
29
- * Added new auth method - json_keyfile of GCP(Google Cloud Platform)'s service account
30
- * Supported mapreduce-executor
31
-
32
- Release 0.1.6 - 2015-09-05
33
-
34
- * Added new auth method - pre-defined access token of GCE(Google Compute Engine)
35
-
36
- Release 0.1.5 - 2015-08-19
37
-
38
- * Upgraded embulk version to 0.7.0
39
- * Refactored
40
-
41
- Release 0.1.4 - 2015-06-27
42
-
43
- * Keep last last_path when input files is empty. @frsyuki thanks!
44
- * Refactored error handling logics.
45
-
46
- Release 0.1.3 - 2015-03-16
47
-
48
- * Changed supported Java version from 8 to 7