embulk-input-gcs 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 84c5f3e02ea5ad9dd6b8536ad9c35bc5e893f785
4
- data.tar.gz: 6e5cc8dda9c62e0515db17cf1c486b902609b853
3
+ metadata.gz: 8e9248a8e1b730f9885caa93b24cbf47dfb8a8c0
4
+ data.tar.gz: 6405b783b5d7ea6a6b74f017946f69e9b7c5b3a6
5
5
  SHA512:
6
- metadata.gz: ad92f9ff0c884436d4e667d01f23ea088e6a68793941ec8efb87e7694d2770ecde5a0a2ad59da1e58649477be2bd1237843b729004113d447bdeba9c359f0d36
7
- data.tar.gz: 15087aa4dbd3365c88b3ab75cbf02de30cd9e0250bf3b372aeee155b2155239db68b75934d920120771e126b14825d7e4544bbca0bf5fb2dff3f483a6837eee1
6
+ metadata.gz: fbbce0c9ba052477d392ba00586d6951f00924a9d163409b3394ac393035b9b33c5acb2aeecc2111b1a3febc8d59ab0b9401a4b680ed6e578cfc5e57ab844e3d
7
+ data.tar.gz: 23604910383ac32a7aa66b0afb94144cd23bea0b26866748d24ec8189f27ea313e4a9cda0a599a1aefce8be84e0bcc3957754dbe95e6d5755dfc517666685806
data/.travis.yml ADDED
@@ -0,0 +1,23 @@
1
+ language: java
2
+
3
+ jdk:
4
+ - oraclejdk8
5
+ - oraclejdk7
6
+ - openjdk7
7
+
8
+ before_install:
9
+ - openssl aes-256-cbc -K $encrypted_79f1af2a2546_key -iv $encrypted_79f1af2a2546_iv
10
+ -in src/test/resources/secretkeys.tar.enc -out secretkeys.tar -d
11
+ - tar xvf secretkeys.tar
12
+
13
+ env:
14
+ global:
15
+ - GCP_EMAIL=unittest@embulk-input-gcs-test.iam.gserviceaccount.com
16
+ - GCP_BUCKET=embulk-input-gcs-test
17
+ - GCP_BUCKET_DIRECTORY=unittests_import
18
+ - GCP_P12_KEYFILE=./embulk-input-gcs-test-018324286daf.p12
19
+ - GCP_JSON_KEYFILE=./embulk-input-gcs-test-841948b819cf.json
20
+
21
+ script:
22
+ - ./gradlew gem
23
+ - ./gradlew --info check jacocoTestReport
data/CHANGELOG.md ADDED
@@ -0,0 +1,49 @@
1
+ ## 0.2.1 - 2016-08-04
2
+ * [maintenance] Use retry logic when generating the GCS client [#21](https://github.com/embulk/embulk-input-gcs/pull/21)
3
+
4
+ ## 0.2.0 - 2016-06-03
5
+ * [new feature] Support path option that allows specifying a list of target objects directly. @sonots thanks! [#17](https://github.com/embulk/embulk-input-gcs/pull/17)
6
+
7
+ ## 0.1.13 - 2016-02-04
8
+ * [maintenance] Upgraded embulk to v0.8.2 [#14](https://github.com/embulk/embulk-input-gcs/pull/14)
9
+ * [maintenance] Updated Google HTTP Client Library from 1.19.0 to 1.21.0 [#15](https://github.com/embulk/embulk-input-gcs/pull/15)
10
+ * [maintenance] Updated Google Cloud Storage API Client Library from v1-rev27-1.19.1 to v1-rev59-1.21.0 [#15](https://github.com/embulk/embulk-input-gcs/pull/15)
11
+
12
+ ## 0.1.11 - 2016-01-25
13
+ * [maintenance] Added retry logic [#11](https://github.com/embulk/embulk-input-gcs/pull/11)
14
+
15
+ ## 0.1.10 - 2015-11-07
16
+
17
+ * [maintenance] Fix resume download logic [#10](https://github.com/embulk/embulk-input-gcs/pull/10)
18
+ * [maintenance] Throw ConfigException when file listing fails. @muga thanks! [#9](https://github.com/embulk/embulk-input-gcs/pull/9)
19
+
20
+ ## 0.1.9 - 2015-10-30
21
+
22
+ * [maintenance] Fix GcsAuthentication object initialization for mapreduce executor. @muga thanks! [#7](https://github.com/embulk/embulk-input-gcs/pull/7)
23
+
24
+ ## 0.1.8 - 2015-10-29
25
+
26
+ * [maintenance] Added unit tests [#8](https://github.com/embulk/embulk-input-gcs/pull/8)
27
+
28
+ ## 0.1.7 - 2015-10-06
29
+
30
+ * [new feature] Added new auth method - json_keyfile of GCP(Google Cloud Platform)'s service account [#5](https://github.com/embulk/embulk-input-gcs/pull/5)
31
+ * [maintenance] Supported mapreduce-executor [#4](https://github.com/embulk/embulk-input-gcs/pull/4)
32
+
33
+ ## 0.1.6 - 2015-09-05
34
+
35
+ * [new feature] Added new auth method - pre-defined access token of GCE(Google Compute Engine) [#3](https://github.com/embulk/embulk-input-gcs/pull/3)
36
+
37
+ ## 0.1.5 - 2015-08-19
38
+
39
+ * [maintenance] Upgraded embulk version to 0.7.0
40
+ * [maintenance] Refactored
41
+
42
+ ## 0.1.4 - 2015-06-27
43
+
44
+ * [maintenance] Keep the previous last_path when the list of input files is empty. @frsyuki thanks! [#1](https://github.com/embulk/embulk-input-gcs/pull/1)
45
+ * [maintenance] Refactored error handling logic.
46
+
47
+ ## 0.1.3 - 2015-03-16
48
+
49
+ * [maintenance] Changed supported Java version from 8 to 7
data/README.md CHANGED
@@ -1,4 +1,5 @@
1
1
  # Google Cloud Storage file input plugin for Embulk
2
+ [![Build Status](https://travis-ci.org/embulk/embulk-input-gcs.svg?branch=master)](https://travis-ci.org/embulk/embulk-input-gcs)
2
3
 
3
4
  ## Overview
4
5
 
data/build.gradle CHANGED
@@ -17,7 +17,7 @@ configurations {
17
17
  sourceCompatibility = 1.7
18
18
  targetCompatibility = 1.7
19
19
 
20
- version = "0.2.0"
20
+ version = "0.2.1"
21
21
 
22
22
  dependencies {
23
23
  compile "org.embulk:embulk-core:0.8.2"
@@ -1,5 +1,6 @@
1
1
  package org.embulk.input.gcs;
2
2
 
3
+ import com.google.api.client.auth.oauth2.TokenResponseException;
3
4
  import com.google.api.client.googleapis.auth.oauth2.GoogleCredential;
4
5
  import com.google.api.client.googleapis.compute.ComputeCredential;
5
6
  import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport;
@@ -10,16 +11,21 @@ import com.google.api.client.json.JsonFactory;
10
11
  import com.google.api.client.json.jackson2.JacksonFactory;
11
12
  import com.google.api.services.storage.Storage;
12
13
  import com.google.api.services.storage.StorageScopes;
13
- import com.google.api.services.storage.model.Objects;
14
14
  import com.google.common.base.Optional;
15
+ import com.google.common.base.Throwables;
15
16
  import com.google.common.collect.ImmutableList;
17
+ import org.embulk.config.ConfigException;
16
18
  import org.embulk.spi.Exec;
19
+ import org.embulk.spi.util.RetryExecutor.RetryGiveupException;
20
+ import org.embulk.spi.util.RetryExecutor.Retryable;
17
21
  import org.slf4j.Logger;
22
+ import static org.embulk.spi.util.RetryExecutor.retryExecutor;
18
23
 
19
24
  import java.io.File;
20
25
  import java.io.FileInputStream;
21
26
 
22
27
  import java.io.IOException;
28
+ import java.io.InterruptedIOException;
23
29
  import java.security.GeneralSecurityException;
24
30
  import java.util.Collections;
25
31
 
@@ -74,13 +80,13 @@ public class GcsAuthentication
74
80
  StorageScopes.DEVSTORAGE_READ_ONLY
75
81
  )
76
82
  )
77
- .setServiceAccountPrivateKeyFromP12File(new File(p12KeyFilePath.orNull()))
83
+ .setServiceAccountPrivateKeyFromP12File(new File(p12KeyFilePath.get()))
78
84
  .build();
79
85
  }
80
86
 
81
87
  private GoogleCredential getServiceAccountCredentialFromJsonFile() throws IOException
82
88
  {
83
- FileInputStream stream = new FileInputStream(jsonKeyFilePath.orNull());
89
+ FileInputStream stream = new FileInputStream(jsonKeyFilePath.get());
84
90
 
85
91
  return GoogleCredential.fromStream(stream, httpTransport, jsonFactory)
86
92
  .createScoped(Collections.singleton(StorageScopes.DEVSTORAGE_READ_ONLY));
@@ -99,16 +105,84 @@ public class GcsAuthentication
99
105
  return credential;
100
106
  }
101
107
 
102
- public Storage getGcsClient(String bucket) throws GoogleJsonResponseException, IOException
108
+ public Storage getGcsClient(final String bucket, int maxConnectionRetry) throws ConfigException, IOException
103
109
  {
104
- Storage client = new Storage.Builder(httpTransport, jsonFactory, credentials)
105
- .setApplicationName(applicationName)
106
- .build();
110
+ try {
111
+ return retryExecutor()
112
+ .withRetryLimit(maxConnectionRetry)
113
+ .withInitialRetryWait(500)
114
+ .withMaxRetryWait(30 * 1000)
115
+ .runInterruptible(new Retryable<Storage>() {
116
+ @Override
117
+ public Storage call() throws IOException, RetryGiveupException
118
+ {
119
+ Storage client = new Storage.Builder(httpTransport, jsonFactory, credentials)
120
+ .setApplicationName(applicationName)
121
+ .build();
122
+
123
+ // For throw ConfigException when authentication is fail.
124
+ long maxResults = 1;
125
+ client.objects().list(bucket).setMaxResults(maxResults).execute();
126
+
127
+ return client;
128
+ }
129
+
130
+ @Override
131
+ public boolean isRetryableException(Exception exception)
132
+ {
133
+ if (exception instanceof GoogleJsonResponseException || exception instanceof TokenResponseException) {
134
+ int statusCode;
135
+ if (exception instanceof GoogleJsonResponseException) {
136
+ statusCode = ((GoogleJsonResponseException) exception).getDetails().getCode();
137
+ }
138
+ else {
139
+ statusCode = ((TokenResponseException) exception).getStatusCode();
140
+ }
141
+ if (statusCode / 100 == 4) {
142
+ return false;
143
+ }
144
+ }
145
+ return true;
146
+ }
107
147
 
108
- // For throw IOException when authentication is fail.
109
- long maxResults = 1;
110
- Objects objects = client.objects().list(bucket).setMaxResults(maxResults).execute();
148
+ @Override
149
+ public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait)
150
+ throws RetryGiveupException
151
+ {
152
+ String message = String.format("GCS GET request failed. Retrying %d/%d after %d seconds. Message: %s: %s",
153
+ retryCount, retryLimit, retryWait / 1000, exception.getClass(), exception.getMessage());
154
+ if (retryCount % 3 == 0) {
155
+ log.warn(message, exception);
156
+ }
157
+ else {
158
+ log.warn(message);
159
+ }
160
+ }
111
161
 
112
- return client;
162
+ @Override
163
+ public void onGiveup(Exception firstException, Exception lastException)
164
+ throws RetryGiveupException
165
+ {
166
+ }
167
+ });
168
+ }
169
+ catch (RetryGiveupException ex) {
170
+ if (ex.getCause() instanceof GoogleJsonResponseException || ex.getCause() instanceof TokenResponseException) {
171
+ int statusCode = 0;
172
+ if (ex.getCause() instanceof GoogleJsonResponseException) {
173
+ statusCode = ((GoogleJsonResponseException) ex.getCause()).getDetails().getCode();
174
+ }
175
+ else if (ex.getCause() instanceof TokenResponseException) {
176
+ statusCode = ((TokenResponseException) ex.getCause()).getStatusCode();
177
+ }
178
+ if (statusCode / 100 == 4) {
179
+ throw new ConfigException(ex);
180
+ }
181
+ }
182
+ throw Throwables.propagate(ex);
183
+ }
184
+ catch (InterruptedException ex) {
185
+ throw new InterruptedIOException();
186
+ }
113
187
  }
114
188
  }
@@ -90,6 +90,10 @@ public class GcsFileInputPlugin
90
90
  List<String> getFiles();
91
91
  void setFiles(List<String> files);
92
92
 
93
+ @Config("max_connection_retry")
94
+ @ConfigDefault("10") // 10 times retry to connect GCS server if failed.
95
+ int getMaxConnectionRetry();
96
+
93
97
  @ConfigInject
94
98
  BufferAllocator getBufferAllocator();
95
99
  }
@@ -198,7 +202,7 @@ public class GcsFileInputPlugin
198
202
  {
199
203
  Storage client = null;
200
204
  try {
201
- client = auth.getGcsClient(task.getBucket());
205
+ client = auth.getGcsClient(task.getBucket(), task.getMaxConnectionRetry());
202
206
  }
203
207
  catch (IOException ex) {
204
208
  throw new ConfigException(ex);
@@ -303,12 +307,14 @@ public class GcsFileInputPlugin
303
307
  private final Storage client;
304
308
  private final String bucket;
305
309
  private final String key;
310
+ private final int maxConnectionRetry;
306
311
 
307
- public GcsInputStreamReopener(Storage client, String bucket, String key)
312
+ public GcsInputStreamReopener(Storage client, String bucket, String key, int maxConnectionRetry)
308
313
  {
309
314
  this.client = client;
310
315
  this.bucket = bucket;
311
316
  this.key = key;
317
+ this.maxConnectionRetry = maxConnectionRetry;
312
318
  }
313
319
 
314
320
  @Override
@@ -316,7 +322,7 @@ public class GcsFileInputPlugin
316
322
  {
317
323
  try {
318
324
  return retryExecutor()
319
- .withRetryLimit(3)
325
+ .withRetryLimit(maxConnectionRetry)
320
326
  .withInitialRetryWait(500)
321
327
  .withMaxRetryWait(30 * 1000)
322
328
  .runInterruptible(new Retryable<InputStream>() {
@@ -395,6 +401,7 @@ public class GcsFileInputPlugin
395
401
  private final Storage client;
396
402
  private final String bucket;
397
403
  private final String key;
404
+ private final int maxConnectionRetry;
398
405
  private boolean opened = false;
399
406
 
400
407
  public SingleFileProvider(PluginTask task, int taskIndex)
@@ -402,6 +409,7 @@ public class GcsFileInputPlugin
402
409
  this.client = newGcsClient(task, newGcsAuth(task));
403
410
  this.bucket = task.getBucket();
404
411
  this.key = task.getFiles().get(taskIndex);
412
+ this.maxConnectionRetry = task.getMaxConnectionRetry();
405
413
  }
406
414
 
407
415
  @Override
@@ -413,7 +421,7 @@ public class GcsFileInputPlugin
413
421
  opened = true;
414
422
  Storage.Objects.Get getObject = client.objects().get(bucket, key);
415
423
 
416
- return new ResumableInputStream(getObject.executeMediaAsInputStream(), new GcsInputStreamReopener(client, bucket, key));
424
+ return new ResumableInputStream(getObject.executeMediaAsInputStream(), new GcsInputStreamReopener(client, bucket, key, maxConnectionRetry));
417
425
  }
418
426
 
419
427
  @Override
@@ -1,10 +1,10 @@
1
1
  package org.embulk.input.gcs;
2
2
 
3
3
  import com.google.api.client.googleapis.auth.oauth2.GoogleCredential;
4
- import com.google.api.client.googleapis.json.GoogleJsonResponseException;
5
4
  import com.google.api.services.storage.Storage;
6
5
  import com.google.common.base.Optional;
7
6
  import org.embulk.EmbulkTestRuntime;
7
+ import org.embulk.config.ConfigException;
8
8
  import org.junit.BeforeClass;
9
9
  import org.junit.Rule;
10
10
  import org.junit.Test;
@@ -25,6 +25,7 @@ public class TestGcsAuthentication
25
25
  private static Optional<String> GCP_JSON_KEYFILE;
26
26
  private static String GCP_BUCKET;
27
27
  private static final String GCP_APPLICATION_NAME = "embulk-input-gcs";
28
+ private static int MAX_CONNECTION_RETRY = 3;
28
29
 
29
30
  /*
30
31
  * This test case requires environment variables
@@ -97,12 +98,12 @@ public class TestGcsAuthentication
97
98
  GCP_APPLICATION_NAME
98
99
  );
99
100
 
100
- Storage client = auth.getGcsClient(GCP_BUCKET);
101
+ Storage client = auth.getGcsClient(GCP_BUCKET, MAX_CONNECTION_RETRY);
101
102
 
102
103
  assertEquals(Storage.class, client.getClass());
103
104
  }
104
105
 
105
- @Test(expected = GoogleJsonResponseException.class)
106
+ @Test(expected = ConfigException.class)
106
107
  public void testGetGcsClientUsingServiceAccountCredentialThrowJsonResponseException()
107
108
  throws NoSuchFieldException, IllegalAccessException, GeneralSecurityException, IOException
108
109
  {
@@ -114,7 +115,7 @@ public class TestGcsAuthentication
114
115
  GCP_APPLICATION_NAME
115
116
  );
116
117
 
117
- Storage client = auth.getGcsClient("non-exists-bucket");
118
+ Storage client = auth.getGcsClient("non-exists-bucket", MAX_CONNECTION_RETRY);
118
119
 
119
120
  assertEquals(Storage.class, client.getClass());
120
121
  }
@@ -162,12 +163,12 @@ public class TestGcsAuthentication
162
163
  GCP_APPLICATION_NAME
163
164
  );
164
165
 
165
- Storage client = auth.getGcsClient(GCP_BUCKET);
166
+ Storage client = auth.getGcsClient(GCP_BUCKET, MAX_CONNECTION_RETRY);
166
167
 
167
168
  assertEquals(Storage.class, client.getClass());
168
169
  }
169
170
 
170
- @Test(expected = GoogleJsonResponseException.class)
171
+ @Test(expected = ConfigException.class)
171
172
  public void testGetServiceAccountCredentialFromJsonThrowGoogleJsonResponseException()
172
173
  throws NoSuchFieldException, IllegalAccessException, GeneralSecurityException, IOException
173
174
  {
@@ -179,6 +180,6 @@ public class TestGcsAuthentication
179
180
  GCP_APPLICATION_NAME
180
181
  );
181
182
 
182
- Storage client = auth.getGcsClient("non-exists-bucket");
183
+ Storage client = auth.getGcsClient("non-exists-bucket", MAX_CONNECTION_RETRY);
183
184
  }
184
185
  }
@@ -52,6 +52,7 @@ public class TestGcsFileInputPlugin
52
52
  private static String GCP_BUCKET_DIRECTORY;
53
53
  private static String GCP_PATH_PREFIX;
54
54
  private static String GCP_APPLICATION_NAME;
55
+ private static int MAX_CONNECTION_RETRY = 3;
55
56
  private FileInputRunner runner;
56
57
  private MockPageOutput output;
57
58
 
@@ -358,7 +359,7 @@ public class TestGcsFileInputPlugin
358
359
  task.setFiles(plugin.listFiles(task, client));
359
360
 
360
361
  String key = GCP_BUCKET_DIRECTORY + "sample_01.csv";
361
- GcsFileInputPlugin.GcsInputStreamReopener opener = new GcsFileInputPlugin.GcsInputStreamReopener(client, GCP_BUCKET, key);
362
+ GcsFileInputPlugin.GcsInputStreamReopener opener = new GcsFileInputPlugin.GcsInputStreamReopener(client, GCP_BUCKET, key, MAX_CONNECTION_RETRY);
362
363
  try (InputStream in = opener.reopen(0, new RuntimeException())) {
363
364
  BufferedReader r = new BufferedReader(new InputStreamReader(in));
364
365
  assertEquals("id,account,time,purchase,comment", r.readLine());
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-gcs
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Satoshi Akama
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-06-02 00:00:00.000000000 Z
11
+ date: 2016-08-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -46,7 +46,8 @@ extensions: []
46
46
  extra_rdoc_files: []
47
47
  files:
48
48
  - .gitignore
49
- - ChangeLog
49
+ - .travis.yml
50
+ - CHANGELOG.md
50
51
  - README.md
51
52
  - build.gradle
52
53
  - config/checkstyle/checkstyle.xml
@@ -63,9 +64,10 @@ files:
63
64
  - src/test/java/org/embulk/input/gcs/TestGcsFileInputPlugin.java
64
65
  - src/test/resources/sample_01.csv
65
66
  - src/test/resources/sample_02.csv
67
+ - src/test/resources/secretkeys.tar.enc
66
68
  - classpath/commons-codec-1.3.jar
67
69
  - classpath/commons-logging-1.1.1.jar
68
- - classpath/embulk-input-gcs-0.2.0.jar
70
+ - classpath/embulk-input-gcs-0.2.1.jar
69
71
  - classpath/google-api-client-1.21.0.jar
70
72
  - classpath/google-api-services-storage-v1-rev59-1.21.0.jar
71
73
  - classpath/google-http-client-1.21.0.jar
data/ChangeLog DELETED
@@ -1,48 +0,0 @@
1
- Release 0.2.0 - 2016-06-03
2
- * Support path option to allow to specify list of target objects directly @sonots thanks! [#17](https://github.com/embulk/embulk-input-gcs/pull/17)
3
-
4
- Release 0.1.13 - 2016-02-04
5
- * Upgraded embulk to v0.8.2
6
- * Updated Google HTTP Client Library from 1.19.0 to 2.1.21.0
7
- * Updated Google Cloud Storage API Client Library from v1-rev27-1.19.1 to v1-rev59-1.21.0
8
-
9
- Release 0.1.11 - 2016-01-25
10
- * Added retry logic
11
-
12
- * Added retry logic
13
-
14
- Release 0.1.10 - 2015-11-07
15
-
16
- * Fix resume download logics
17
- * Throw ConfigException when files listing failed. @muga thanks!
18
-
19
- Release 0.1.9 - 2015-10-30
20
-
21
- * Fix GcsAuthentication object initialization for mapreduce executor. @muga thanks!
22
-
23
- Release 0.1.8 - 2015-10-29
24
-
25
- * Added unit tests
26
-
27
- Release 0.1.7 - 2015-10-06
28
-
29
- * Added new auth method - json_keyfile of GCP(Google Cloud Platform)'s service account
30
- * Supported mapreduce-executor
31
-
32
- Release 0.1.6 - 2015-09-05
33
-
34
- * Added new auth method - pre-defined access token of GCE(Google Compute Engine)
35
-
36
- Release 0.1.5 - 2015-08-19
37
-
38
- * Upgraded embulk version to 0.7.0
39
- * Refactored
40
-
41
- Release 0.1.4 - 2015-06-27
42
-
43
- * Keep last last_path when input files is empty. @frsyuki thanks!
44
- * Refactored error handling logics.
45
-
46
- Release 0.1.3 - 2015-03-16
47
-
48
- * Changed supported Java version from 8 to 7