embulk-input-gcs 0.1.9 → 0.1.10

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d25891998dc1d0794b0fcfac04ea7a011f777f9c
4
- data.tar.gz: cd7d9977e6bfa8df5212181ba49aaacf7998110b
3
+ metadata.gz: fd9bbc407e6e93967a7d08fcd72b4d4f0c419659
4
+ data.tar.gz: 86ebeaf4dace9de06146a4671d6fe3db69089b5c
5
5
  SHA512:
6
- metadata.gz: 9beb8bfab93341c05b076fb1937096cb86ab850da685806cd1aa29cf65ffea356e318bfdddc566321499e4ffa8b5ccac9c8028b0dd83aa20f29a6955c8556e9a
7
- data.tar.gz: d3f872c0dd1460e5234b836761fbd7ea5f723b17371d881e4a91989b62edd10cfa98a0110eb26af5940c684a318164b1448f0ccd73962bc5db126b5c60070e8b
6
+ metadata.gz: 783728ec908c8f598acfac3ec555075a55b7249943afc76d030f2fe57d9353ddbb78cbecabdba9504189d943b9574555d3ba301a4ee654f5c78b81c179abdef4
7
+ data.tar.gz: 0e8815c728a060f749e8d797938235b72eed889c2155b635256646e5816432326555648cf229ef76a01e969aca6ec659eb5ce976c43f32ee64f6a3d3ce451d69
data/ChangeLog CHANGED
@@ -1,6 +1,11 @@
1
+ Release 0.1.10 - 2015-11-07
2
+
3
+ * Fix resume download logic
4
+ * Throw ConfigException when file listing fails. @muga thanks!
5
+
1
6
  Release 0.1.9 - 2015-10-30
2
7
 
3
- Fix GcsAuthentication object initialization for mapreduce executor. @muga thanks!
8
+ * Fix GcsAuthentication object initialization for mapreduce executor. @muga thanks!
4
9
 
5
10
  Release 0.1.8 - 2015-10-29
6
11
 
@@ -27,4 +32,4 @@ Release 0.1.4 - 2015-06-27
27
32
 
28
33
  Release 0.1.3 - 2015-03-16
29
34
 
30
- * Changed supported Java version from 8 to 7
35
+ * Changed supported Java version from 8 to 7
@@ -16,7 +16,7 @@ configurations {
16
16
  sourceCompatibility = 1.7
17
17
  targetCompatibility = 1.7
18
18
 
19
- version = "0.1.9"
19
+ version = "0.1.10"
20
20
 
21
21
  dependencies {
22
22
  compile "org.embulk:embulk-core:0.7.5"
@@ -7,10 +7,13 @@ import java.io.IOException;
7
7
  import java.io.InputStream;
8
8
  import java.math.BigInteger;
9
9
 
10
+ import com.google.api.client.http.HttpResponseException;
10
11
  import com.google.common.collect.ImmutableList;
11
12
  import com.google.common.base.Optional;
12
13
  import com.google.common.base.Function;
13
14
  import com.google.common.base.Throwables;
15
+ import com.google.common.base.Charsets;
16
+ import com.google.common.io.BaseEncoding;
14
17
  import java.security.GeneralSecurityException;
15
18
 
16
19
  import org.embulk.config.TaskReport;
@@ -114,6 +117,13 @@ public class GcsFileInputPlugin
114
117
  }
115
118
  }
116
119
 
120
+ // @see https://cloud.google.com/storage/docs/bucket-naming
121
+ if (task.getLastPath().isPresent()) {
122
+ if (task.getLastPath().get().length() >= 128) {
123
+ throw new ConfigException("last_path length is allowed between 1 and 1024 bytes");
124
+ }
125
+ }
126
+
117
127
  Storage client = newGcsClient(task, newGcsAuth(task));
118
128
 
119
129
  // list files recursively
@@ -209,7 +219,7 @@ public class GcsFileInputPlugin
209
219
  {
210
220
  ImmutableList.Builder<String> builder = ImmutableList.builder();
211
221
 
212
- String lastKey = lastPath.orNull();
222
+ String lastKey = lastPath.isPresent() ? base64Encode(lastPath.get()) : null;
213
223
 
214
224
  // @see https://cloud.google.com/storage/docs/json_api/v1/objects#resource
215
225
  try {
@@ -250,6 +260,10 @@ public class GcsFileInputPlugin
250
260
  listObjects.setPageToken(lastKey);
251
261
  } while (lastKey != null);
252
262
  } catch (IOException e) {
263
+ if ((e instanceof HttpResponseException) && ((HttpResponseException) e).getStatusCode() == 400) {
264
+ throw new ConfigException(String.format("Files listing failed: bucket:%s, prefix:%s, last_path:%s", bucket, prefix, lastKey), e);
265
+ }
266
+
253
267
  log.warn(String.format("Could not get file list from bucket:%s", bucket));
254
268
  log.warn(e.getMessage());
255
269
  }
@@ -315,6 +329,24 @@ public class GcsFileInputPlugin
315
329
  public void close() { }
316
330
  }
317
331
 
332
+ // String nextToken = base64Encode(0x0a + 0x01~0x7f (path length) + filePath);
333
+ private static String base64Encode(String path)
334
+ {
335
+ byte[] encoding;
336
+ byte[] utf8 = path.getBytes(Charsets.UTF_8);
337
+ log.debug(String.format("path string: %s ,path length:%s \" + ", path, utf8.length));
338
+
339
+ encoding = new byte[utf8.length + 2];
340
+ encoding[0] = 0x0a;
341
+ encoding[1] = new Byte(String.valueOf(path.length()));
342
+ System.arraycopy(utf8, 0, encoding, 2, utf8.length);
343
+
344
+ String s = BaseEncoding.base64().encode(encoding);
345
+ log.debug(String.format("last_path(base64 encoded): %s" ,s));
346
+ return s;
347
+ }
348
+
349
+
318
350
  public enum AuthMethod
319
351
  {
320
352
  private_key("private_key"),
@@ -157,6 +157,22 @@ public class TestGcsFileInputPlugin
157
157
  runner.transaction(config, new Control());
158
158
  }
159
159
 
160
+ // last_path length is too long
161
+ @Test(expected = ConfigException.class)
162
+ public void checkDefaultValuesLongLastPath()
163
+ {
164
+ ConfigSource config = Exec.newConfigSource()
165
+ .set("bucket", GCP_BUCKET)
166
+ .set("path_prefix", "my-prefix")
167
+ .set("auth_method", "json_key")
168
+ .set("service_account_email", GCP_EMAIL)
169
+ .set("json_keyfile", null)
170
+ .set("last_path", "ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc128")
171
+ .set("parser", parserConfig(schemaConfig()));
172
+
173
+ runner.transaction(config, new Control());
174
+ }
175
+
160
176
  @Test
161
177
  public void testGcsClientCreateSuccessfully()
162
178
  throws GeneralSecurityException, IOException, NoSuchMethodException,
@@ -276,6 +292,22 @@ public class TestGcsFileInputPlugin
276
292
  assertRecords(config, output);
277
293
  }
278
294
 
295
+ @Test
296
+ public void testBase64()
297
+ throws NoSuchMethodException, IllegalAccessException, InvocationTargetException
298
+ {
299
+ Method method = GcsFileInputPlugin.class.getDeclaredMethod("base64Encode", String.class);
300
+ method.setAccessible(true);
301
+
302
+ assertEquals("CgFj", method.invoke(plugin, "c"));
303
+ assertEquals("CgJjMg==", method.invoke(plugin, "c2"));
304
+ assertEquals("Cgh0ZXN0LmNzdg==", method.invoke(plugin, "test.csv"));
305
+ assertEquals("ChZnY3MtdGVzdC9zYW1wbGVfMDEuY3N2", method.invoke(plugin, "gcs-test/sample_01.csv"));
306
+ String params = "cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc127";
307
+ String expected = "Cn9jY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjMTI3";
308
+ assertEquals(expected, method.invoke(plugin, params));
309
+ }
310
+
279
311
  public ConfigSource config()
280
312
  {
281
313
  return Exec.newConfigSource()
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-gcs
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.9
4
+ version: 0.1.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Satoshi Akama
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-10-30 00:00:00.000000000 Z
11
+ date: 2015-11-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -63,7 +63,7 @@ files:
63
63
  - src/test/resources/sample_02.csv
64
64
  - classpath/commons-codec-1.3.jar
65
65
  - classpath/commons-logging-1.1.1.jar
66
- - classpath/embulk-input-gcs-0.1.9.jar
66
+ - classpath/embulk-input-gcs-0.1.10.jar
67
67
  - classpath/google-api-client-1.19.1.jar
68
68
  - classpath/google-api-services-storage-v1-rev27-1.19.1.jar
69
69
  - classpath/google-http-client-1.19.0.jar