embulk-input-gcs 0.1.9 → 0.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d25891998dc1d0794b0fcfac04ea7a011f777f9c
4
- data.tar.gz: cd7d9977e6bfa8df5212181ba49aaacf7998110b
3
+ metadata.gz: fd9bbc407e6e93967a7d08fcd72b4d4f0c419659
4
+ data.tar.gz: 86ebeaf4dace9de06146a4671d6fe3db69089b5c
5
5
  SHA512:
6
- metadata.gz: 9beb8bfab93341c05b076fb1937096cb86ab850da685806cd1aa29cf65ffea356e318bfdddc566321499e4ffa8b5ccac9c8028b0dd83aa20f29a6955c8556e9a
7
- data.tar.gz: d3f872c0dd1460e5234b836761fbd7ea5f723b17371d881e4a91989b62edd10cfa98a0110eb26af5940c684a318164b1448f0ccd73962bc5db126b5c60070e8b
6
+ metadata.gz: 783728ec908c8f598acfac3ec555075a55b7249943afc76d030f2fe57d9353ddbb78cbecabdba9504189d943b9574555d3ba301a4ee654f5c78b81c179abdef4
7
+ data.tar.gz: 0e8815c728a060f749e8d797938235b72eed889c2155b635256646e5816432326555648cf229ef76a01e969aca6ec659eb5ce976c43f32ee64f6a3d3ce451d69
data/ChangeLog CHANGED
@@ -1,6 +1,11 @@
1
+ Release 0.1.10 - 2015-11-07
2
+
3
+ * Fix resume download logic
4
+ * Throw ConfigException when file listing fails. @muga thanks!
5
+
1
6
  Release 0.1.9 - 2015-10-30
2
7
 
3
- Fix GcsAuthentication object initialization for mapreduce executor. @muga thanks!
8
+ * Fix GcsAuthentication object initialization for mapreduce executor. @muga thanks!
4
9
 
5
10
  Release 0.1.8 - 2015-10-29
6
11
 
@@ -27,4 +32,4 @@ Release 0.1.4 - 2015-06-27
27
32
 
28
33
  Release 0.1.3 - 2015-03-16
29
34
 
30
- * Changed supported Java version from 8 to 7
35
+ * Changed supported Java version from 8 to 7
@@ -16,7 +16,7 @@ configurations {
16
16
  sourceCompatibility = 1.7
17
17
  targetCompatibility = 1.7
18
18
 
19
- version = "0.1.9"
19
+ version = "0.1.10"
20
20
 
21
21
  dependencies {
22
22
  compile "org.embulk:embulk-core:0.7.5"
@@ -7,10 +7,13 @@ import java.io.IOException;
7
7
  import java.io.InputStream;
8
8
  import java.math.BigInteger;
9
9
 
10
+ import com.google.api.client.http.HttpResponseException;
10
11
  import com.google.common.collect.ImmutableList;
11
12
  import com.google.common.base.Optional;
12
13
  import com.google.common.base.Function;
13
14
  import com.google.common.base.Throwables;
15
+ import com.google.common.base.Charsets;
16
+ import com.google.common.io.BaseEncoding;
14
17
  import java.security.GeneralSecurityException;
15
18
 
16
19
  import org.embulk.config.TaskReport;
@@ -114,6 +117,13 @@ public class GcsFileInputPlugin
114
117
  }
115
118
  }
116
119
 
120
+ // @see https://cloud.google.com/storage/docs/bucket-naming
121
+ if (task.getLastPath().isPresent()) {
122
+ if (task.getLastPath().get().length() >= 128) {
123
+ throw new ConfigException("last_path length is allowed between 1 and 1024 bytes");
124
+ }
125
+ }
126
+
117
127
  Storage client = newGcsClient(task, newGcsAuth(task));
118
128
 
119
129
  // list files recursively
@@ -209,7 +219,7 @@ public class GcsFileInputPlugin
209
219
  {
210
220
  ImmutableList.Builder<String> builder = ImmutableList.builder();
211
221
 
212
- String lastKey = lastPath.orNull();
222
+ String lastKey = lastPath.isPresent() ? base64Encode(lastPath.get()) : null;
213
223
 
214
224
  // @see https://cloud.google.com/storage/docs/json_api/v1/objects#resource
215
225
  try {
@@ -250,6 +260,10 @@ public class GcsFileInputPlugin
250
260
  listObjects.setPageToken(lastKey);
251
261
  } while (lastKey != null);
252
262
  } catch (IOException e) {
263
+ if ((e instanceof HttpResponseException) && ((HttpResponseException) e).getStatusCode() == 400) {
264
+ throw new ConfigException(String.format("Files listing failed: bucket:%s, prefix:%s, last_path:%s", bucket, prefix, lastKey), e);
265
+ }
266
+
253
267
  log.warn(String.format("Could not get file list from bucket:%s", bucket));
254
268
  log.warn(e.getMessage());
255
269
  }
@@ -315,6 +329,24 @@ public class GcsFileInputPlugin
315
329
  public void close() { }
316
330
  }
317
331
 
332
+ // String nextToken = base64Encode(0x0a + 0x01~0x7f + filePath);
333
+ private static String base64Encode(String path)
334
+ {
335
+ byte[] encoding;
336
+ byte[] utf8 = path.getBytes(Charsets.UTF_8);
337
+ log.debug(String.format("path string: %s ,path length:%s \" + ", path, utf8.length));
338
+
339
+ encoding = new byte[utf8.length + 2];
340
+ encoding[0] = 0x0a;
341
+ encoding[1] = new Byte(String.valueOf(path.length()));
342
+ System.arraycopy(utf8, 0, encoding, 2, utf8.length);
343
+
344
+ String s = BaseEncoding.base64().encode(encoding);
345
+ log.debug(String.format("last_path(base64 encoded): %s" ,s));
346
+ return s;
347
+ }
348
+
349
+
318
350
  public enum AuthMethod
319
351
  {
320
352
  private_key("private_key"),
@@ -157,6 +157,22 @@ public class TestGcsFileInputPlugin
157
157
  runner.transaction(config, new Control());
158
158
  }
159
159
 
160
+ // last_path length is too long
161
+ @Test(expected = ConfigException.class)
162
+ public void checkDefaultValuesLongLastPath()
163
+ {
164
+ ConfigSource config = Exec.newConfigSource()
165
+ .set("bucket", GCP_BUCKET)
166
+ .set("path_prefix", "my-prefix")
167
+ .set("auth_method", "json_key")
168
+ .set("service_account_email", GCP_EMAIL)
169
+ .set("json_keyfile", null)
170
+ .set("last_path", "ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc128")
171
+ .set("parser", parserConfig(schemaConfig()));
172
+
173
+ runner.transaction(config, new Control());
174
+ }
175
+
160
176
  @Test
161
177
  public void testGcsClientCreateSuccessfully()
162
178
  throws GeneralSecurityException, IOException, NoSuchMethodException,
@@ -276,6 +292,22 @@ public class TestGcsFileInputPlugin
276
292
  assertRecords(config, output);
277
293
  }
278
294
 
295
+ @Test
296
+ public void testBase64()
297
+ throws NoSuchMethodException, IllegalAccessException, InvocationTargetException
298
+ {
299
+ Method method = GcsFileInputPlugin.class.getDeclaredMethod("base64Encode", String.class);
300
+ method.setAccessible(true);
301
+
302
+ assertEquals("CgFj", method.invoke(plugin, "c"));
303
+ assertEquals("CgJjMg==", method.invoke(plugin, "c2"));
304
+ assertEquals("Cgh0ZXN0LmNzdg==", method.invoke(plugin, "test.csv"));
305
+ assertEquals("ChZnY3MtdGVzdC9zYW1wbGVfMDEuY3N2", method.invoke(plugin, "gcs-test/sample_01.csv"));
306
+ String params = "cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc127";
307
+ String expected = "Cn9jY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjMTI3";
308
+ assertEquals(expected, method.invoke(plugin, params));
309
+ }
310
+
279
311
  public ConfigSource config()
280
312
  {
281
313
  return Exec.newConfigSource()
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-gcs
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.9
4
+ version: 0.1.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Satoshi Akama
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-10-30 00:00:00.000000000 Z
11
+ date: 2015-11-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -63,7 +63,7 @@ files:
63
63
  - src/test/resources/sample_02.csv
64
64
  - classpath/commons-codec-1.3.jar
65
65
  - classpath/commons-logging-1.1.1.jar
66
- - classpath/embulk-input-gcs-0.1.9.jar
66
+ - classpath/embulk-input-gcs-0.1.10.jar
67
67
  - classpath/google-api-client-1.19.1.jar
68
68
  - classpath/google-api-services-storage-v1-rev27-1.19.1.jar
69
69
  - classpath/google-http-client-1.19.0.jar