embulk-output-gcs 0.4.1 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b41fdc69131fe0099ff844875bafa6d160510e10
4
- data.tar.gz: 5b084d56f12509c0c273ea295519a1534d310579
3
+ metadata.gz: c0d6f9617394f320742be74ac318d99ef0f93c06
4
+ data.tar.gz: af9f3a3679a2d76b5c2944fd7e8264ecab3b3a27
5
5
  SHA512:
6
- metadata.gz: a7f51eec122063edaffe94aa065f9bdd1dcc09752d46296f3f8d276d5af250ef82f1a2f56b7e0163f646c98731954d4bf5ce2b901f4e22b4e09033fd20fbf627
7
- data.tar.gz: 7ed822140abca79f035a9bf59af3d472472b6bdfa4c1db505a22ae4ca0a80ca1fd380635487f72eb8fd74f706fd1e531fdf421f63b1614f6fcc5731b319c1489
6
+ metadata.gz: 086960d449e95821defa56b6860eaf2a45fe753db3c7d763272c694ae5fd87b07aba937bb170f748b6d779775e71145c9ff350528d12ea6d770e64d40b67f3ec
7
+ data.tar.gz: 99a7bc4c683d61b500780c316be1c7861181cccde8f20daf6b674e96068a2fc9414db5b830013c0f6c223b33d2cc7a4d164bc62536ed0082ba419754ac8a7980
data/.travis.yml CHANGED
@@ -1,8 +1,6 @@
1
1
  language: java
2
2
  jdk:
3
3
  - oraclejdk8
4
- - oraclejdk7
5
- - openjdk7
6
4
  env:
7
5
  global:
8
6
  - GCP_EMAIL=account-2@embulk-output-gcs-test.iam.gserviceaccount.com
data/build.gradle CHANGED
@@ -14,10 +14,10 @@ configurations {
14
14
  provided
15
15
  }
16
16
 
17
- sourceCompatibility = 1.7
18
- targetCompatibility = 1.7
17
+ sourceCompatibility = 1.8
18
+ targetCompatibility = 1.8
19
19
 
20
- version = "0.4.1"
20
+ version = "0.4.2"
21
21
 
22
22
  dependencies {
23
23
  compile "org.embulk:embulk-core:0.8.6"
Binary file
@@ -1,6 +1,6 @@
1
- #Wed Jan 13 12:41:02 JST 2016
1
+ #Sun Jan 08 00:35:58 PST 2017
2
2
  distributionBase=GRADLE_USER_HOME
3
3
  distributionPath=wrapper/dists
4
4
  zipStoreBase=GRADLE_USER_HOME
5
5
  zipStorePath=wrapper/dists
6
- distributionUrl=https\://services.gradle.org/distributions/gradle-2.10-bin.zip
6
+ distributionUrl=https\://services.gradle.org/distributions/gradle-3.2.1-bin.zip
@@ -0,0 +1,20 @@
1
+ package org.embulk.output;
2
+
3
+ public enum AuthMethod
4
+ {
5
+ private_key("private_key"),
6
+ compute_engine("compute_engine"),
7
+ json_key("json_key");
8
+
9
+ private final String string;
10
+
11
+ AuthMethod(String string)
12
+ {
13
+ this.string = string;
14
+ }
15
+
16
+ public String getString()
17
+ {
18
+ return string;
19
+ }
20
+ }
@@ -1,102 +1,25 @@
1
1
  package org.embulk.output;
2
2
 
3
- import com.google.api.client.http.InputStreamContent;
4
- import com.google.api.client.repackaged.org.apache.commons.codec.binary.Base64;
5
3
  import com.google.api.services.storage.Storage;
6
- import com.google.api.services.storage.model.StorageObject;
7
4
  import com.google.common.base.Function;
8
5
  import com.google.common.base.Optional;
9
6
  import com.google.common.base.Throwables;
10
- import org.embulk.config.Config;
11
- import org.embulk.config.ConfigDefault;
12
7
  import org.embulk.config.ConfigDiff;
13
8
  import org.embulk.config.ConfigException;
14
9
  import org.embulk.config.ConfigSource;
15
- import org.embulk.config.Task;
16
10
  import org.embulk.config.TaskReport;
17
11
  import org.embulk.config.TaskSource;
18
- import org.embulk.spi.Buffer;
19
12
  import org.embulk.spi.Exec;
20
13
  import org.embulk.spi.FileOutputPlugin;
21
14
  import org.embulk.spi.TransactionalFileOutput;
22
15
  import org.embulk.spi.unit.LocalFile;
23
- import org.embulk.spi.util.RetryExecutor.RetryGiveupException;
24
- import org.embulk.spi.util.RetryExecutor.Retryable;
25
- import org.slf4j.Logger;
26
- import static org.embulk.spi.util.RetryExecutor.retryExecutor;
27
16
 
28
- import java.io.BufferedInputStream;
29
- import java.io.BufferedOutputStream;
30
- import java.io.File;
31
- import java.io.FileInputStream;
32
- import java.io.FileOutputStream;
33
17
  import java.io.IOException;
34
- import java.io.InterruptedIOException;
35
18
  import java.security.GeneralSecurityException;
36
- import java.security.MessageDigest;
37
- import java.security.NoSuchAlgorithmException;
38
- import java.util.ArrayList;
39
19
  import java.util.List;
40
- import java.util.concurrent.Callable;
41
- import java.util.concurrent.ExecutionException;
42
- import java.util.concurrent.ExecutorService;
43
- import java.util.concurrent.Executors;
44
- import java.util.concurrent.Future;
45
20
 
46
21
  public class GcsOutputPlugin implements FileOutputPlugin
47
22
  {
48
- private static final Logger logger = Exec.getLogger(GcsOutputPlugin.class);
49
-
50
- public interface PluginTask extends Task
51
- {
52
- @Config("bucket")
53
- String getBucket();
54
-
55
- @Config("path_prefix")
56
- String getPathPrefix();
57
-
58
- @Config("file_ext")
59
- String getFileNameExtension();
60
-
61
- @Config("sequence_format")
62
- @ConfigDefault("\".%03d.%02d\"")
63
- String getSequenceFormat();
64
-
65
- @Config("content_type")
66
- @ConfigDefault("\"application/octet-stream\"")
67
- String getContentType();
68
-
69
- @Config("auth_method")
70
- @ConfigDefault("\"private_key\"")
71
- AuthMethod getAuthMethod();
72
-
73
- @Config("service_account_email")
74
- @ConfigDefault("null")
75
- Optional<String> getServiceAccountEmail();
76
-
77
- // kept for backward compatibility
78
- @Config("p12_keyfile_path")
79
- @ConfigDefault("null")
80
- Optional<String> getP12KeyfilePath();
81
-
82
- @Config("p12_keyfile")
83
- @ConfigDefault("null")
84
- Optional<LocalFile> getP12Keyfile();
85
- void setP12Keyfile(Optional<LocalFile> p12Keyfile);
86
-
87
- @Config("json_keyfile")
88
- @ConfigDefault("null")
89
- Optional<LocalFile> getJsonKeyfile();
90
-
91
- @Config("application_name")
92
- @ConfigDefault("\"embulk-output-gcs\"")
93
- String getApplicationName();
94
-
95
- @Config("max_connection_retry")
96
- @ConfigDefault("10") // 10 times retry to connect GCS server if failed.
97
- int getMaxConnectionRetry();
98
- }
99
-
100
23
  @Override
101
24
  public ConfigDiff transaction(ConfigSource config,
102
25
  int taskCount,
@@ -152,7 +75,7 @@ public class GcsOutputPlugin implements FileOutputPlugin
152
75
  PluginTask task = taskSource.loadTask(PluginTask.class);
153
76
 
154
77
  Storage client = createClient(task);
155
- return new TransactionalGcsFileOutput(task, client, taskIndex);
78
+ return new GcsTransactionalFileOutput(task, client, taskIndex);
156
79
  }
157
80
 
158
81
  private GcsAuthentication newGcsAuth(PluginTask task)
@@ -192,262 +115,4 @@ public class GcsOutputPlugin implements FileOutputPlugin
192
115
  }
193
116
  };
194
117
  }
195
-
196
- static class TransactionalGcsFileOutput implements TransactionalFileOutput
197
- {
198
- private final int taskIndex;
199
- private final Storage client;
200
- private final String bucket;
201
- private final String pathPrefix;
202
- private final String pathSuffix;
203
- private final String sequenceFormat;
204
- private final String contentType;
205
- private final int maxConnectionRetry;
206
- private final List<StorageObject> storageObjects = new ArrayList<>();
207
-
208
- private int fileIndex = 0;
209
- private int callCount = 0;
210
- private BufferedOutputStream currentStream = null;
211
- private Future<StorageObject> currentUpload = null;
212
- private File tempFile = null;
213
-
214
- TransactionalGcsFileOutput(PluginTask task, Storage client, int taskIndex)
215
- {
216
- this.taskIndex = taskIndex;
217
- this.client = client;
218
- this.bucket = task.getBucket();
219
- this.pathPrefix = task.getPathPrefix();
220
- this.pathSuffix = task.getFileNameExtension();
221
- this.sequenceFormat = task.getSequenceFormat();
222
- this.contentType = task.getContentType();
223
- this.maxConnectionRetry = task.getMaxConnectionRetry();
224
- }
225
-
226
- public void nextFile()
227
- {
228
- closeCurrentUpload();
229
- try {
230
- tempFile = Exec.getTempFileSpace().createTempFile();
231
- currentStream = new BufferedOutputStream(new FileOutputStream(tempFile));
232
- fileIndex++;
233
- }
234
- catch (IOException ex) {
235
- Throwables.propagate(ex);
236
- }
237
- }
238
-
239
- @Override
240
- public void add(Buffer buffer)
241
- {
242
- try {
243
- logger.debug("#add called {} times for taskIndex {}", callCount, taskIndex);
244
- currentStream.write(buffer.array(), buffer.offset(), buffer.limit());
245
- callCount++;
246
- }
247
- catch (IOException ex) {
248
- throw new RuntimeException(ex);
249
- }
250
- finally {
251
- buffer.release();
252
- }
253
- }
254
-
255
- @Override
256
- public void finish()
257
- {
258
- String path = generateRemotePath(pathPrefix, sequenceFormat, taskIndex, fileIndex, pathSuffix);
259
- close();
260
- if (tempFile != null) {
261
- currentUpload = startUpload(path);
262
- }
263
-
264
- closeCurrentUpload();
265
- }
266
-
267
- @Override
268
- public void close()
269
- {
270
- try {
271
- if (currentStream != null) {
272
- currentStream.close();
273
- currentStream = null;
274
- }
275
- }
276
- catch (IOException ex) {
277
- throw Throwables.propagate(ex);
278
- }
279
- }
280
-
281
- @Override
282
- public void abort()
283
- {
284
- }
285
-
286
- @Override
287
- public TaskReport commit()
288
- {
289
- TaskReport report = Exec.newTaskReport();
290
- report.set("files", storageObjects);
291
- return report;
292
- }
293
-
294
- private void closeCurrentUpload()
295
- {
296
- try {
297
- if (currentUpload != null) {
298
- StorageObject obj = currentUpload.get();
299
- storageObjects.add(obj);
300
- logger.info("Uploaded '{}/{}' to {}bytes", obj.getBucket(), obj.getName(), obj.getSize());
301
- currentUpload = null;
302
- }
303
-
304
- callCount = 0;
305
- }
306
- catch (InterruptedException | ExecutionException ex) {
307
- throw Throwables.propagate(ex);
308
- }
309
- }
310
-
311
- private Future<StorageObject> startUpload(final String path)
312
- {
313
- try {
314
- final ExecutorService executor = Executors.newCachedThreadPool();
315
- final String hash = getLocalMd5hash(tempFile.getAbsolutePath());
316
-
317
- return executor.submit(new Callable<StorageObject>() {
318
- @Override
319
- public StorageObject call() throws IOException
320
- {
321
- try {
322
- logger.info("Uploading '{}/{}'", bucket, path);
323
- return execUploadWithRetry(path, hash);
324
- }
325
- finally {
326
- executor.shutdown();
327
- }
328
- }
329
- });
330
- }
331
- catch (IOException ex) {
332
- throw Throwables.propagate(ex);
333
- }
334
- }
335
-
336
- private StorageObject execUploadWithRetry(final String path, final String localHash) throws IOException
337
- {
338
- try {
339
- return retryExecutor()
340
- .withRetryLimit(maxConnectionRetry)
341
- .withInitialRetryWait(500)
342
- .withMaxRetryWait(30 * 1000)
343
- .runInterruptible(new Retryable<StorageObject>() {
344
- @Override
345
- public StorageObject call() throws IOException, RetryGiveupException
346
- {
347
- try (final BufferedInputStream inputStream = new BufferedInputStream(new FileInputStream(tempFile))) {
348
- InputStreamContent mediaContent = new InputStreamContent(contentType, inputStream);
349
- mediaContent.setCloseInputStream(true);
350
-
351
- StorageObject objectMetadata = new StorageObject();
352
- objectMetadata.setName(path);
353
-
354
- final Storage.Objects.Insert insert = client.objects().insert(bucket, objectMetadata, mediaContent);
355
- insert.setDisableGZipContent(true);
356
- StorageObject obj = insert.execute();
357
-
358
- logger.info(String.format("Local Hash(MD5): %s / Remote Hash(MD5): %s", localHash, obj.getMd5Hash()));
359
- return obj;
360
- }
361
- }
362
-
363
- @Override
364
- public boolean isRetryableException(Exception exception)
365
- {
366
- return true;
367
- }
368
-
369
- @Override
370
- public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait) throws RetryGiveupException
371
- {
372
- String message = String.format("GCS put request failed. Retrying %d/%d after %d seconds. Message: %s: %s",
373
- retryCount, retryLimit, retryWait / 1000, exception.getClass(), exception.getMessage());
374
- if (retryCount % 3 == 0) {
375
- logger.warn(message, exception);
376
- }
377
- else {
378
- logger.warn(message);
379
- }
380
- }
381
-
382
- @Override
383
- public void onGiveup(Exception firstException, Exception lastException) throws RetryGiveupException
384
- {
385
- }
386
- });
387
- }
388
- catch (RetryGiveupException ex) {
389
- throw Throwables.propagate(ex.getCause());
390
- }
391
- catch (InterruptedException ex) {
392
- throw new InterruptedIOException();
393
- }
394
- }
395
-
396
- /*
397
- MD5 hash sum on GCS bucket is encoded with base64.
398
- You can get same hash with following commands.
399
- $ openssl dgst -md5 -binary /path/to/file.txt | openssl enc -base64
400
- or
401
- $ gsutil hash -m /path/to/file.txt
402
- */
403
- private String getLocalMd5hash(String filePath) throws IOException
404
- {
405
- try {
406
- MessageDigest md = MessageDigest.getInstance("MD5");
407
- try (BufferedInputStream input = new BufferedInputStream(new FileInputStream(new File(filePath)))) {
408
- byte[] buffer = new byte[256];
409
- int len;
410
- while ((len = input.read(buffer, 0, buffer.length)) >= 0) {
411
- md.update(buffer, 0, len);
412
- }
413
- return new String(Base64.encodeBase64(md.digest()));
414
- }
415
- }
416
- catch (NoSuchAlgorithmException ex) {
417
- throw new ConfigException("MD5 algorism not found");
418
- }
419
- }
420
- }
421
-
422
- /**
423
- * GCS has character limitation in object names.
424
- * @see https://cloud.google.com/storage/docs/naming#objectnames
425
- * Although "." isn't listed at above pages, we can't access "./" path from GUI console.
426
- * And in many cases, user don't intend of creating "/" directory under the bucket.
427
- * This method normalizes path when it contains "./" and "/" and its variations at the beginning
428
- */
429
- private static String generateRemotePath(String pathPrefix, String sequenceFormat, int taskIndex, int fileIndex, String pathSuffix)
430
- {
431
- String path = pathPrefix + String.format(sequenceFormat, taskIndex, fileIndex) + pathSuffix;
432
- return path.replaceFirst("^\\.*/*", "");
433
- }
434
-
435
- public enum AuthMethod
436
- {
437
- private_key("private_key"),
438
- compute_engine("compute_engine"),
439
- json_key("json_key");
440
-
441
- private final String string;
442
-
443
- AuthMethod(String string)
444
- {
445
- this.string = string;
446
- }
447
-
448
- public String getString()
449
- {
450
- return string;
451
- }
452
- }
453
118
  }
@@ -0,0 +1,252 @@
1
+ package org.embulk.output;
2
+
3
+ import com.google.api.client.http.InputStreamContent;
4
+ import com.google.api.client.repackaged.org.apache.commons.codec.binary.Base64;
5
+ import com.google.api.services.storage.Storage;
6
+ import com.google.api.services.storage.model.StorageObject;
7
+ import com.google.common.base.Throwables;
8
+ import org.embulk.config.ConfigException;
9
+ import org.embulk.config.TaskReport;
10
+ import org.embulk.spi.Buffer;
11
+ import org.embulk.spi.Exec;
12
+ import org.embulk.spi.TransactionalFileOutput;
13
+ import org.embulk.spi.util.RetryExecutor.RetryGiveupException;
14
+ import org.embulk.spi.util.RetryExecutor.Retryable;
15
+ import org.slf4j.Logger;
16
+
17
+ import java.io.BufferedInputStream;
18
+ import java.io.BufferedOutputStream;
19
+ import java.io.File;
20
+ import java.io.FileInputStream;
21
+ import java.io.FileOutputStream;
22
+ import java.io.IOException;
23
+ import java.io.InterruptedIOException;
24
+ import java.security.MessageDigest;
25
+ import java.security.NoSuchAlgorithmException;
26
+ import java.util.ArrayList;
27
+ import java.util.List;
28
+
29
+ import static org.embulk.spi.util.RetryExecutor.retryExecutor;
30
+
31
+ public class GcsTransactionalFileOutput implements TransactionalFileOutput
32
+ {
33
+ private static final Logger logger = Exec.getLogger(GcsTransactionalFileOutput.class);
34
+
35
+ private final int taskIndex;
36
+ private final Storage client;
37
+ private final String bucket;
38
+ private final String pathPrefix;
39
+ private final String pathSuffix;
40
+ private final String sequenceFormat;
41
+ private final String contentType;
42
+ private final int maxConnectionRetry;
43
+ private final List<StorageObject> storageObjects = new ArrayList<>();
44
+
45
+ private int fileIndex = 0;
46
+ private int callCount = 0;
47
+ private BufferedOutputStream currentStream = null;
48
+ private StorageObject currentUpload = null;
49
+ private File tempFile = null;
50
+
51
+ GcsTransactionalFileOutput(PluginTask task, Storage client, int taskIndex)
52
+ {
53
+ this.taskIndex = taskIndex;
54
+ this.client = client;
55
+ this.bucket = task.getBucket();
56
+ this.pathPrefix = task.getPathPrefix();
57
+ this.pathSuffix = task.getFileNameExtension();
58
+ this.sequenceFormat = task.getSequenceFormat();
59
+ this.contentType = task.getContentType();
60
+ this.maxConnectionRetry = task.getMaxConnectionRetry();
61
+ }
62
+
63
+ public void nextFile()
64
+ {
65
+ closeCurrentUpload();
66
+ try {
67
+ tempFile = Exec.getTempFileSpace().createTempFile();
68
+ currentStream = new BufferedOutputStream(new FileOutputStream(tempFile));
69
+ fileIndex++;
70
+ }
71
+ catch (IOException ex) {
72
+ Throwables.propagate(ex);
73
+ }
74
+ }
75
+
76
+ @Override
77
+ public void add(Buffer buffer)
78
+ {
79
+ try {
80
+ logger.debug("#add called {} times for taskIndex {}", callCount, taskIndex);
81
+ currentStream.write(buffer.array(), buffer.offset(), buffer.limit());
82
+ callCount++;
83
+ }
84
+ catch (IOException ex) {
85
+ throw new RuntimeException(ex);
86
+ }
87
+ finally {
88
+ buffer.release();
89
+ }
90
+ }
91
+
92
+ @Override
93
+ public void finish()
94
+ {
95
+ String path = generateRemotePath(pathPrefix, sequenceFormat, taskIndex, fileIndex, pathSuffix);
96
+ close();
97
+ if (tempFile != null) {
98
+ currentUpload = startUpload(path);
99
+ }
100
+
101
+ closeCurrentUpload();
102
+ }
103
+
104
+ @Override
105
+ public void close()
106
+ {
107
+ try {
108
+ if (currentStream != null) {
109
+ currentStream.close();
110
+ currentStream = null;
111
+ }
112
+ }
113
+ catch (IOException ex) {
114
+ throw Throwables.propagate(ex);
115
+ }
116
+ }
117
+
118
+ @Override
119
+ public void abort()
120
+ {
121
+ }
122
+
123
+ @Override
124
+ public TaskReport commit()
125
+ {
126
+ TaskReport report = Exec.newTaskReport();
127
+ report.set("files", storageObjects);
128
+ return report;
129
+ }
130
+
131
+ private void closeCurrentUpload()
132
+ {
133
+ if (currentUpload != null) {
134
+ StorageObject obj = currentUpload;
135
+ storageObjects.add(obj);
136
+ logger.info("Uploaded '{}/{}' to {}bytes", obj.getBucket(), obj.getName(), obj.getSize());
137
+ currentUpload = null;
138
+ }
139
+
140
+ callCount = 0;
141
+ }
142
+
143
+ private StorageObject startUpload(final String path)
144
+ {
145
+ try {
146
+ final String hash = getLocalMd5hash(tempFile.getAbsolutePath());
147
+
148
+ return execUploadWithRetry(path, hash);
149
+ }
150
+ catch (IOException ex) {
151
+ throw Throwables.propagate(ex);
152
+ }
153
+ }
154
+
155
+ private StorageObject execUploadWithRetry(final String path, final String localHash) throws IOException
156
+ {
157
+ try {
158
+ return retryExecutor()
159
+ .withRetryLimit(maxConnectionRetry)
160
+ .withInitialRetryWait(500)
161
+ .withMaxRetryWait(30 * 1000)
162
+ .runInterruptible(new Retryable<StorageObject>() {
163
+ @Override
164
+ public StorageObject call() throws IOException
165
+ {
166
+ try (final BufferedInputStream inputStream = new BufferedInputStream(new FileInputStream(tempFile))) {
167
+ InputStreamContent mediaContent = new InputStreamContent(contentType, inputStream);
168
+ mediaContent.setCloseInputStream(true);
169
+
170
+ StorageObject objectMetadata = new StorageObject();
171
+ objectMetadata.setName(path);
172
+
173
+ final Storage.Objects.Insert insert = client.objects().insert(bucket, objectMetadata, mediaContent);
174
+ insert.setDisableGZipContent(true);
175
+ StorageObject obj = insert.execute();
176
+
177
+ logger.info(String.format("Local Hash(MD5): %s / Remote Hash(MD5): %s", localHash, obj.getMd5Hash()));
178
+ return obj;
179
+ }
180
+ }
181
+
182
+ @Override
183
+ public boolean isRetryableException(Exception exception)
184
+ {
185
+ return true;
186
+ }
187
+
188
+ @Override
189
+ public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait) throws RetryGiveupException
190
+ {
191
+ String message = String.format("GCS put request failed. Retrying %d/%d after %d seconds. Message: %s: %s",
192
+ retryCount, retryLimit, retryWait / 1000, exception.getClass(), exception.getMessage());
193
+ if (retryCount % 3 == 0) {
194
+ logger.warn(message, exception);
195
+ }
196
+ else {
197
+ logger.warn(message);
198
+ }
199
+ }
200
+
201
+ @Override
202
+ public void onGiveup(Exception firstException, Exception lastException) throws RetryGiveupException
203
+ {
204
+ }
205
+ });
206
+ }
207
+ catch (RetryGiveupException ex) {
208
+ throw Throwables.propagate(ex.getCause());
209
+ }
210
+ catch (InterruptedException ex) {
211
+ throw new InterruptedIOException();
212
+ }
213
+ }
214
+
215
+ /*
216
+ MD5 hash sum on GCS bucket is encoded with base64.
217
+ You can get same hash with following commands.
218
+ $ openssl dgst -md5 -binary /path/to/file.txt | openssl enc -base64
219
+ or
220
+ $ gsutil hash -m /path/to/file.txt
221
+ */
222
+ private String getLocalMd5hash(String filePath) throws IOException
223
+ {
224
+ try {
225
+ MessageDigest md = MessageDigest.getInstance("MD5");
226
+ try (BufferedInputStream input = new BufferedInputStream(new FileInputStream(new File(filePath)))) {
227
+ byte[] buffer = new byte[256];
228
+ int len;
229
+ while ((len = input.read(buffer, 0, buffer.length)) >= 0) {
230
+ md.update(buffer, 0, len);
231
+ }
232
+ return new String(Base64.encodeBase64(md.digest()));
233
+ }
234
+ }
235
+ catch (NoSuchAlgorithmException ex) {
236
+ throw new ConfigException("MD5 algorism not found");
237
+ }
238
+ }
239
+
240
+ /**
241
+ * GCS has character limitation in object names.
242
+ * @see https://cloud.google.com/storage/docs/naming#objectnames
243
+ * Although "." isn't listed at above pages, we can't access "./" path from GUI console.
244
+ * And in many cases, user don't intend of creating "/" directory under the bucket.
245
+ * This method normalizes path when it contains "./" and "/" and its variations at the beginning
246
+ */
247
+ private static String generateRemotePath(String pathPrefix, String sequenceFormat, int taskIndex, int fileIndex, String pathSuffix)
248
+ {
249
+ String path = pathPrefix + String.format(sequenceFormat, taskIndex, fileIndex) + pathSuffix;
250
+ return path.replaceFirst("^\\.*/*", "");
251
+ }
252
+ }
@@ -0,0 +1,57 @@
1
+ package org.embulk.output;
2
+
3
+ import com.google.common.base.Optional;
4
+ import org.embulk.config.Config;
5
+ import org.embulk.config.ConfigDefault;
6
+ import org.embulk.config.Task;
7
+ import org.embulk.spi.unit.LocalFile;
8
+
9
+ public interface PluginTask extends Task
10
+ {
11
+ @Config("bucket")
12
+ String getBucket();
13
+
14
+ @Config("path_prefix")
15
+ String getPathPrefix();
16
+
17
+ @Config("file_ext")
18
+ String getFileNameExtension();
19
+
20
+ @Config("sequence_format")
21
+ @ConfigDefault("\".%03d.%02d\"")
22
+ String getSequenceFormat();
23
+
24
+ @Config("content_type")
25
+ @ConfigDefault("\"application/octet-stream\"")
26
+ String getContentType();
27
+
28
+ @Config("auth_method")
29
+ @ConfigDefault("\"private_key\"")
30
+ AuthMethod getAuthMethod();
31
+
32
+ @Config("service_account_email")
33
+ @ConfigDefault("null")
34
+ Optional<String> getServiceAccountEmail();
35
+
36
+ // kept for backward compatibility
37
+ @Config("p12_keyfile_path")
38
+ @ConfigDefault("null")
39
+ Optional<String> getP12KeyfilePath();
40
+
41
+ @Config("p12_keyfile")
42
+ @ConfigDefault("null")
43
+ Optional<LocalFile> getP12Keyfile();
44
+ void setP12Keyfile(Optional<LocalFile> p12Keyfile);
45
+
46
+ @Config("json_keyfile")
47
+ @ConfigDefault("null")
48
+ Optional<LocalFile> getJsonKeyfile();
49
+
50
+ @Config("application_name")
51
+ @ConfigDefault("\"embulk-output-gcs\"")
52
+ String getApplicationName();
53
+
54
+ @Config("max_connection_retry")
55
+ @ConfigDefault("10") // 10 times retry to connect GCS server if failed.
56
+ int getMaxConnectionRetry();
57
+ }
@@ -11,7 +11,6 @@ import org.embulk.config.ConfigException;
11
11
  import org.embulk.config.ConfigSource;
12
12
  import org.embulk.config.TaskReport;
13
13
  import org.embulk.config.TaskSource;
14
- import org.embulk.output.GcsOutputPlugin.PluginTask;
15
14
  import org.embulk.spi.Buffer;
16
15
  import org.embulk.spi.Exec;
17
16
  import org.embulk.spi.FileOutputPlugin;
@@ -98,7 +97,7 @@ public class TestGcsOutputPlugin
98
97
  .set("file_ext", ".csv")
99
98
  .set("formatter", formatterConfig());
100
99
 
101
- GcsOutputPlugin.PluginTask task = config.loadConfig(PluginTask.class);
100
+ PluginTask task = config.loadConfig(PluginTask.class);
102
101
  assertEquals("private_key", task.getAuthMethod().toString());
103
102
  }
104
103
 
@@ -284,7 +283,7 @@ public class TestGcsOutputPlugin
284
283
  {
285
284
  ConfigSource configSource = config();
286
285
  PluginTask task = configSource.loadConfig(PluginTask.class);
287
- Method method = GcsOutputPlugin.class.getDeclaredMethod("generateRemotePath", String.class, String.class, int.class, int.class, String.class);
286
+ Method method = GcsTransactionalFileOutput.class.getDeclaredMethod("generateRemotePath", String.class, String.class, int.class, int.class, String.class);
288
287
  method.setAccessible(true);
289
288
  assertEquals("sample.000.01.csv", method.invoke(plugin, "/sample", task.getSequenceFormat(), 0, 1, ".csv"));
290
289
  assertEquals("sample.000.01.csv", method.invoke(plugin, "./sample", task.getSequenceFormat(), 0, 1, ".csv"));
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-output-gcs
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.1
4
+ version: 0.4.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kazuyuki Honda
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-05-29 00:00:00.000000000 Z
11
+ date: 2018-07-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -62,24 +62,27 @@ files:
62
62
  - gradlew
63
63
  - gradlew.bat
64
64
  - lib/embulk/output/gcs.rb
65
+ - src/main/java/org/embulk/output/AuthMethod.java
65
66
  - src/main/java/org/embulk/output/GcsAuthentication.java
66
67
  - src/main/java/org/embulk/output/GcsOutputPlugin.java
68
+ - src/main/java/org/embulk/output/GcsTransactionalFileOutput.java
69
+ - src/main/java/org/embulk/output/PluginTask.java
67
70
  - src/test/java/org/embulk/output/TestGcsAuthentication.java
68
71
  - src/test/java/org/embulk/output/TestGcsOutputPlugin.java
69
72
  - src/test/resources/keys.tar.enc
70
73
  - src/test/resources/sample_01.csv
71
74
  - src/test/resources/sample_02.csv
72
- - classpath/commons-codec-1.3.jar
73
- - classpath/commons-logging-1.1.1.jar
74
- - classpath/embulk-output-gcs-0.4.1.jar
75
- - classpath/google-api-client-1.19.1.jar
76
75
  - classpath/google-api-services-storage-v1-rev28-1.19.1.jar
76
+ - classpath/httpclient-4.0.1.jar
77
+ - classpath/jsr305-1.3.9.jar
78
+ - classpath/embulk-output-gcs-0.4.2.jar
79
+ - classpath/commons-logging-1.1.1.jar
77
80
  - classpath/google-http-client-1.19.0.jar
81
+ - classpath/google-api-client-1.19.1.jar
82
+ - classpath/commons-codec-1.3.jar
83
+ - classpath/httpcore-4.0.1.jar
78
84
  - classpath/google-http-client-jackson2-1.19.0.jar
79
85
  - classpath/google-oauth-client-1.19.0.jar
80
- - classpath/httpclient-4.0.1.jar
81
- - classpath/httpcore-4.0.1.jar
82
- - classpath/jsr305-1.3.9.jar
83
86
  homepage: https://github.com/hakobera/embulk-output-gcs
84
87
  licenses:
85
88
  - MIT