embulk-output-gcs 0.4.1 → 0.4.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +0 -2
- data/build.gradle +3 -3
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +2 -2
- data/src/main/java/org/embulk/output/AuthMethod.java +20 -0
- data/src/main/java/org/embulk/output/GcsOutputPlugin.java +1 -336
- data/src/main/java/org/embulk/output/GcsTransactionalFileOutput.java +252 -0
- data/src/main/java/org/embulk/output/PluginTask.java +57 -0
- data/src/test/java/org/embulk/output/TestGcsOutputPlugin.java +2 -3
- metadata +12 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c0d6f9617394f320742be74ac318d99ef0f93c06
|
4
|
+
data.tar.gz: af9f3a3679a2d76b5c2944fd7e8264ecab3b3a27
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 086960d449e95821defa56b6860eaf2a45fe753db3c7d763272c694ae5fd87b07aba937bb170f748b6d779775e71145c9ff350528d12ea6d770e64d40b67f3ec
|
7
|
+
data.tar.gz: 99a7bc4c683d61b500780c316be1c7861181cccde8f20daf6b674e96068a2fc9414db5b830013c0f6c223b33d2cc7a4d164bc62536ed0082ba419754ac8a7980
|
data/.travis.yml
CHANGED
data/build.gradle
CHANGED
@@ -14,10 +14,10 @@ configurations {
|
|
14
14
|
provided
|
15
15
|
}
|
16
16
|
|
17
|
-
sourceCompatibility = 1.
|
18
|
-
targetCompatibility = 1.
|
17
|
+
sourceCompatibility = 1.8
|
18
|
+
targetCompatibility = 1.8
|
19
19
|
|
20
|
-
version = "0.4.1"
|
20
|
+
version = "0.4.2"
|
21
21
|
|
22
22
|
dependencies {
|
23
23
|
compile "org.embulk:embulk-core:0.8.6"
|
Binary file
|
@@ -1,6 +1,6 @@
|
|
1
|
-
#
|
1
|
+
#Sun Jan 08 00:35:58 PST 2017
|
2
2
|
distributionBase=GRADLE_USER_HOME
|
3
3
|
distributionPath=wrapper/dists
|
4
4
|
zipStoreBase=GRADLE_USER_HOME
|
5
5
|
zipStorePath=wrapper/dists
|
6
|
-
distributionUrl=https\://services.gradle.org/distributions/gradle-2.
|
6
|
+
distributionUrl=https\://services.gradle.org/distributions/gradle-3.2.1-bin.zip
|
@@ -0,0 +1,20 @@
|
|
1
|
+
package org.embulk.output;
|
2
|
+
|
3
|
+
public enum AuthMethod
|
4
|
+
{
|
5
|
+
private_key("private_key"),
|
6
|
+
compute_engine("compute_engine"),
|
7
|
+
json_key("json_key");
|
8
|
+
|
9
|
+
private final String string;
|
10
|
+
|
11
|
+
AuthMethod(String string)
|
12
|
+
{
|
13
|
+
this.string = string;
|
14
|
+
}
|
15
|
+
|
16
|
+
public String getString()
|
17
|
+
{
|
18
|
+
return string;
|
19
|
+
}
|
20
|
+
}
|
@@ -1,102 +1,25 @@
|
|
1
1
|
package org.embulk.output;
|
2
2
|
|
3
|
-
import com.google.api.client.http.InputStreamContent;
|
4
|
-
import com.google.api.client.repackaged.org.apache.commons.codec.binary.Base64;
|
5
3
|
import com.google.api.services.storage.Storage;
|
6
|
-
import com.google.api.services.storage.model.StorageObject;
|
7
4
|
import com.google.common.base.Function;
|
8
5
|
import com.google.common.base.Optional;
|
9
6
|
import com.google.common.base.Throwables;
|
10
|
-
import org.embulk.config.Config;
|
11
|
-
import org.embulk.config.ConfigDefault;
|
12
7
|
import org.embulk.config.ConfigDiff;
|
13
8
|
import org.embulk.config.ConfigException;
|
14
9
|
import org.embulk.config.ConfigSource;
|
15
|
-
import org.embulk.config.Task;
|
16
10
|
import org.embulk.config.TaskReport;
|
17
11
|
import org.embulk.config.TaskSource;
|
18
|
-
import org.embulk.spi.Buffer;
|
19
12
|
import org.embulk.spi.Exec;
|
20
13
|
import org.embulk.spi.FileOutputPlugin;
|
21
14
|
import org.embulk.spi.TransactionalFileOutput;
|
22
15
|
import org.embulk.spi.unit.LocalFile;
|
23
|
-
import org.embulk.spi.util.RetryExecutor.RetryGiveupException;
|
24
|
-
import org.embulk.spi.util.RetryExecutor.Retryable;
|
25
|
-
import org.slf4j.Logger;
|
26
|
-
import static org.embulk.spi.util.RetryExecutor.retryExecutor;
|
27
16
|
|
28
|
-
import java.io.BufferedInputStream;
|
29
|
-
import java.io.BufferedOutputStream;
|
30
|
-
import java.io.File;
|
31
|
-
import java.io.FileInputStream;
|
32
|
-
import java.io.FileOutputStream;
|
33
17
|
import java.io.IOException;
|
34
|
-
import java.io.InterruptedIOException;
|
35
18
|
import java.security.GeneralSecurityException;
|
36
|
-
import java.security.MessageDigest;
|
37
|
-
import java.security.NoSuchAlgorithmException;
|
38
|
-
import java.util.ArrayList;
|
39
19
|
import java.util.List;
|
40
|
-
import java.util.concurrent.Callable;
|
41
|
-
import java.util.concurrent.ExecutionException;
|
42
|
-
import java.util.concurrent.ExecutorService;
|
43
|
-
import java.util.concurrent.Executors;
|
44
|
-
import java.util.concurrent.Future;
|
45
20
|
|
46
21
|
public class GcsOutputPlugin implements FileOutputPlugin
|
47
22
|
{
|
48
|
-
private static final Logger logger = Exec.getLogger(GcsOutputPlugin.class);
|
49
|
-
|
50
|
-
public interface PluginTask extends Task
|
51
|
-
{
|
52
|
-
@Config("bucket")
|
53
|
-
String getBucket();
|
54
|
-
|
55
|
-
@Config("path_prefix")
|
56
|
-
String getPathPrefix();
|
57
|
-
|
58
|
-
@Config("file_ext")
|
59
|
-
String getFileNameExtension();
|
60
|
-
|
61
|
-
@Config("sequence_format")
|
62
|
-
@ConfigDefault("\".%03d.%02d\"")
|
63
|
-
String getSequenceFormat();
|
64
|
-
|
65
|
-
@Config("content_type")
|
66
|
-
@ConfigDefault("\"application/octet-stream\"")
|
67
|
-
String getContentType();
|
68
|
-
|
69
|
-
@Config("auth_method")
|
70
|
-
@ConfigDefault("\"private_key\"")
|
71
|
-
AuthMethod getAuthMethod();
|
72
|
-
|
73
|
-
@Config("service_account_email")
|
74
|
-
@ConfigDefault("null")
|
75
|
-
Optional<String> getServiceAccountEmail();
|
76
|
-
|
77
|
-
// kept for backward compatibility
|
78
|
-
@Config("p12_keyfile_path")
|
79
|
-
@ConfigDefault("null")
|
80
|
-
Optional<String> getP12KeyfilePath();
|
81
|
-
|
82
|
-
@Config("p12_keyfile")
|
83
|
-
@ConfigDefault("null")
|
84
|
-
Optional<LocalFile> getP12Keyfile();
|
85
|
-
void setP12Keyfile(Optional<LocalFile> p12Keyfile);
|
86
|
-
|
87
|
-
@Config("json_keyfile")
|
88
|
-
@ConfigDefault("null")
|
89
|
-
Optional<LocalFile> getJsonKeyfile();
|
90
|
-
|
91
|
-
@Config("application_name")
|
92
|
-
@ConfigDefault("\"embulk-output-gcs\"")
|
93
|
-
String getApplicationName();
|
94
|
-
|
95
|
-
@Config("max_connection_retry")
|
96
|
-
@ConfigDefault("10") // 10 times retry to connect GCS server if failed.
|
97
|
-
int getMaxConnectionRetry();
|
98
|
-
}
|
99
|
-
|
100
23
|
@Override
|
101
24
|
public ConfigDiff transaction(ConfigSource config,
|
102
25
|
int taskCount,
|
@@ -152,7 +75,7 @@ public class GcsOutputPlugin implements FileOutputPlugin
|
|
152
75
|
PluginTask task = taskSource.loadTask(PluginTask.class);
|
153
76
|
|
154
77
|
Storage client = createClient(task);
|
155
|
-
return new TransactionalGcsFileOutput(task, client, taskIndex);
|
78
|
+
return new GcsTransactionalFileOutput(task, client, taskIndex);
|
156
79
|
}
|
157
80
|
|
158
81
|
private GcsAuthentication newGcsAuth(PluginTask task)
|
@@ -192,262 +115,4 @@ public class GcsOutputPlugin implements FileOutputPlugin
|
|
192
115
|
}
|
193
116
|
};
|
194
117
|
}
|
195
|
-
|
196
|
-
static class TransactionalGcsFileOutput implements TransactionalFileOutput
|
197
|
-
{
|
198
|
-
private final int taskIndex;
|
199
|
-
private final Storage client;
|
200
|
-
private final String bucket;
|
201
|
-
private final String pathPrefix;
|
202
|
-
private final String pathSuffix;
|
203
|
-
private final String sequenceFormat;
|
204
|
-
private final String contentType;
|
205
|
-
private final int maxConnectionRetry;
|
206
|
-
private final List<StorageObject> storageObjects = new ArrayList<>();
|
207
|
-
|
208
|
-
private int fileIndex = 0;
|
209
|
-
private int callCount = 0;
|
210
|
-
private BufferedOutputStream currentStream = null;
|
211
|
-
private Future<StorageObject> currentUpload = null;
|
212
|
-
private File tempFile = null;
|
213
|
-
|
214
|
-
TransactionalGcsFileOutput(PluginTask task, Storage client, int taskIndex)
|
215
|
-
{
|
216
|
-
this.taskIndex = taskIndex;
|
217
|
-
this.client = client;
|
218
|
-
this.bucket = task.getBucket();
|
219
|
-
this.pathPrefix = task.getPathPrefix();
|
220
|
-
this.pathSuffix = task.getFileNameExtension();
|
221
|
-
this.sequenceFormat = task.getSequenceFormat();
|
222
|
-
this.contentType = task.getContentType();
|
223
|
-
this.maxConnectionRetry = task.getMaxConnectionRetry();
|
224
|
-
}
|
225
|
-
|
226
|
-
public void nextFile()
|
227
|
-
{
|
228
|
-
closeCurrentUpload();
|
229
|
-
try {
|
230
|
-
tempFile = Exec.getTempFileSpace().createTempFile();
|
231
|
-
currentStream = new BufferedOutputStream(new FileOutputStream(tempFile));
|
232
|
-
fileIndex++;
|
233
|
-
}
|
234
|
-
catch (IOException ex) {
|
235
|
-
Throwables.propagate(ex);
|
236
|
-
}
|
237
|
-
}
|
238
|
-
|
239
|
-
@Override
|
240
|
-
public void add(Buffer buffer)
|
241
|
-
{
|
242
|
-
try {
|
243
|
-
logger.debug("#add called {} times for taskIndex {}", callCount, taskIndex);
|
244
|
-
currentStream.write(buffer.array(), buffer.offset(), buffer.limit());
|
245
|
-
callCount++;
|
246
|
-
}
|
247
|
-
catch (IOException ex) {
|
248
|
-
throw new RuntimeException(ex);
|
249
|
-
}
|
250
|
-
finally {
|
251
|
-
buffer.release();
|
252
|
-
}
|
253
|
-
}
|
254
|
-
|
255
|
-
@Override
|
256
|
-
public void finish()
|
257
|
-
{
|
258
|
-
String path = generateRemotePath(pathPrefix, sequenceFormat, taskIndex, fileIndex, pathSuffix);
|
259
|
-
close();
|
260
|
-
if (tempFile != null) {
|
261
|
-
currentUpload = startUpload(path);
|
262
|
-
}
|
263
|
-
|
264
|
-
closeCurrentUpload();
|
265
|
-
}
|
266
|
-
|
267
|
-
@Override
|
268
|
-
public void close()
|
269
|
-
{
|
270
|
-
try {
|
271
|
-
if (currentStream != null) {
|
272
|
-
currentStream.close();
|
273
|
-
currentStream = null;
|
274
|
-
}
|
275
|
-
}
|
276
|
-
catch (IOException ex) {
|
277
|
-
throw Throwables.propagate(ex);
|
278
|
-
}
|
279
|
-
}
|
280
|
-
|
281
|
-
@Override
|
282
|
-
public void abort()
|
283
|
-
{
|
284
|
-
}
|
285
|
-
|
286
|
-
@Override
|
287
|
-
public TaskReport commit()
|
288
|
-
{
|
289
|
-
TaskReport report = Exec.newTaskReport();
|
290
|
-
report.set("files", storageObjects);
|
291
|
-
return report;
|
292
|
-
}
|
293
|
-
|
294
|
-
private void closeCurrentUpload()
|
295
|
-
{
|
296
|
-
try {
|
297
|
-
if (currentUpload != null) {
|
298
|
-
StorageObject obj = currentUpload.get();
|
299
|
-
storageObjects.add(obj);
|
300
|
-
logger.info("Uploaded '{}/{}' to {}bytes", obj.getBucket(), obj.getName(), obj.getSize());
|
301
|
-
currentUpload = null;
|
302
|
-
}
|
303
|
-
|
304
|
-
callCount = 0;
|
305
|
-
}
|
306
|
-
catch (InterruptedException | ExecutionException ex) {
|
307
|
-
throw Throwables.propagate(ex);
|
308
|
-
}
|
309
|
-
}
|
310
|
-
|
311
|
-
private Future<StorageObject> startUpload(final String path)
|
312
|
-
{
|
313
|
-
try {
|
314
|
-
final ExecutorService executor = Executors.newCachedThreadPool();
|
315
|
-
final String hash = getLocalMd5hash(tempFile.getAbsolutePath());
|
316
|
-
|
317
|
-
return executor.submit(new Callable<StorageObject>() {
|
318
|
-
@Override
|
319
|
-
public StorageObject call() throws IOException
|
320
|
-
{
|
321
|
-
try {
|
322
|
-
logger.info("Uploading '{}/{}'", bucket, path);
|
323
|
-
return execUploadWithRetry(path, hash);
|
324
|
-
}
|
325
|
-
finally {
|
326
|
-
executor.shutdown();
|
327
|
-
}
|
328
|
-
}
|
329
|
-
});
|
330
|
-
}
|
331
|
-
catch (IOException ex) {
|
332
|
-
throw Throwables.propagate(ex);
|
333
|
-
}
|
334
|
-
}
|
335
|
-
|
336
|
-
private StorageObject execUploadWithRetry(final String path, final String localHash) throws IOException
|
337
|
-
{
|
338
|
-
try {
|
339
|
-
return retryExecutor()
|
340
|
-
.withRetryLimit(maxConnectionRetry)
|
341
|
-
.withInitialRetryWait(500)
|
342
|
-
.withMaxRetryWait(30 * 1000)
|
343
|
-
.runInterruptible(new Retryable<StorageObject>() {
|
344
|
-
@Override
|
345
|
-
public StorageObject call() throws IOException, RetryGiveupException
|
346
|
-
{
|
347
|
-
try (final BufferedInputStream inputStream = new BufferedInputStream(new FileInputStream(tempFile))) {
|
348
|
-
InputStreamContent mediaContent = new InputStreamContent(contentType, inputStream);
|
349
|
-
mediaContent.setCloseInputStream(true);
|
350
|
-
|
351
|
-
StorageObject objectMetadata = new StorageObject();
|
352
|
-
objectMetadata.setName(path);
|
353
|
-
|
354
|
-
final Storage.Objects.Insert insert = client.objects().insert(bucket, objectMetadata, mediaContent);
|
355
|
-
insert.setDisableGZipContent(true);
|
356
|
-
StorageObject obj = insert.execute();
|
357
|
-
|
358
|
-
logger.info(String.format("Local Hash(MD5): %s / Remote Hash(MD5): %s", localHash, obj.getMd5Hash()));
|
359
|
-
return obj;
|
360
|
-
}
|
361
|
-
}
|
362
|
-
|
363
|
-
@Override
|
364
|
-
public boolean isRetryableException(Exception exception)
|
365
|
-
{
|
366
|
-
return true;
|
367
|
-
}
|
368
|
-
|
369
|
-
@Override
|
370
|
-
public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait) throws RetryGiveupException
|
371
|
-
{
|
372
|
-
String message = String.format("GCS put request failed. Retrying %d/%d after %d seconds. Message: %s: %s",
|
373
|
-
retryCount, retryLimit, retryWait / 1000, exception.getClass(), exception.getMessage());
|
374
|
-
if (retryCount % 3 == 0) {
|
375
|
-
logger.warn(message, exception);
|
376
|
-
}
|
377
|
-
else {
|
378
|
-
logger.warn(message);
|
379
|
-
}
|
380
|
-
}
|
381
|
-
|
382
|
-
@Override
|
383
|
-
public void onGiveup(Exception firstException, Exception lastException) throws RetryGiveupException
|
384
|
-
{
|
385
|
-
}
|
386
|
-
});
|
387
|
-
}
|
388
|
-
catch (RetryGiveupException ex) {
|
389
|
-
throw Throwables.propagate(ex.getCause());
|
390
|
-
}
|
391
|
-
catch (InterruptedException ex) {
|
392
|
-
throw new InterruptedIOException();
|
393
|
-
}
|
394
|
-
}
|
395
|
-
|
396
|
-
/*
|
397
|
-
MD5 hash sum on GCS bucket is encoded with base64.
|
398
|
-
You can get same hash with following commands.
|
399
|
-
$ openssl dgst -md5 -binary /path/to/file.txt | openssl enc -base64
|
400
|
-
or
|
401
|
-
$ gsutil hash -m /path/to/file.txt
|
402
|
-
*/
|
403
|
-
private String getLocalMd5hash(String filePath) throws IOException
|
404
|
-
{
|
405
|
-
try {
|
406
|
-
MessageDigest md = MessageDigest.getInstance("MD5");
|
407
|
-
try (BufferedInputStream input = new BufferedInputStream(new FileInputStream(new File(filePath)))) {
|
408
|
-
byte[] buffer = new byte[256];
|
409
|
-
int len;
|
410
|
-
while ((len = input.read(buffer, 0, buffer.length)) >= 0) {
|
411
|
-
md.update(buffer, 0, len);
|
412
|
-
}
|
413
|
-
return new String(Base64.encodeBase64(md.digest()));
|
414
|
-
}
|
415
|
-
}
|
416
|
-
catch (NoSuchAlgorithmException ex) {
|
417
|
-
throw new ConfigException("MD5 algorism not found");
|
418
|
-
}
|
419
|
-
}
|
420
|
-
}
|
421
|
-
|
422
|
-
/**
|
423
|
-
* GCS has character limitation in object names.
|
424
|
-
* @see https://cloud.google.com/storage/docs/naming#objectnames
|
425
|
-
* Although "." isn't listed at above pages, we can't access "./" path from GUI console.
|
426
|
-
* And in many cases, user don't intend of creating "/" directory under the bucket.
|
427
|
-
* This method normalizes path when it contains "./" and "/" and its variations at the beginning
|
428
|
-
*/
|
429
|
-
private static String generateRemotePath(String pathPrefix, String sequenceFormat, int taskIndex, int fileIndex, String pathSuffix)
|
430
|
-
{
|
431
|
-
String path = pathPrefix + String.format(sequenceFormat, taskIndex, fileIndex) + pathSuffix;
|
432
|
-
return path.replaceFirst("^\\.*/*", "");
|
433
|
-
}
|
434
|
-
|
435
|
-
public enum AuthMethod
|
436
|
-
{
|
437
|
-
private_key("private_key"),
|
438
|
-
compute_engine("compute_engine"),
|
439
|
-
json_key("json_key");
|
440
|
-
|
441
|
-
private final String string;
|
442
|
-
|
443
|
-
AuthMethod(String string)
|
444
|
-
{
|
445
|
-
this.string = string;
|
446
|
-
}
|
447
|
-
|
448
|
-
public String getString()
|
449
|
-
{
|
450
|
-
return string;
|
451
|
-
}
|
452
|
-
}
|
453
118
|
}
|
@@ -0,0 +1,252 @@
|
|
1
|
+
package org.embulk.output;
|
2
|
+
|
3
|
+
import com.google.api.client.http.InputStreamContent;
|
4
|
+
import com.google.api.client.repackaged.org.apache.commons.codec.binary.Base64;
|
5
|
+
import com.google.api.services.storage.Storage;
|
6
|
+
import com.google.api.services.storage.model.StorageObject;
|
7
|
+
import com.google.common.base.Throwables;
|
8
|
+
import org.embulk.config.ConfigException;
|
9
|
+
import org.embulk.config.TaskReport;
|
10
|
+
import org.embulk.spi.Buffer;
|
11
|
+
import org.embulk.spi.Exec;
|
12
|
+
import org.embulk.spi.TransactionalFileOutput;
|
13
|
+
import org.embulk.spi.util.RetryExecutor.RetryGiveupException;
|
14
|
+
import org.embulk.spi.util.RetryExecutor.Retryable;
|
15
|
+
import org.slf4j.Logger;
|
16
|
+
|
17
|
+
import java.io.BufferedInputStream;
|
18
|
+
import java.io.BufferedOutputStream;
|
19
|
+
import java.io.File;
|
20
|
+
import java.io.FileInputStream;
|
21
|
+
import java.io.FileOutputStream;
|
22
|
+
import java.io.IOException;
|
23
|
+
import java.io.InterruptedIOException;
|
24
|
+
import java.security.MessageDigest;
|
25
|
+
import java.security.NoSuchAlgorithmException;
|
26
|
+
import java.util.ArrayList;
|
27
|
+
import java.util.List;
|
28
|
+
|
29
|
+
import static org.embulk.spi.util.RetryExecutor.retryExecutor;
|
30
|
+
|
31
|
+
public class GcsTransactionalFileOutput implements TransactionalFileOutput
|
32
|
+
{
|
33
|
+
private static final Logger logger = Exec.getLogger(GcsTransactionalFileOutput.class);
|
34
|
+
|
35
|
+
private final int taskIndex;
|
36
|
+
private final Storage client;
|
37
|
+
private final String bucket;
|
38
|
+
private final String pathPrefix;
|
39
|
+
private final String pathSuffix;
|
40
|
+
private final String sequenceFormat;
|
41
|
+
private final String contentType;
|
42
|
+
private final int maxConnectionRetry;
|
43
|
+
private final List<StorageObject> storageObjects = new ArrayList<>();
|
44
|
+
|
45
|
+
private int fileIndex = 0;
|
46
|
+
private int callCount = 0;
|
47
|
+
private BufferedOutputStream currentStream = null;
|
48
|
+
private StorageObject currentUpload = null;
|
49
|
+
private File tempFile = null;
|
50
|
+
|
51
|
+
GcsTransactionalFileOutput(PluginTask task, Storage client, int taskIndex)
|
52
|
+
{
|
53
|
+
this.taskIndex = taskIndex;
|
54
|
+
this.client = client;
|
55
|
+
this.bucket = task.getBucket();
|
56
|
+
this.pathPrefix = task.getPathPrefix();
|
57
|
+
this.pathSuffix = task.getFileNameExtension();
|
58
|
+
this.sequenceFormat = task.getSequenceFormat();
|
59
|
+
this.contentType = task.getContentType();
|
60
|
+
this.maxConnectionRetry = task.getMaxConnectionRetry();
|
61
|
+
}
|
62
|
+
|
63
|
+
public void nextFile()
|
64
|
+
{
|
65
|
+
closeCurrentUpload();
|
66
|
+
try {
|
67
|
+
tempFile = Exec.getTempFileSpace().createTempFile();
|
68
|
+
currentStream = new BufferedOutputStream(new FileOutputStream(tempFile));
|
69
|
+
fileIndex++;
|
70
|
+
}
|
71
|
+
catch (IOException ex) {
|
72
|
+
Throwables.propagate(ex);
|
73
|
+
}
|
74
|
+
}
|
75
|
+
|
76
|
+
@Override
|
77
|
+
public void add(Buffer buffer)
|
78
|
+
{
|
79
|
+
try {
|
80
|
+
logger.debug("#add called {} times for taskIndex {}", callCount, taskIndex);
|
81
|
+
currentStream.write(buffer.array(), buffer.offset(), buffer.limit());
|
82
|
+
callCount++;
|
83
|
+
}
|
84
|
+
catch (IOException ex) {
|
85
|
+
throw new RuntimeException(ex);
|
86
|
+
}
|
87
|
+
finally {
|
88
|
+
buffer.release();
|
89
|
+
}
|
90
|
+
}
|
91
|
+
|
92
|
+
@Override
|
93
|
+
public void finish()
|
94
|
+
{
|
95
|
+
String path = generateRemotePath(pathPrefix, sequenceFormat, taskIndex, fileIndex, pathSuffix);
|
96
|
+
close();
|
97
|
+
if (tempFile != null) {
|
98
|
+
currentUpload = startUpload(path);
|
99
|
+
}
|
100
|
+
|
101
|
+
closeCurrentUpload();
|
102
|
+
}
|
103
|
+
|
104
|
+
@Override
|
105
|
+
public void close()
|
106
|
+
{
|
107
|
+
try {
|
108
|
+
if (currentStream != null) {
|
109
|
+
currentStream.close();
|
110
|
+
currentStream = null;
|
111
|
+
}
|
112
|
+
}
|
113
|
+
catch (IOException ex) {
|
114
|
+
throw Throwables.propagate(ex);
|
115
|
+
}
|
116
|
+
}
|
117
|
+
|
118
|
+
@Override
|
119
|
+
public void abort()
|
120
|
+
{
|
121
|
+
}
|
122
|
+
|
123
|
+
@Override
|
124
|
+
public TaskReport commit()
|
125
|
+
{
|
126
|
+
TaskReport report = Exec.newTaskReport();
|
127
|
+
report.set("files", storageObjects);
|
128
|
+
return report;
|
129
|
+
}
|
130
|
+
|
131
|
+
private void closeCurrentUpload()
|
132
|
+
{
|
133
|
+
if (currentUpload != null) {
|
134
|
+
StorageObject obj = currentUpload;
|
135
|
+
storageObjects.add(obj);
|
136
|
+
logger.info("Uploaded '{}/{}' to {}bytes", obj.getBucket(), obj.getName(), obj.getSize());
|
137
|
+
currentUpload = null;
|
138
|
+
}
|
139
|
+
|
140
|
+
callCount = 0;
|
141
|
+
}
|
142
|
+
|
143
|
+
private StorageObject startUpload(final String path)
|
144
|
+
{
|
145
|
+
try {
|
146
|
+
final String hash = getLocalMd5hash(tempFile.getAbsolutePath());
|
147
|
+
|
148
|
+
return execUploadWithRetry(path, hash);
|
149
|
+
}
|
150
|
+
catch (IOException ex) {
|
151
|
+
throw Throwables.propagate(ex);
|
152
|
+
}
|
153
|
+
}
|
154
|
+
|
155
|
+
private StorageObject execUploadWithRetry(final String path, final String localHash) throws IOException
|
156
|
+
{
|
157
|
+
try {
|
158
|
+
return retryExecutor()
|
159
|
+
.withRetryLimit(maxConnectionRetry)
|
160
|
+
.withInitialRetryWait(500)
|
161
|
+
.withMaxRetryWait(30 * 1000)
|
162
|
+
.runInterruptible(new Retryable<StorageObject>() {
|
163
|
+
@Override
|
164
|
+
public StorageObject call() throws IOException
|
165
|
+
{
|
166
|
+
try (final BufferedInputStream inputStream = new BufferedInputStream(new FileInputStream(tempFile))) {
|
167
|
+
InputStreamContent mediaContent = new InputStreamContent(contentType, inputStream);
|
168
|
+
mediaContent.setCloseInputStream(true);
|
169
|
+
|
170
|
+
StorageObject objectMetadata = new StorageObject();
|
171
|
+
objectMetadata.setName(path);
|
172
|
+
|
173
|
+
final Storage.Objects.Insert insert = client.objects().insert(bucket, objectMetadata, mediaContent);
|
174
|
+
insert.setDisableGZipContent(true);
|
175
|
+
StorageObject obj = insert.execute();
|
176
|
+
|
177
|
+
logger.info(String.format("Local Hash(MD5): %s / Remote Hash(MD5): %s", localHash, obj.getMd5Hash()));
|
178
|
+
return obj;
|
179
|
+
}
|
180
|
+
}
|
181
|
+
|
182
|
+
@Override
|
183
|
+
public boolean isRetryableException(Exception exception)
|
184
|
+
{
|
185
|
+
return true;
|
186
|
+
}
|
187
|
+
|
188
|
+
@Override
|
189
|
+
public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait) throws RetryGiveupException
|
190
|
+
{
|
191
|
+
String message = String.format("GCS put request failed. Retrying %d/%d after %d seconds. Message: %s: %s",
|
192
|
+
retryCount, retryLimit, retryWait / 1000, exception.getClass(), exception.getMessage());
|
193
|
+
if (retryCount % 3 == 0) {
|
194
|
+
logger.warn(message, exception);
|
195
|
+
}
|
196
|
+
else {
|
197
|
+
logger.warn(message);
|
198
|
+
}
|
199
|
+
}
|
200
|
+
|
201
|
+
@Override
|
202
|
+
public void onGiveup(Exception firstException, Exception lastException) throws RetryGiveupException
|
203
|
+
{
|
204
|
+
}
|
205
|
+
});
|
206
|
+
}
|
207
|
+
catch (RetryGiveupException ex) {
|
208
|
+
throw Throwables.propagate(ex.getCause());
|
209
|
+
}
|
210
|
+
catch (InterruptedException ex) {
|
211
|
+
throw new InterruptedIOException();
|
212
|
+
}
|
213
|
+
}
|
214
|
+
|
215
|
+
/*
|
216
|
+
MD5 hash sum on GCS bucket is encoded with base64.
|
217
|
+
You can get same hash with following commands.
|
218
|
+
$ openssl dgst -md5 -binary /path/to/file.txt | openssl enc -base64
|
219
|
+
or
|
220
|
+
$ gsutil hash -m /path/to/file.txt
|
221
|
+
*/
|
222
|
+
private String getLocalMd5hash(String filePath) throws IOException
|
223
|
+
{
|
224
|
+
try {
|
225
|
+
MessageDigest md = MessageDigest.getInstance("MD5");
|
226
|
+
try (BufferedInputStream input = new BufferedInputStream(new FileInputStream(new File(filePath)))) {
|
227
|
+
byte[] buffer = new byte[256];
|
228
|
+
int len;
|
229
|
+
while ((len = input.read(buffer, 0, buffer.length)) >= 0) {
|
230
|
+
md.update(buffer, 0, len);
|
231
|
+
}
|
232
|
+
return new String(Base64.encodeBase64(md.digest()));
|
233
|
+
}
|
234
|
+
}
|
235
|
+
catch (NoSuchAlgorithmException ex) {
|
236
|
+
throw new ConfigException("MD5 algorism not found");
|
237
|
+
}
|
238
|
+
}
|
239
|
+
|
240
|
+
/**
|
241
|
+
* GCS has character limitation in object names.
|
242
|
+
* @see https://cloud.google.com/storage/docs/naming#objectnames
|
243
|
+
* Although "." isn't listed at above pages, we can't access "./" path from GUI console.
|
244
|
+
* And in many cases, user don't intend of creating "/" directory under the bucket.
|
245
|
+
* This method normalizes path when it contains "./" and "/" and its variations at the beginning
|
246
|
+
*/
|
247
|
+
private static String generateRemotePath(String pathPrefix, String sequenceFormat, int taskIndex, int fileIndex, String pathSuffix)
|
248
|
+
{
|
249
|
+
String path = pathPrefix + String.format(sequenceFormat, taskIndex, fileIndex) + pathSuffix;
|
250
|
+
return path.replaceFirst("^\\.*/*", "");
|
251
|
+
}
|
252
|
+
}
|
@@ -0,0 +1,57 @@
|
|
1
|
+
package org.embulk.output;
|
2
|
+
|
3
|
+
import com.google.common.base.Optional;
|
4
|
+
import org.embulk.config.Config;
|
5
|
+
import org.embulk.config.ConfigDefault;
|
6
|
+
import org.embulk.config.Task;
|
7
|
+
import org.embulk.spi.unit.LocalFile;
|
8
|
+
|
9
|
+
public interface PluginTask extends Task
|
10
|
+
{
|
11
|
+
@Config("bucket")
|
12
|
+
String getBucket();
|
13
|
+
|
14
|
+
@Config("path_prefix")
|
15
|
+
String getPathPrefix();
|
16
|
+
|
17
|
+
@Config("file_ext")
|
18
|
+
String getFileNameExtension();
|
19
|
+
|
20
|
+
@Config("sequence_format")
|
21
|
+
@ConfigDefault("\".%03d.%02d\"")
|
22
|
+
String getSequenceFormat();
|
23
|
+
|
24
|
+
@Config("content_type")
|
25
|
+
@ConfigDefault("\"application/octet-stream\"")
|
26
|
+
String getContentType();
|
27
|
+
|
28
|
+
@Config("auth_method")
|
29
|
+
@ConfigDefault("\"private_key\"")
|
30
|
+
AuthMethod getAuthMethod();
|
31
|
+
|
32
|
+
@Config("service_account_email")
|
33
|
+
@ConfigDefault("null")
|
34
|
+
Optional<String> getServiceAccountEmail();
|
35
|
+
|
36
|
+
// kept for backward compatibility
|
37
|
+
@Config("p12_keyfile_path")
|
38
|
+
@ConfigDefault("null")
|
39
|
+
Optional<String> getP12KeyfilePath();
|
40
|
+
|
41
|
+
@Config("p12_keyfile")
|
42
|
+
@ConfigDefault("null")
|
43
|
+
Optional<LocalFile> getP12Keyfile();
|
44
|
+
void setP12Keyfile(Optional<LocalFile> p12Keyfile);
|
45
|
+
|
46
|
+
@Config("json_keyfile")
|
47
|
+
@ConfigDefault("null")
|
48
|
+
Optional<LocalFile> getJsonKeyfile();
|
49
|
+
|
50
|
+
@Config("application_name")
|
51
|
+
@ConfigDefault("\"embulk-output-gcs\"")
|
52
|
+
String getApplicationName();
|
53
|
+
|
54
|
+
@Config("max_connection_retry")
|
55
|
+
@ConfigDefault("10") // 10 times retry to connect GCS server if failed.
|
56
|
+
int getMaxConnectionRetry();
|
57
|
+
}
|
@@ -11,7 +11,6 @@ import org.embulk.config.ConfigException;
|
|
11
11
|
import org.embulk.config.ConfigSource;
|
12
12
|
import org.embulk.config.TaskReport;
|
13
13
|
import org.embulk.config.TaskSource;
|
14
|
-
import org.embulk.output.GcsOutputPlugin.PluginTask;
|
15
14
|
import org.embulk.spi.Buffer;
|
16
15
|
import org.embulk.spi.Exec;
|
17
16
|
import org.embulk.spi.FileOutputPlugin;
|
@@ -98,7 +97,7 @@ public class TestGcsOutputPlugin
|
|
98
97
|
.set("file_ext", ".csv")
|
99
98
|
.set("formatter", formatterConfig());
|
100
99
|
|
101
|
-
|
100
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
102
101
|
assertEquals("private_key", task.getAuthMethod().toString());
|
103
102
|
}
|
104
103
|
|
@@ -284,7 +283,7 @@ public class TestGcsOutputPlugin
|
|
284
283
|
{
|
285
284
|
ConfigSource configSource = config();
|
286
285
|
PluginTask task = configSource.loadConfig(PluginTask.class);
|
287
|
-
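Method method = GcsOutputPlugin.class.getDeclaredMethod("generateRemotePath", String.class, String.class, int.class, int.class, String.class);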
|
286
|
+
Method method = GcsTransactionalFileOutput.class.getDeclaredMethod("generateRemotePath", String.class, String.class, int.class, int.class, String.class);
|
288
287
|
method.setAccessible(true);
|
289
288
|
assertEquals("sample.000.01.csv", method.invoke(plugin, "/sample", task.getSequenceFormat(), 0, 1, ".csv"));
|
290
289
|
assertEquals("sample.000.01.csv", method.invoke(plugin, "./sample", task.getSequenceFormat(), 0, 1, ".csv"));
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-output-gcs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.1
|
4
|
+
version: 0.4.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kazuyuki Honda
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-07-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -62,24 +62,27 @@ files:
|
|
62
62
|
- gradlew
|
63
63
|
- gradlew.bat
|
64
64
|
- lib/embulk/output/gcs.rb
|
65
|
+
- src/main/java/org/embulk/output/AuthMethod.java
|
65
66
|
- src/main/java/org/embulk/output/GcsAuthentication.java
|
66
67
|
- src/main/java/org/embulk/output/GcsOutputPlugin.java
|
68
|
+
- src/main/java/org/embulk/output/GcsTransactionalFileOutput.java
|
69
|
+
- src/main/java/org/embulk/output/PluginTask.java
|
67
70
|
- src/test/java/org/embulk/output/TestGcsAuthentication.java
|
68
71
|
- src/test/java/org/embulk/output/TestGcsOutputPlugin.java
|
69
72
|
- src/test/resources/keys.tar.enc
|
70
73
|
- src/test/resources/sample_01.csv
|
71
74
|
- src/test/resources/sample_02.csv
|
72
|
-
- classpath/commons-codec-1.3.jar
|
73
|
-
- classpath/commons-logging-1.1.1.jar
|
74
|
-
- classpath/embulk-output-gcs-0.4.1.jar
|
75
|
-
- classpath/google-api-client-1.19.1.jar
|
76
75
|
- classpath/google-api-services-storage-v1-rev28-1.19.1.jar
|
76
|
+
- classpath/httpclient-4.0.1.jar
|
77
|
+
- classpath/jsr305-1.3.9.jar
|
78
|
+
- classpath/embulk-output-gcs-0.4.2.jar
|
79
|
+
- classpath/commons-logging-1.1.1.jar
|
77
80
|
- classpath/google-http-client-1.19.0.jar
|
81
|
+
- classpath/google-api-client-1.19.1.jar
|
82
|
+
- classpath/commons-codec-1.3.jar
|
83
|
+
- classpath/httpcore-4.0.1.jar
|
78
84
|
- classpath/google-http-client-jackson2-1.19.0.jar
|
79
85
|
- classpath/google-oauth-client-1.19.0.jar
|
80
|
-
- classpath/httpclient-4.0.1.jar
|
81
|
-
- classpath/httpcore-4.0.1.jar
|
82
|
-
- classpath/jsr305-1.3.9.jar
|
83
86
|
homepage: https://github.com/hakobera/embulk-output-gcs
|
84
87
|
licenses:
|
85
88
|
- MIT
|