embulk-output-gcs 0.4.1 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +0 -2
- data/build.gradle +3 -3
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +2 -2
- data/src/main/java/org/embulk/output/AuthMethod.java +20 -0
- data/src/main/java/org/embulk/output/GcsOutputPlugin.java +1 -336
- data/src/main/java/org/embulk/output/GcsTransactionalFileOutput.java +252 -0
- data/src/main/java/org/embulk/output/PluginTask.java +57 -0
- data/src/test/java/org/embulk/output/TestGcsOutputPlugin.java +2 -3
- metadata +12 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c0d6f9617394f320742be74ac318d99ef0f93c06
|
4
|
+
data.tar.gz: af9f3a3679a2d76b5c2944fd7e8264ecab3b3a27
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 086960d449e95821defa56b6860eaf2a45fe753db3c7d763272c694ae5fd87b07aba937bb170f748b6d779775e71145c9ff350528d12ea6d770e64d40b67f3ec
|
7
|
+
data.tar.gz: 99a7bc4c683d61b500780c316be1c7861181cccde8f20daf6b674e96068a2fc9414db5b830013c0f6c223b33d2cc7a4d164bc62536ed0082ba419754ac8a7980
|
data/.travis.yml
CHANGED
data/build.gradle
CHANGED
@@ -14,10 +14,10 @@ configurations {
|
|
14
14
|
provided
|
15
15
|
}
|
16
16
|
|
17
|
-
sourceCompatibility = 1.
|
18
|
-
targetCompatibility = 1.
|
17
|
+
sourceCompatibility = 1.8
|
18
|
+
targetCompatibility = 1.8
|
19
19
|
|
20
|
-
version = "0.4.1"
|
20
|
+
version = "0.4.2"
|
21
21
|
|
22
22
|
dependencies {
|
23
23
|
compile "org.embulk:embulk-core:0.8.6"
|
Binary file
|
@@ -1,6 +1,6 @@
|
|
1
|
-
#
|
1
|
+
#Sun Jan 08 00:35:58 PST 2017
|
2
2
|
distributionBase=GRADLE_USER_HOME
|
3
3
|
distributionPath=wrapper/dists
|
4
4
|
zipStoreBase=GRADLE_USER_HOME
|
5
5
|
zipStorePath=wrapper/dists
|
6
|
-
distributionUrl=https\://services.gradle.org/distributions/gradle-2.
|
6
|
+
distributionUrl=https\://services.gradle.org/distributions/gradle-3.2.1-bin.zip
|
@@ -0,0 +1,20 @@
|
|
1
|
+
package org.embulk.output;
|
2
|
+
|
3
|
+
public enum AuthMethod
|
4
|
+
{
|
5
|
+
private_key("private_key"),
|
6
|
+
compute_engine("compute_engine"),
|
7
|
+
json_key("json_key");
|
8
|
+
|
9
|
+
private final String string;
|
10
|
+
|
11
|
+
AuthMethod(String string)
|
12
|
+
{
|
13
|
+
this.string = string;
|
14
|
+
}
|
15
|
+
|
16
|
+
public String getString()
|
17
|
+
{
|
18
|
+
return string;
|
19
|
+
}
|
20
|
+
}
|
@@ -1,102 +1,25 @@
|
|
1
1
|
package org.embulk.output;
|
2
2
|
|
3
|
-
import com.google.api.client.http.InputStreamContent;
|
4
|
-
import com.google.api.client.repackaged.org.apache.commons.codec.binary.Base64;
|
5
3
|
import com.google.api.services.storage.Storage;
|
6
|
-
import com.google.api.services.storage.model.StorageObject;
|
7
4
|
import com.google.common.base.Function;
|
8
5
|
import com.google.common.base.Optional;
|
9
6
|
import com.google.common.base.Throwables;
|
10
|
-
import org.embulk.config.Config;
|
11
|
-
import org.embulk.config.ConfigDefault;
|
12
7
|
import org.embulk.config.ConfigDiff;
|
13
8
|
import org.embulk.config.ConfigException;
|
14
9
|
import org.embulk.config.ConfigSource;
|
15
|
-
import org.embulk.config.Task;
|
16
10
|
import org.embulk.config.TaskReport;
|
17
11
|
import org.embulk.config.TaskSource;
|
18
|
-
import org.embulk.spi.Buffer;
|
19
12
|
import org.embulk.spi.Exec;
|
20
13
|
import org.embulk.spi.FileOutputPlugin;
|
21
14
|
import org.embulk.spi.TransactionalFileOutput;
|
22
15
|
import org.embulk.spi.unit.LocalFile;
|
23
|
-
import org.embulk.spi.util.RetryExecutor.RetryGiveupException;
|
24
|
-
import org.embulk.spi.util.RetryExecutor.Retryable;
|
25
|
-
import org.slf4j.Logger;
|
26
|
-
import static org.embulk.spi.util.RetryExecutor.retryExecutor;
|
27
16
|
|
28
|
-
import java.io.BufferedInputStream;
|
29
|
-
import java.io.BufferedOutputStream;
|
30
|
-
import java.io.File;
|
31
|
-
import java.io.FileInputStream;
|
32
|
-
import java.io.FileOutputStream;
|
33
17
|
import java.io.IOException;
|
34
|
-
import java.io.InterruptedIOException;
|
35
18
|
import java.security.GeneralSecurityException;
|
36
|
-
import java.security.MessageDigest;
|
37
|
-
import java.security.NoSuchAlgorithmException;
|
38
|
-
import java.util.ArrayList;
|
39
19
|
import java.util.List;
|
40
|
-
import java.util.concurrent.Callable;
|
41
|
-
import java.util.concurrent.ExecutionException;
|
42
|
-
import java.util.concurrent.ExecutorService;
|
43
|
-
import java.util.concurrent.Executors;
|
44
|
-
import java.util.concurrent.Future;
|
45
20
|
|
46
21
|
public class GcsOutputPlugin implements FileOutputPlugin
|
47
22
|
{
|
48
|
-
private static final Logger logger = Exec.getLogger(GcsOutputPlugin.class);
|
49
|
-
|
50
|
-
public interface PluginTask extends Task
|
51
|
-
{
|
52
|
-
@Config("bucket")
|
53
|
-
String getBucket();
|
54
|
-
|
55
|
-
@Config("path_prefix")
|
56
|
-
String getPathPrefix();
|
57
|
-
|
58
|
-
@Config("file_ext")
|
59
|
-
String getFileNameExtension();
|
60
|
-
|
61
|
-
@Config("sequence_format")
|
62
|
-
@ConfigDefault("\".%03d.%02d\"")
|
63
|
-
String getSequenceFormat();
|
64
|
-
|
65
|
-
@Config("content_type")
|
66
|
-
@ConfigDefault("\"application/octet-stream\"")
|
67
|
-
String getContentType();
|
68
|
-
|
69
|
-
@Config("auth_method")
|
70
|
-
@ConfigDefault("\"private_key\"")
|
71
|
-
AuthMethod getAuthMethod();
|
72
|
-
|
73
|
-
@Config("service_account_email")
|
74
|
-
@ConfigDefault("null")
|
75
|
-
Optional<String> getServiceAccountEmail();
|
76
|
-
|
77
|
-
// kept for backward compatibility
|
78
|
-
@Config("p12_keyfile_path")
|
79
|
-
@ConfigDefault("null")
|
80
|
-
Optional<String> getP12KeyfilePath();
|
81
|
-
|
82
|
-
@Config("p12_keyfile")
|
83
|
-
@ConfigDefault("null")
|
84
|
-
Optional<LocalFile> getP12Keyfile();
|
85
|
-
void setP12Keyfile(Optional<LocalFile> p12Keyfile);
|
86
|
-
|
87
|
-
@Config("json_keyfile")
|
88
|
-
@ConfigDefault("null")
|
89
|
-
Optional<LocalFile> getJsonKeyfile();
|
90
|
-
|
91
|
-
@Config("application_name")
|
92
|
-
@ConfigDefault("\"embulk-output-gcs\"")
|
93
|
-
String getApplicationName();
|
94
|
-
|
95
|
-
@Config("max_connection_retry")
|
96
|
-
@ConfigDefault("10") // 10 times retry to connect GCS server if failed.
|
97
|
-
int getMaxConnectionRetry();
|
98
|
-
}
|
99
|
-
|
100
23
|
@Override
|
101
24
|
public ConfigDiff transaction(ConfigSource config,
|
102
25
|
int taskCount,
|
@@ -152,7 +75,7 @@ public class GcsOutputPlugin implements FileOutputPlugin
|
|
152
75
|
PluginTask task = taskSource.loadTask(PluginTask.class);
|
153
76
|
|
154
77
|
Storage client = createClient(task);
|
155
|
-
return new TransactionalGcsFileOutput(task, client, taskIndex);
|
78
|
+
return new GcsTransactionalFileOutput(task, client, taskIndex);
|
156
79
|
}
|
157
80
|
|
158
81
|
private GcsAuthentication newGcsAuth(PluginTask task)
|
@@ -192,262 +115,4 @@ public class GcsOutputPlugin implements FileOutputPlugin
|
|
192
115
|
}
|
193
116
|
};
|
194
117
|
}
|
195
|
-
|
196
|
-
static class TransactionalGcsFileOutput implements TransactionalFileOutput
|
197
|
-
{
|
198
|
-
private final int taskIndex;
|
199
|
-
private final Storage client;
|
200
|
-
private final String bucket;
|
201
|
-
private final String pathPrefix;
|
202
|
-
private final String pathSuffix;
|
203
|
-
private final String sequenceFormat;
|
204
|
-
private final String contentType;
|
205
|
-
private final int maxConnectionRetry;
|
206
|
-
private final List<StorageObject> storageObjects = new ArrayList<>();
|
207
|
-
|
208
|
-
private int fileIndex = 0;
|
209
|
-
private int callCount = 0;
|
210
|
-
private BufferedOutputStream currentStream = null;
|
211
|
-
private Future<StorageObject> currentUpload = null;
|
212
|
-
private File tempFile = null;
|
213
|
-
|
214
|
-
TransactionalGcsFileOutput(PluginTask task, Storage client, int taskIndex)
|
215
|
-
{
|
216
|
-
this.taskIndex = taskIndex;
|
217
|
-
this.client = client;
|
218
|
-
this.bucket = task.getBucket();
|
219
|
-
this.pathPrefix = task.getPathPrefix();
|
220
|
-
this.pathSuffix = task.getFileNameExtension();
|
221
|
-
this.sequenceFormat = task.getSequenceFormat();
|
222
|
-
this.contentType = task.getContentType();
|
223
|
-
this.maxConnectionRetry = task.getMaxConnectionRetry();
|
224
|
-
}
|
225
|
-
|
226
|
-
public void nextFile()
|
227
|
-
{
|
228
|
-
closeCurrentUpload();
|
229
|
-
try {
|
230
|
-
tempFile = Exec.getTempFileSpace().createTempFile();
|
231
|
-
currentStream = new BufferedOutputStream(new FileOutputStream(tempFile));
|
232
|
-
fileIndex++;
|
233
|
-
}
|
234
|
-
catch (IOException ex) {
|
235
|
-
Throwables.propagate(ex);
|
236
|
-
}
|
237
|
-
}
|
238
|
-
|
239
|
-
@Override
|
240
|
-
public void add(Buffer buffer)
|
241
|
-
{
|
242
|
-
try {
|
243
|
-
logger.debug("#add called {} times for taskIndex {}", callCount, taskIndex);
|
244
|
-
currentStream.write(buffer.array(), buffer.offset(), buffer.limit());
|
245
|
-
callCount++;
|
246
|
-
}
|
247
|
-
catch (IOException ex) {
|
248
|
-
throw new RuntimeException(ex);
|
249
|
-
}
|
250
|
-
finally {
|
251
|
-
buffer.release();
|
252
|
-
}
|
253
|
-
}
|
254
|
-
|
255
|
-
@Override
|
256
|
-
public void finish()
|
257
|
-
{
|
258
|
-
String path = generateRemotePath(pathPrefix, sequenceFormat, taskIndex, fileIndex, pathSuffix);
|
259
|
-
close();
|
260
|
-
if (tempFile != null) {
|
261
|
-
currentUpload = startUpload(path);
|
262
|
-
}
|
263
|
-
|
264
|
-
closeCurrentUpload();
|
265
|
-
}
|
266
|
-
|
267
|
-
@Override
|
268
|
-
public void close()
|
269
|
-
{
|
270
|
-
try {
|
271
|
-
if (currentStream != null) {
|
272
|
-
currentStream.close();
|
273
|
-
currentStream = null;
|
274
|
-
}
|
275
|
-
}
|
276
|
-
catch (IOException ex) {
|
277
|
-
throw Throwables.propagate(ex);
|
278
|
-
}
|
279
|
-
}
|
280
|
-
|
281
|
-
@Override
|
282
|
-
public void abort()
|
283
|
-
{
|
284
|
-
}
|
285
|
-
|
286
|
-
@Override
|
287
|
-
public TaskReport commit()
|
288
|
-
{
|
289
|
-
TaskReport report = Exec.newTaskReport();
|
290
|
-
report.set("files", storageObjects);
|
291
|
-
return report;
|
292
|
-
}
|
293
|
-
|
294
|
-
private void closeCurrentUpload()
|
295
|
-
{
|
296
|
-
try {
|
297
|
-
if (currentUpload != null) {
|
298
|
-
StorageObject obj = currentUpload.get();
|
299
|
-
storageObjects.add(obj);
|
300
|
-
logger.info("Uploaded '{}/{}' to {}bytes", obj.getBucket(), obj.getName(), obj.getSize());
|
301
|
-
currentUpload = null;
|
302
|
-
}
|
303
|
-
|
304
|
-
callCount = 0;
|
305
|
-
}
|
306
|
-
catch (InterruptedException | ExecutionException ex) {
|
307
|
-
throw Throwables.propagate(ex);
|
308
|
-
}
|
309
|
-
}
|
310
|
-
|
311
|
-
private Future<StorageObject> startUpload(final String path)
|
312
|
-
{
|
313
|
-
try {
|
314
|
-
final ExecutorService executor = Executors.newCachedThreadPool();
|
315
|
-
final String hash = getLocalMd5hash(tempFile.getAbsolutePath());
|
316
|
-
|
317
|
-
return executor.submit(new Callable<StorageObject>() {
|
318
|
-
@Override
|
319
|
-
public StorageObject call() throws IOException
|
320
|
-
{
|
321
|
-
try {
|
322
|
-
logger.info("Uploading '{}/{}'", bucket, path);
|
323
|
-
return execUploadWithRetry(path, hash);
|
324
|
-
}
|
325
|
-
finally {
|
326
|
-
executor.shutdown();
|
327
|
-
}
|
328
|
-
}
|
329
|
-
});
|
330
|
-
}
|
331
|
-
catch (IOException ex) {
|
332
|
-
throw Throwables.propagate(ex);
|
333
|
-
}
|
334
|
-
}
|
335
|
-
|
336
|
-
private StorageObject execUploadWithRetry(final String path, final String localHash) throws IOException
|
337
|
-
{
|
338
|
-
try {
|
339
|
-
return retryExecutor()
|
340
|
-
.withRetryLimit(maxConnectionRetry)
|
341
|
-
.withInitialRetryWait(500)
|
342
|
-
.withMaxRetryWait(30 * 1000)
|
343
|
-
.runInterruptible(new Retryable<StorageObject>() {
|
344
|
-
@Override
|
345
|
-
public StorageObject call() throws IOException, RetryGiveupException
|
346
|
-
{
|
347
|
-
try (final BufferedInputStream inputStream = new BufferedInputStream(new FileInputStream(tempFile))) {
|
348
|
-
InputStreamContent mediaContent = new InputStreamContent(contentType, inputStream);
|
349
|
-
mediaContent.setCloseInputStream(true);
|
350
|
-
|
351
|
-
StorageObject objectMetadata = new StorageObject();
|
352
|
-
objectMetadata.setName(path);
|
353
|
-
|
354
|
-
final Storage.Objects.Insert insert = client.objects().insert(bucket, objectMetadata, mediaContent);
|
355
|
-
insert.setDisableGZipContent(true);
|
356
|
-
StorageObject obj = insert.execute();
|
357
|
-
|
358
|
-
logger.info(String.format("Local Hash(MD5): %s / Remote Hash(MD5): %s", localHash, obj.getMd5Hash()));
|
359
|
-
return obj;
|
360
|
-
}
|
361
|
-
}
|
362
|
-
|
363
|
-
@Override
|
364
|
-
public boolean isRetryableException(Exception exception)
|
365
|
-
{
|
366
|
-
return true;
|
367
|
-
}
|
368
|
-
|
369
|
-
@Override
|
370
|
-
public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait) throws RetryGiveupException
|
371
|
-
{
|
372
|
-
String message = String.format("GCS put request failed. Retrying %d/%d after %d seconds. Message: %s: %s",
|
373
|
-
retryCount, retryLimit, retryWait / 1000, exception.getClass(), exception.getMessage());
|
374
|
-
if (retryCount % 3 == 0) {
|
375
|
-
logger.warn(message, exception);
|
376
|
-
}
|
377
|
-
else {
|
378
|
-
logger.warn(message);
|
379
|
-
}
|
380
|
-
}
|
381
|
-
|
382
|
-
@Override
|
383
|
-
public void onGiveup(Exception firstException, Exception lastException) throws RetryGiveupException
|
384
|
-
{
|
385
|
-
}
|
386
|
-
});
|
387
|
-
}
|
388
|
-
catch (RetryGiveupException ex) {
|
389
|
-
throw Throwables.propagate(ex.getCause());
|
390
|
-
}
|
391
|
-
catch (InterruptedException ex) {
|
392
|
-
throw new InterruptedIOException();
|
393
|
-
}
|
394
|
-
}
|
395
|
-
|
396
|
-
/*
|
397
|
-
MD5 hash sum on GCS bucket is encoded with base64.
|
398
|
-
You can get same hash with following commands.
|
399
|
-
$ openssl dgst -md5 -binary /path/to/file.txt | openssl enc -base64
|
400
|
-
or
|
401
|
-
$ gsutil hash -m /path/to/file.txt
|
402
|
-
*/
|
403
|
-
private String getLocalMd5hash(String filePath) throws IOException
|
404
|
-
{
|
405
|
-
try {
|
406
|
-
MessageDigest md = MessageDigest.getInstance("MD5");
|
407
|
-
try (BufferedInputStream input = new BufferedInputStream(new FileInputStream(new File(filePath)))) {
|
408
|
-
byte[] buffer = new byte[256];
|
409
|
-
int len;
|
410
|
-
while ((len = input.read(buffer, 0, buffer.length)) >= 0) {
|
411
|
-
md.update(buffer, 0, len);
|
412
|
-
}
|
413
|
-
return new String(Base64.encodeBase64(md.digest()));
|
414
|
-
}
|
415
|
-
}
|
416
|
-
catch (NoSuchAlgorithmException ex) {
|
417
|
-
throw new ConfigException("MD5 algorism not found");
|
418
|
-
}
|
419
|
-
}
|
420
|
-
}
|
421
|
-
|
422
|
-
/**
|
423
|
-
* GCS has character limitation in object names.
|
424
|
-
* @see https://cloud.google.com/storage/docs/naming#objectnames
|
425
|
-
* Although "." isn't listed at above pages, we can't access "./" path from GUI console.
|
426
|
-
* And in many cases, user don't intend of creating "/" directory under the bucket.
|
427
|
-
* This method normalizes path when it contains "./" and "/" and its variations at the beginning
|
428
|
-
*/
|
429
|
-
private static String generateRemotePath(String pathPrefix, String sequenceFormat, int taskIndex, int fileIndex, String pathSuffix)
|
430
|
-
{
|
431
|
-
String path = pathPrefix + String.format(sequenceFormat, taskIndex, fileIndex) + pathSuffix;
|
432
|
-
return path.replaceFirst("^\\.*/*", "");
|
433
|
-
}
|
434
|
-
|
435
|
-
public enum AuthMethod
|
436
|
-
{
|
437
|
-
private_key("private_key"),
|
438
|
-
compute_engine("compute_engine"),
|
439
|
-
json_key("json_key");
|
440
|
-
|
441
|
-
private final String string;
|
442
|
-
|
443
|
-
AuthMethod(String string)
|
444
|
-
{
|
445
|
-
this.string = string;
|
446
|
-
}
|
447
|
-
|
448
|
-
public String getString()
|
449
|
-
{
|
450
|
-
return string;
|
451
|
-
}
|
452
|
-
}
|
453
118
|
}
|
@@ -0,0 +1,252 @@
|
|
1
|
+
package org.embulk.output;
|
2
|
+
|
3
|
+
import com.google.api.client.http.InputStreamContent;
|
4
|
+
import com.google.api.client.repackaged.org.apache.commons.codec.binary.Base64;
|
5
|
+
import com.google.api.services.storage.Storage;
|
6
|
+
import com.google.api.services.storage.model.StorageObject;
|
7
|
+
import com.google.common.base.Throwables;
|
8
|
+
import org.embulk.config.ConfigException;
|
9
|
+
import org.embulk.config.TaskReport;
|
10
|
+
import org.embulk.spi.Buffer;
|
11
|
+
import org.embulk.spi.Exec;
|
12
|
+
import org.embulk.spi.TransactionalFileOutput;
|
13
|
+
import org.embulk.spi.util.RetryExecutor.RetryGiveupException;
|
14
|
+
import org.embulk.spi.util.RetryExecutor.Retryable;
|
15
|
+
import org.slf4j.Logger;
|
16
|
+
|
17
|
+
import java.io.BufferedInputStream;
|
18
|
+
import java.io.BufferedOutputStream;
|
19
|
+
import java.io.File;
|
20
|
+
import java.io.FileInputStream;
|
21
|
+
import java.io.FileOutputStream;
|
22
|
+
import java.io.IOException;
|
23
|
+
import java.io.InterruptedIOException;
|
24
|
+
import java.security.MessageDigest;
|
25
|
+
import java.security.NoSuchAlgorithmException;
|
26
|
+
import java.util.ArrayList;
|
27
|
+
import java.util.List;
|
28
|
+
|
29
|
+
import static org.embulk.spi.util.RetryExecutor.retryExecutor;
|
30
|
+
|
31
|
+
public class GcsTransactionalFileOutput implements TransactionalFileOutput
|
32
|
+
{
|
33
|
+
private static final Logger logger = Exec.getLogger(GcsTransactionalFileOutput.class);
|
34
|
+
|
35
|
+
private final int taskIndex;
|
36
|
+
private final Storage client;
|
37
|
+
private final String bucket;
|
38
|
+
private final String pathPrefix;
|
39
|
+
private final String pathSuffix;
|
40
|
+
private final String sequenceFormat;
|
41
|
+
private final String contentType;
|
42
|
+
private final int maxConnectionRetry;
|
43
|
+
private final List<StorageObject> storageObjects = new ArrayList<>();
|
44
|
+
|
45
|
+
private int fileIndex = 0;
|
46
|
+
private int callCount = 0;
|
47
|
+
private BufferedOutputStream currentStream = null;
|
48
|
+
private StorageObject currentUpload = null;
|
49
|
+
private File tempFile = null;
|
50
|
+
|
51
|
+
GcsTransactionalFileOutput(PluginTask task, Storage client, int taskIndex)
|
52
|
+
{
|
53
|
+
this.taskIndex = taskIndex;
|
54
|
+
this.client = client;
|
55
|
+
this.bucket = task.getBucket();
|
56
|
+
this.pathPrefix = task.getPathPrefix();
|
57
|
+
this.pathSuffix = task.getFileNameExtension();
|
58
|
+
this.sequenceFormat = task.getSequenceFormat();
|
59
|
+
this.contentType = task.getContentType();
|
60
|
+
this.maxConnectionRetry = task.getMaxConnectionRetry();
|
61
|
+
}
|
62
|
+
|
63
|
+
public void nextFile()
|
64
|
+
{
|
65
|
+
closeCurrentUpload();
|
66
|
+
try {
|
67
|
+
tempFile = Exec.getTempFileSpace().createTempFile();
|
68
|
+
currentStream = new BufferedOutputStream(new FileOutputStream(tempFile));
|
69
|
+
fileIndex++;
|
70
|
+
}
|
71
|
+
catch (IOException ex) {
|
72
|
+
Throwables.propagate(ex);
|
73
|
+
}
|
74
|
+
}
|
75
|
+
|
76
|
+
@Override
|
77
|
+
public void add(Buffer buffer)
|
78
|
+
{
|
79
|
+
try {
|
80
|
+
logger.debug("#add called {} times for taskIndex {}", callCount, taskIndex);
|
81
|
+
currentStream.write(buffer.array(), buffer.offset(), buffer.limit());
|
82
|
+
callCount++;
|
83
|
+
}
|
84
|
+
catch (IOException ex) {
|
85
|
+
throw new RuntimeException(ex);
|
86
|
+
}
|
87
|
+
finally {
|
88
|
+
buffer.release();
|
89
|
+
}
|
90
|
+
}
|
91
|
+
|
92
|
+
@Override
|
93
|
+
public void finish()
|
94
|
+
{
|
95
|
+
String path = generateRemotePath(pathPrefix, sequenceFormat, taskIndex, fileIndex, pathSuffix);
|
96
|
+
close();
|
97
|
+
if (tempFile != null) {
|
98
|
+
currentUpload = startUpload(path);
|
99
|
+
}
|
100
|
+
|
101
|
+
closeCurrentUpload();
|
102
|
+
}
|
103
|
+
|
104
|
+
@Override
|
105
|
+
public void close()
|
106
|
+
{
|
107
|
+
try {
|
108
|
+
if (currentStream != null) {
|
109
|
+
currentStream.close();
|
110
|
+
currentStream = null;
|
111
|
+
}
|
112
|
+
}
|
113
|
+
catch (IOException ex) {
|
114
|
+
throw Throwables.propagate(ex);
|
115
|
+
}
|
116
|
+
}
|
117
|
+
|
118
|
+
@Override
|
119
|
+
public void abort()
|
120
|
+
{
|
121
|
+
}
|
122
|
+
|
123
|
+
@Override
|
124
|
+
public TaskReport commit()
|
125
|
+
{
|
126
|
+
TaskReport report = Exec.newTaskReport();
|
127
|
+
report.set("files", storageObjects);
|
128
|
+
return report;
|
129
|
+
}
|
130
|
+
|
131
|
+
private void closeCurrentUpload()
|
132
|
+
{
|
133
|
+
if (currentUpload != null) {
|
134
|
+
StorageObject obj = currentUpload;
|
135
|
+
storageObjects.add(obj);
|
136
|
+
logger.info("Uploaded '{}/{}' to {}bytes", obj.getBucket(), obj.getName(), obj.getSize());
|
137
|
+
currentUpload = null;
|
138
|
+
}
|
139
|
+
|
140
|
+
callCount = 0;
|
141
|
+
}
|
142
|
+
|
143
|
+
private StorageObject startUpload(final String path)
|
144
|
+
{
|
145
|
+
try {
|
146
|
+
final String hash = getLocalMd5hash(tempFile.getAbsolutePath());
|
147
|
+
|
148
|
+
return execUploadWithRetry(path, hash);
|
149
|
+
}
|
150
|
+
catch (IOException ex) {
|
151
|
+
throw Throwables.propagate(ex);
|
152
|
+
}
|
153
|
+
}
|
154
|
+
|
155
|
+
private StorageObject execUploadWithRetry(final String path, final String localHash) throws IOException
|
156
|
+
{
|
157
|
+
try {
|
158
|
+
return retryExecutor()
|
159
|
+
.withRetryLimit(maxConnectionRetry)
|
160
|
+
.withInitialRetryWait(500)
|
161
|
+
.withMaxRetryWait(30 * 1000)
|
162
|
+
.runInterruptible(new Retryable<StorageObject>() {
|
163
|
+
@Override
|
164
|
+
public StorageObject call() throws IOException
|
165
|
+
{
|
166
|
+
try (final BufferedInputStream inputStream = new BufferedInputStream(new FileInputStream(tempFile))) {
|
167
|
+
InputStreamContent mediaContent = new InputStreamContent(contentType, inputStream);
|
168
|
+
mediaContent.setCloseInputStream(true);
|
169
|
+
|
170
|
+
StorageObject objectMetadata = new StorageObject();
|
171
|
+
objectMetadata.setName(path);
|
172
|
+
|
173
|
+
final Storage.Objects.Insert insert = client.objects().insert(bucket, objectMetadata, mediaContent);
|
174
|
+
insert.setDisableGZipContent(true);
|
175
|
+
StorageObject obj = insert.execute();
|
176
|
+
|
177
|
+
logger.info(String.format("Local Hash(MD5): %s / Remote Hash(MD5): %s", localHash, obj.getMd5Hash()));
|
178
|
+
return obj;
|
179
|
+
}
|
180
|
+
}
|
181
|
+
|
182
|
+
@Override
|
183
|
+
public boolean isRetryableException(Exception exception)
|
184
|
+
{
|
185
|
+
return true;
|
186
|
+
}
|
187
|
+
|
188
|
+
@Override
|
189
|
+
public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait) throws RetryGiveupException
|
190
|
+
{
|
191
|
+
String message = String.format("GCS put request failed. Retrying %d/%d after %d seconds. Message: %s: %s",
|
192
|
+
retryCount, retryLimit, retryWait / 1000, exception.getClass(), exception.getMessage());
|
193
|
+
if (retryCount % 3 == 0) {
|
194
|
+
logger.warn(message, exception);
|
195
|
+
}
|
196
|
+
else {
|
197
|
+
logger.warn(message);
|
198
|
+
}
|
199
|
+
}
|
200
|
+
|
201
|
+
@Override
|
202
|
+
public void onGiveup(Exception firstException, Exception lastException) throws RetryGiveupException
|
203
|
+
{
|
204
|
+
}
|
205
|
+
});
|
206
|
+
}
|
207
|
+
catch (RetryGiveupException ex) {
|
208
|
+
throw Throwables.propagate(ex.getCause());
|
209
|
+
}
|
210
|
+
catch (InterruptedException ex) {
|
211
|
+
throw new InterruptedIOException();
|
212
|
+
}
|
213
|
+
}
|
214
|
+
|
215
|
+
/*
|
216
|
+
MD5 hash sum on GCS bucket is encoded with base64.
|
217
|
+
You can get same hash with following commands.
|
218
|
+
$ openssl dgst -md5 -binary /path/to/file.txt | openssl enc -base64
|
219
|
+
or
|
220
|
+
$ gsutil hash -m /path/to/file.txt
|
221
|
+
*/
|
222
|
+
private String getLocalMd5hash(String filePath) throws IOException
|
223
|
+
{
|
224
|
+
try {
|
225
|
+
MessageDigest md = MessageDigest.getInstance("MD5");
|
226
|
+
try (BufferedInputStream input = new BufferedInputStream(new FileInputStream(new File(filePath)))) {
|
227
|
+
byte[] buffer = new byte[256];
|
228
|
+
int len;
|
229
|
+
while ((len = input.read(buffer, 0, buffer.length)) >= 0) {
|
230
|
+
md.update(buffer, 0, len);
|
231
|
+
}
|
232
|
+
return new String(Base64.encodeBase64(md.digest()));
|
233
|
+
}
|
234
|
+
}
|
235
|
+
catch (NoSuchAlgorithmException ex) {
|
236
|
+
throw new ConfigException("MD5 algorism not found");
|
237
|
+
}
|
238
|
+
}
|
239
|
+
|
240
|
+
/**
|
241
|
+
* GCS has character limitation in object names.
|
242
|
+
* @see https://cloud.google.com/storage/docs/naming#objectnames
|
243
|
+
* Although "." isn't listed at above pages, we can't access "./" path from GUI console.
|
244
|
+
* And in many cases, user don't intend of creating "/" directory under the bucket.
|
245
|
+
* This method normalizes path when it contains "./" and "/" and its variations at the beginning
|
246
|
+
*/
|
247
|
+
private static String generateRemotePath(String pathPrefix, String sequenceFormat, int taskIndex, int fileIndex, String pathSuffix)
|
248
|
+
{
|
249
|
+
String path = pathPrefix + String.format(sequenceFormat, taskIndex, fileIndex) + pathSuffix;
|
250
|
+
return path.replaceFirst("^\\.*/*", "");
|
251
|
+
}
|
252
|
+
}
|
@@ -0,0 +1,57 @@
|
|
1
|
+
package org.embulk.output;
|
2
|
+
|
3
|
+
import com.google.common.base.Optional;
|
4
|
+
import org.embulk.config.Config;
|
5
|
+
import org.embulk.config.ConfigDefault;
|
6
|
+
import org.embulk.config.Task;
|
7
|
+
import org.embulk.spi.unit.LocalFile;
|
8
|
+
|
9
|
+
public interface PluginTask extends Task
|
10
|
+
{
|
11
|
+
@Config("bucket")
|
12
|
+
String getBucket();
|
13
|
+
|
14
|
+
@Config("path_prefix")
|
15
|
+
String getPathPrefix();
|
16
|
+
|
17
|
+
@Config("file_ext")
|
18
|
+
String getFileNameExtension();
|
19
|
+
|
20
|
+
@Config("sequence_format")
|
21
|
+
@ConfigDefault("\".%03d.%02d\"")
|
22
|
+
String getSequenceFormat();
|
23
|
+
|
24
|
+
@Config("content_type")
|
25
|
+
@ConfigDefault("\"application/octet-stream\"")
|
26
|
+
String getContentType();
|
27
|
+
|
28
|
+
@Config("auth_method")
|
29
|
+
@ConfigDefault("\"private_key\"")
|
30
|
+
AuthMethod getAuthMethod();
|
31
|
+
|
32
|
+
@Config("service_account_email")
|
33
|
+
@ConfigDefault("null")
|
34
|
+
Optional<String> getServiceAccountEmail();
|
35
|
+
|
36
|
+
// kept for backward compatibility
|
37
|
+
@Config("p12_keyfile_path")
|
38
|
+
@ConfigDefault("null")
|
39
|
+
Optional<String> getP12KeyfilePath();
|
40
|
+
|
41
|
+
@Config("p12_keyfile")
|
42
|
+
@ConfigDefault("null")
|
43
|
+
Optional<LocalFile> getP12Keyfile();
|
44
|
+
void setP12Keyfile(Optional<LocalFile> p12Keyfile);
|
45
|
+
|
46
|
+
@Config("json_keyfile")
|
47
|
+
@ConfigDefault("null")
|
48
|
+
Optional<LocalFile> getJsonKeyfile();
|
49
|
+
|
50
|
+
@Config("application_name")
|
51
|
+
@ConfigDefault("\"embulk-output-gcs\"")
|
52
|
+
String getApplicationName();
|
53
|
+
|
54
|
+
@Config("max_connection_retry")
|
55
|
+
@ConfigDefault("10") // 10 times retry to connect GCS server if failed.
|
56
|
+
int getMaxConnectionRetry();
|
57
|
+
}
|
@@ -11,7 +11,6 @@ import org.embulk.config.ConfigException;
|
|
11
11
|
import org.embulk.config.ConfigSource;
|
12
12
|
import org.embulk.config.TaskReport;
|
13
13
|
import org.embulk.config.TaskSource;
|
14
|
-
import org.embulk.output.GcsOutputPlugin.PluginTask;
|
15
14
|
import org.embulk.spi.Buffer;
|
16
15
|
import org.embulk.spi.Exec;
|
17
16
|
import org.embulk.spi.FileOutputPlugin;
|
@@ -98,7 +97,7 @@ public class TestGcsOutputPlugin
|
|
98
97
|
.set("file_ext", ".csv")
|
99
98
|
.set("formatter", formatterConfig());
|
100
99
|
|
101
|
-
|
100
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
102
101
|
assertEquals("private_key", task.getAuthMethod().toString());
|
103
102
|
}
|
104
103
|
|
@@ -284,7 +283,7 @@ public class TestGcsOutputPlugin
|
|
284
283
|
{
|
285
284
|
ConfigSource configSource = config();
|
286
285
|
PluginTask task = configSource.loadConfig(PluginTask.class);
|
287
|
-
Method method =
|
286
|
+
Method method = GcsTransactionalFileOutput.class.getDeclaredMethod("generateRemotePath", String.class, String.class, int.class, int.class, String.class);
|
288
287
|
method.setAccessible(true);
|
289
288
|
assertEquals("sample.000.01.csv", method.invoke(plugin, "/sample", task.getSequenceFormat(), 0, 1, ".csv"));
|
290
289
|
assertEquals("sample.000.01.csv", method.invoke(plugin, "./sample", task.getSequenceFormat(), 0, 1, ".csv"));
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-output-gcs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.1
|
4
|
+
version: 0.4.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kazuyuki Honda
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-07-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -62,24 +62,27 @@ files:
|
|
62
62
|
- gradlew
|
63
63
|
- gradlew.bat
|
64
64
|
- lib/embulk/output/gcs.rb
|
65
|
+
- src/main/java/org/embulk/output/AuthMethod.java
|
65
66
|
- src/main/java/org/embulk/output/GcsAuthentication.java
|
66
67
|
- src/main/java/org/embulk/output/GcsOutputPlugin.java
|
68
|
+
- src/main/java/org/embulk/output/GcsTransactionalFileOutput.java
|
69
|
+
- src/main/java/org/embulk/output/PluginTask.java
|
67
70
|
- src/test/java/org/embulk/output/TestGcsAuthentication.java
|
68
71
|
- src/test/java/org/embulk/output/TestGcsOutputPlugin.java
|
69
72
|
- src/test/resources/keys.tar.enc
|
70
73
|
- src/test/resources/sample_01.csv
|
71
74
|
- src/test/resources/sample_02.csv
|
72
|
-
- classpath/commons-codec-1.3.jar
|
73
|
-
- classpath/commons-logging-1.1.1.jar
|
74
|
-
- classpath/embulk-output-gcs-0.4.1.jar
|
75
|
-
- classpath/google-api-client-1.19.1.jar
|
76
75
|
- classpath/google-api-services-storage-v1-rev28-1.19.1.jar
|
76
|
+
- classpath/httpclient-4.0.1.jar
|
77
|
+
- classpath/jsr305-1.3.9.jar
|
78
|
+
- classpath/embulk-output-gcs-0.4.2.jar
|
79
|
+
- classpath/commons-logging-1.1.1.jar
|
77
80
|
- classpath/google-http-client-1.19.0.jar
|
81
|
+
- classpath/google-api-client-1.19.1.jar
|
82
|
+
- classpath/commons-codec-1.3.jar
|
83
|
+
- classpath/httpcore-4.0.1.jar
|
78
84
|
- classpath/google-http-client-jackson2-1.19.0.jar
|
79
85
|
- classpath/google-oauth-client-1.19.0.jar
|
80
|
-
- classpath/httpclient-4.0.1.jar
|
81
|
-
- classpath/httpcore-4.0.1.jar
|
82
|
-
- classpath/jsr305-1.3.9.jar
|
83
86
|
homepage: https://github.com/hakobera/embulk-output-gcs
|
84
87
|
licenses:
|
85
88
|
- MIT
|