embulk-output-gcs 0.4.1 → 0.4.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b41fdc69131fe0099ff844875bafa6d160510e10
4
- data.tar.gz: 5b084d56f12509c0c273ea295519a1534d310579
3
+ metadata.gz: c0d6f9617394f320742be74ac318d99ef0f93c06
4
+ data.tar.gz: af9f3a3679a2d76b5c2944fd7e8264ecab3b3a27
5
5
  SHA512:
6
- metadata.gz: a7f51eec122063edaffe94aa065f9bdd1dcc09752d46296f3f8d276d5af250ef82f1a2f56b7e0163f646c98731954d4bf5ce2b901f4e22b4e09033fd20fbf627
7
- data.tar.gz: 7ed822140abca79f035a9bf59af3d472472b6bdfa4c1db505a22ae4ca0a80ca1fd380635487f72eb8fd74f706fd1e531fdf421f63b1614f6fcc5731b319c1489
6
+ metadata.gz: 086960d449e95821defa56b6860eaf2a45fe753db3c7d763272c694ae5fd87b07aba937bb170f748b6d779775e71145c9ff350528d12ea6d770e64d40b67f3ec
7
+ data.tar.gz: 99a7bc4c683d61b500780c316be1c7861181cccde8f20daf6b674e96068a2fc9414db5b830013c0f6c223b33d2cc7a4d164bc62536ed0082ba419754ac8a7980
data/.travis.yml CHANGED
@@ -1,8 +1,6 @@
1
1
  language: java
2
2
  jdk:
3
3
  - oraclejdk8
4
- - oraclejdk7
5
- - openjdk7
6
4
  env:
7
5
  global:
8
6
  - GCP_EMAIL=account-2@embulk-output-gcs-test.iam.gserviceaccount.com
data/build.gradle CHANGED
@@ -14,10 +14,10 @@ configurations {
14
14
  provided
15
15
  }
16
16
 
17
- sourceCompatibility = 1.7
18
- targetCompatibility = 1.7
17
+ sourceCompatibility = 1.8
18
+ targetCompatibility = 1.8
19
19
 
20
- version = "0.4.1"
20
+ version = "0.4.2"
21
21
 
22
22
  dependencies {
23
23
  compile "org.embulk:embulk-core:0.8.6"
Binary file
@@ -1,6 +1,6 @@
1
- #Wed Jan 13 12:41:02 JST 2016
1
+ #Sun Jan 08 00:35:58 PST 2017
2
2
  distributionBase=GRADLE_USER_HOME
3
3
  distributionPath=wrapper/dists
4
4
  zipStoreBase=GRADLE_USER_HOME
5
5
  zipStorePath=wrapper/dists
6
- distributionUrl=https\://services.gradle.org/distributions/gradle-2.10-bin.zip
6
+ distributionUrl=https\://services.gradle.org/distributions/gradle-3.2.1-bin.zip
@@ -0,0 +1,20 @@
1
+ package org.embulk.output;
2
+
3
+ public enum AuthMethod
4
+ {
5
+ private_key("private_key"),
6
+ compute_engine("compute_engine"),
7
+ json_key("json_key");
8
+
9
+ private final String string;
10
+
11
+ AuthMethod(String string)
12
+ {
13
+ this.string = string;
14
+ }
15
+
16
+ public String getString()
17
+ {
18
+ return string;
19
+ }
20
+ }
@@ -1,102 +1,25 @@
1
1
  package org.embulk.output;
2
2
 
3
- import com.google.api.client.http.InputStreamContent;
4
- import com.google.api.client.repackaged.org.apache.commons.codec.binary.Base64;
5
3
  import com.google.api.services.storage.Storage;
6
- import com.google.api.services.storage.model.StorageObject;
7
4
  import com.google.common.base.Function;
8
5
  import com.google.common.base.Optional;
9
6
  import com.google.common.base.Throwables;
10
- import org.embulk.config.Config;
11
- import org.embulk.config.ConfigDefault;
12
7
  import org.embulk.config.ConfigDiff;
13
8
  import org.embulk.config.ConfigException;
14
9
  import org.embulk.config.ConfigSource;
15
- import org.embulk.config.Task;
16
10
  import org.embulk.config.TaskReport;
17
11
  import org.embulk.config.TaskSource;
18
- import org.embulk.spi.Buffer;
19
12
  import org.embulk.spi.Exec;
20
13
  import org.embulk.spi.FileOutputPlugin;
21
14
  import org.embulk.spi.TransactionalFileOutput;
22
15
  import org.embulk.spi.unit.LocalFile;
23
- import org.embulk.spi.util.RetryExecutor.RetryGiveupException;
24
- import org.embulk.spi.util.RetryExecutor.Retryable;
25
- import org.slf4j.Logger;
26
- import static org.embulk.spi.util.RetryExecutor.retryExecutor;
27
16
 
28
- import java.io.BufferedInputStream;
29
- import java.io.BufferedOutputStream;
30
- import java.io.File;
31
- import java.io.FileInputStream;
32
- import java.io.FileOutputStream;
33
17
  import java.io.IOException;
34
- import java.io.InterruptedIOException;
35
18
  import java.security.GeneralSecurityException;
36
- import java.security.MessageDigest;
37
- import java.security.NoSuchAlgorithmException;
38
- import java.util.ArrayList;
39
19
  import java.util.List;
40
- import java.util.concurrent.Callable;
41
- import java.util.concurrent.ExecutionException;
42
- import java.util.concurrent.ExecutorService;
43
- import java.util.concurrent.Executors;
44
- import java.util.concurrent.Future;
45
20
 
46
21
  public class GcsOutputPlugin implements FileOutputPlugin
47
22
  {
48
- private static final Logger logger = Exec.getLogger(GcsOutputPlugin.class);
49
-
50
- public interface PluginTask extends Task
51
- {
52
- @Config("bucket")
53
- String getBucket();
54
-
55
- @Config("path_prefix")
56
- String getPathPrefix();
57
-
58
- @Config("file_ext")
59
- String getFileNameExtension();
60
-
61
- @Config("sequence_format")
62
- @ConfigDefault("\".%03d.%02d\"")
63
- String getSequenceFormat();
64
-
65
- @Config("content_type")
66
- @ConfigDefault("\"application/octet-stream\"")
67
- String getContentType();
68
-
69
- @Config("auth_method")
70
- @ConfigDefault("\"private_key\"")
71
- AuthMethod getAuthMethod();
72
-
73
- @Config("service_account_email")
74
- @ConfigDefault("null")
75
- Optional<String> getServiceAccountEmail();
76
-
77
- // kept for backward compatibility
78
- @Config("p12_keyfile_path")
79
- @ConfigDefault("null")
80
- Optional<String> getP12KeyfilePath();
81
-
82
- @Config("p12_keyfile")
83
- @ConfigDefault("null")
84
- Optional<LocalFile> getP12Keyfile();
85
- void setP12Keyfile(Optional<LocalFile> p12Keyfile);
86
-
87
- @Config("json_keyfile")
88
- @ConfigDefault("null")
89
- Optional<LocalFile> getJsonKeyfile();
90
-
91
- @Config("application_name")
92
- @ConfigDefault("\"embulk-output-gcs\"")
93
- String getApplicationName();
94
-
95
- @Config("max_connection_retry")
96
- @ConfigDefault("10") // 10 times retry to connect GCS server if failed.
97
- int getMaxConnectionRetry();
98
- }
99
-
100
23
  @Override
101
24
  public ConfigDiff transaction(ConfigSource config,
102
25
  int taskCount,
@@ -152,7 +75,7 @@ public class GcsOutputPlugin implements FileOutputPlugin
152
75
  PluginTask task = taskSource.loadTask(PluginTask.class);
153
76
 
154
77
  Storage client = createClient(task);
155
- return new TransactionalGcsFileOutput(task, client, taskIndex);
78
+ return new GcsTransactionalFileOutput(task, client, taskIndex);
156
79
  }
157
80
 
158
81
  private GcsAuthentication newGcsAuth(PluginTask task)
@@ -192,262 +115,4 @@ public class GcsOutputPlugin implements FileOutputPlugin
192
115
  }
193
116
  };
194
117
  }
195
-
196
- static class TransactionalGcsFileOutput implements TransactionalFileOutput
197
- {
198
- private final int taskIndex;
199
- private final Storage client;
200
- private final String bucket;
201
- private final String pathPrefix;
202
- private final String pathSuffix;
203
- private final String sequenceFormat;
204
- private final String contentType;
205
- private final int maxConnectionRetry;
206
- private final List<StorageObject> storageObjects = new ArrayList<>();
207
-
208
- private int fileIndex = 0;
209
- private int callCount = 0;
210
- private BufferedOutputStream currentStream = null;
211
- private Future<StorageObject> currentUpload = null;
212
- private File tempFile = null;
213
-
214
- TransactionalGcsFileOutput(PluginTask task, Storage client, int taskIndex)
215
- {
216
- this.taskIndex = taskIndex;
217
- this.client = client;
218
- this.bucket = task.getBucket();
219
- this.pathPrefix = task.getPathPrefix();
220
- this.pathSuffix = task.getFileNameExtension();
221
- this.sequenceFormat = task.getSequenceFormat();
222
- this.contentType = task.getContentType();
223
- this.maxConnectionRetry = task.getMaxConnectionRetry();
224
- }
225
-
226
- public void nextFile()
227
- {
228
- closeCurrentUpload();
229
- try {
230
- tempFile = Exec.getTempFileSpace().createTempFile();
231
- currentStream = new BufferedOutputStream(new FileOutputStream(tempFile));
232
- fileIndex++;
233
- }
234
- catch (IOException ex) {
235
- Throwables.propagate(ex);
236
- }
237
- }
238
-
239
- @Override
240
- public void add(Buffer buffer)
241
- {
242
- try {
243
- logger.debug("#add called {} times for taskIndex {}", callCount, taskIndex);
244
- currentStream.write(buffer.array(), buffer.offset(), buffer.limit());
245
- callCount++;
246
- }
247
- catch (IOException ex) {
248
- throw new RuntimeException(ex);
249
- }
250
- finally {
251
- buffer.release();
252
- }
253
- }
254
-
255
- @Override
256
- public void finish()
257
- {
258
- String path = generateRemotePath(pathPrefix, sequenceFormat, taskIndex, fileIndex, pathSuffix);
259
- close();
260
- if (tempFile != null) {
261
- currentUpload = startUpload(path);
262
- }
263
-
264
- closeCurrentUpload();
265
- }
266
-
267
- @Override
268
- public void close()
269
- {
270
- try {
271
- if (currentStream != null) {
272
- currentStream.close();
273
- currentStream = null;
274
- }
275
- }
276
- catch (IOException ex) {
277
- throw Throwables.propagate(ex);
278
- }
279
- }
280
-
281
- @Override
282
- public void abort()
283
- {
284
- }
285
-
286
- @Override
287
- public TaskReport commit()
288
- {
289
- TaskReport report = Exec.newTaskReport();
290
- report.set("files", storageObjects);
291
- return report;
292
- }
293
-
294
- private void closeCurrentUpload()
295
- {
296
- try {
297
- if (currentUpload != null) {
298
- StorageObject obj = currentUpload.get();
299
- storageObjects.add(obj);
300
- logger.info("Uploaded '{}/{}' to {}bytes", obj.getBucket(), obj.getName(), obj.getSize());
301
- currentUpload = null;
302
- }
303
-
304
- callCount = 0;
305
- }
306
- catch (InterruptedException | ExecutionException ex) {
307
- throw Throwables.propagate(ex);
308
- }
309
- }
310
-
311
- private Future<StorageObject> startUpload(final String path)
312
- {
313
- try {
314
- final ExecutorService executor = Executors.newCachedThreadPool();
315
- final String hash = getLocalMd5hash(tempFile.getAbsolutePath());
316
-
317
- return executor.submit(new Callable<StorageObject>() {
318
- @Override
319
- public StorageObject call() throws IOException
320
- {
321
- try {
322
- logger.info("Uploading '{}/{}'", bucket, path);
323
- return execUploadWithRetry(path, hash);
324
- }
325
- finally {
326
- executor.shutdown();
327
- }
328
- }
329
- });
330
- }
331
- catch (IOException ex) {
332
- throw Throwables.propagate(ex);
333
- }
334
- }
335
-
336
- private StorageObject execUploadWithRetry(final String path, final String localHash) throws IOException
337
- {
338
- try {
339
- return retryExecutor()
340
- .withRetryLimit(maxConnectionRetry)
341
- .withInitialRetryWait(500)
342
- .withMaxRetryWait(30 * 1000)
343
- .runInterruptible(new Retryable<StorageObject>() {
344
- @Override
345
- public StorageObject call() throws IOException, RetryGiveupException
346
- {
347
- try (final BufferedInputStream inputStream = new BufferedInputStream(new FileInputStream(tempFile))) {
348
- InputStreamContent mediaContent = new InputStreamContent(contentType, inputStream);
349
- mediaContent.setCloseInputStream(true);
350
-
351
- StorageObject objectMetadata = new StorageObject();
352
- objectMetadata.setName(path);
353
-
354
- final Storage.Objects.Insert insert = client.objects().insert(bucket, objectMetadata, mediaContent);
355
- insert.setDisableGZipContent(true);
356
- StorageObject obj = insert.execute();
357
-
358
- logger.info(String.format("Local Hash(MD5): %s / Remote Hash(MD5): %s", localHash, obj.getMd5Hash()));
359
- return obj;
360
- }
361
- }
362
-
363
- @Override
364
- public boolean isRetryableException(Exception exception)
365
- {
366
- return true;
367
- }
368
-
369
- @Override
370
- public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait) throws RetryGiveupException
371
- {
372
- String message = String.format("GCS put request failed. Retrying %d/%d after %d seconds. Message: %s: %s",
373
- retryCount, retryLimit, retryWait / 1000, exception.getClass(), exception.getMessage());
374
- if (retryCount % 3 == 0) {
375
- logger.warn(message, exception);
376
- }
377
- else {
378
- logger.warn(message);
379
- }
380
- }
381
-
382
- @Override
383
- public void onGiveup(Exception firstException, Exception lastException) throws RetryGiveupException
384
- {
385
- }
386
- });
387
- }
388
- catch (RetryGiveupException ex) {
389
- throw Throwables.propagate(ex.getCause());
390
- }
391
- catch (InterruptedException ex) {
392
- throw new InterruptedIOException();
393
- }
394
- }
395
-
396
- /*
397
- MD5 hash sum on GCS bucket is encoded with base64.
398
- You can get same hash with following commands.
399
- $ openssl dgst -md5 -binary /path/to/file.txt | openssl enc -base64
400
- or
401
- $ gsutil hash -m /path/to/file.txt
402
- */
403
- private String getLocalMd5hash(String filePath) throws IOException
404
- {
405
- try {
406
- MessageDigest md = MessageDigest.getInstance("MD5");
407
- try (BufferedInputStream input = new BufferedInputStream(new FileInputStream(new File(filePath)))) {
408
- byte[] buffer = new byte[256];
409
- int len;
410
- while ((len = input.read(buffer, 0, buffer.length)) >= 0) {
411
- md.update(buffer, 0, len);
412
- }
413
- return new String(Base64.encodeBase64(md.digest()));
414
- }
415
- }
416
- catch (NoSuchAlgorithmException ex) {
417
- throw new ConfigException("MD5 algorism not found");
418
- }
419
- }
420
- }
421
-
422
- /**
423
- * GCS has character limitation in object names.
424
- * @see https://cloud.google.com/storage/docs/naming#objectnames
425
- * Although "." isn't listed at above pages, we can't access "./" path from GUI console.
426
- * And in many cases, user don't intend of creating "/" directory under the bucket.
427
- * This method normalizes path when it contains "./" and "/" and its variations at the beginning
428
- */
429
- private static String generateRemotePath(String pathPrefix, String sequenceFormat, int taskIndex, int fileIndex, String pathSuffix)
430
- {
431
- String path = pathPrefix + String.format(sequenceFormat, taskIndex, fileIndex) + pathSuffix;
432
- return path.replaceFirst("^\\.*/*", "");
433
- }
434
-
435
- public enum AuthMethod
436
- {
437
- private_key("private_key"),
438
- compute_engine("compute_engine"),
439
- json_key("json_key");
440
-
441
- private final String string;
442
-
443
- AuthMethod(String string)
444
- {
445
- this.string = string;
446
- }
447
-
448
- public String getString()
449
- {
450
- return string;
451
- }
452
- }
453
118
  }
@@ -0,0 +1,252 @@
1
+ package org.embulk.output;
2
+
3
+ import com.google.api.client.http.InputStreamContent;
4
+ import com.google.api.client.repackaged.org.apache.commons.codec.binary.Base64;
5
+ import com.google.api.services.storage.Storage;
6
+ import com.google.api.services.storage.model.StorageObject;
7
+ import com.google.common.base.Throwables;
8
+ import org.embulk.config.ConfigException;
9
+ import org.embulk.config.TaskReport;
10
+ import org.embulk.spi.Buffer;
11
+ import org.embulk.spi.Exec;
12
+ import org.embulk.spi.TransactionalFileOutput;
13
+ import org.embulk.spi.util.RetryExecutor.RetryGiveupException;
14
+ import org.embulk.spi.util.RetryExecutor.Retryable;
15
+ import org.slf4j.Logger;
16
+
17
+ import java.io.BufferedInputStream;
18
+ import java.io.BufferedOutputStream;
19
+ import java.io.File;
20
+ import java.io.FileInputStream;
21
+ import java.io.FileOutputStream;
22
+ import java.io.IOException;
23
+ import java.io.InterruptedIOException;
24
+ import java.security.MessageDigest;
25
+ import java.security.NoSuchAlgorithmException;
26
+ import java.util.ArrayList;
27
+ import java.util.List;
28
+
29
+ import static org.embulk.spi.util.RetryExecutor.retryExecutor;
30
+
31
+ public class GcsTransactionalFileOutput implements TransactionalFileOutput
32
+ {
33
+ private static final Logger logger = Exec.getLogger(GcsTransactionalFileOutput.class);
34
+
35
+ private final int taskIndex;
36
+ private final Storage client;
37
+ private final String bucket;
38
+ private final String pathPrefix;
39
+ private final String pathSuffix;
40
+ private final String sequenceFormat;
41
+ private final String contentType;
42
+ private final int maxConnectionRetry;
43
+ private final List<StorageObject> storageObjects = new ArrayList<>();
44
+
45
+ private int fileIndex = 0;
46
+ private int callCount = 0;
47
+ private BufferedOutputStream currentStream = null;
48
+ private StorageObject currentUpload = null;
49
+ private File tempFile = null;
50
+
51
+ GcsTransactionalFileOutput(PluginTask task, Storage client, int taskIndex)
52
+ {
53
+ this.taskIndex = taskIndex;
54
+ this.client = client;
55
+ this.bucket = task.getBucket();
56
+ this.pathPrefix = task.getPathPrefix();
57
+ this.pathSuffix = task.getFileNameExtension();
58
+ this.sequenceFormat = task.getSequenceFormat();
59
+ this.contentType = task.getContentType();
60
+ this.maxConnectionRetry = task.getMaxConnectionRetry();
61
+ }
62
+
63
+ public void nextFile()
64
+ {
65
+ closeCurrentUpload();
66
+ try {
67
+ tempFile = Exec.getTempFileSpace().createTempFile();
68
+ currentStream = new BufferedOutputStream(new FileOutputStream(tempFile));
69
+ fileIndex++;
70
+ }
71
+ catch (IOException ex) {
72
+ Throwables.propagate(ex);
73
+ }
74
+ }
75
+
76
+ @Override
77
+ public void add(Buffer buffer)
78
+ {
79
+ try {
80
+ logger.debug("#add called {} times for taskIndex {}", callCount, taskIndex);
81
+ currentStream.write(buffer.array(), buffer.offset(), buffer.limit());
82
+ callCount++;
83
+ }
84
+ catch (IOException ex) {
85
+ throw new RuntimeException(ex);
86
+ }
87
+ finally {
88
+ buffer.release();
89
+ }
90
+ }
91
+
92
+ @Override
93
+ public void finish()
94
+ {
95
+ String path = generateRemotePath(pathPrefix, sequenceFormat, taskIndex, fileIndex, pathSuffix);
96
+ close();
97
+ if (tempFile != null) {
98
+ currentUpload = startUpload(path);
99
+ }
100
+
101
+ closeCurrentUpload();
102
+ }
103
+
104
+ @Override
105
+ public void close()
106
+ {
107
+ try {
108
+ if (currentStream != null) {
109
+ currentStream.close();
110
+ currentStream = null;
111
+ }
112
+ }
113
+ catch (IOException ex) {
114
+ throw Throwables.propagate(ex);
115
+ }
116
+ }
117
+
118
+ @Override
119
+ public void abort()
120
+ {
121
+ }
122
+
123
+ @Override
124
+ public TaskReport commit()
125
+ {
126
+ TaskReport report = Exec.newTaskReport();
127
+ report.set("files", storageObjects);
128
+ return report;
129
+ }
130
+
131
+ private void closeCurrentUpload()
132
+ {
133
+ if (currentUpload != null) {
134
+ StorageObject obj = currentUpload;
135
+ storageObjects.add(obj);
136
+ logger.info("Uploaded '{}/{}' to {}bytes", obj.getBucket(), obj.getName(), obj.getSize());
137
+ currentUpload = null;
138
+ }
139
+
140
+ callCount = 0;
141
+ }
142
+
143
+ private StorageObject startUpload(final String path)
144
+ {
145
+ try {
146
+ final String hash = getLocalMd5hash(tempFile.getAbsolutePath());
147
+
148
+ return execUploadWithRetry(path, hash);
149
+ }
150
+ catch (IOException ex) {
151
+ throw Throwables.propagate(ex);
152
+ }
153
+ }
154
+
155
+ private StorageObject execUploadWithRetry(final String path, final String localHash) throws IOException
156
+ {
157
+ try {
158
+ return retryExecutor()
159
+ .withRetryLimit(maxConnectionRetry)
160
+ .withInitialRetryWait(500)
161
+ .withMaxRetryWait(30 * 1000)
162
+ .runInterruptible(new Retryable<StorageObject>() {
163
+ @Override
164
+ public StorageObject call() throws IOException
165
+ {
166
+ try (final BufferedInputStream inputStream = new BufferedInputStream(new FileInputStream(tempFile))) {
167
+ InputStreamContent mediaContent = new InputStreamContent(contentType, inputStream);
168
+ mediaContent.setCloseInputStream(true);
169
+
170
+ StorageObject objectMetadata = new StorageObject();
171
+ objectMetadata.setName(path);
172
+
173
+ final Storage.Objects.Insert insert = client.objects().insert(bucket, objectMetadata, mediaContent);
174
+ insert.setDisableGZipContent(true);
175
+ StorageObject obj = insert.execute();
176
+
177
+ logger.info(String.format("Local Hash(MD5): %s / Remote Hash(MD5): %s", localHash, obj.getMd5Hash()));
178
+ return obj;
179
+ }
180
+ }
181
+
182
+ @Override
183
+ public boolean isRetryableException(Exception exception)
184
+ {
185
+ return true;
186
+ }
187
+
188
+ @Override
189
+ public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait) throws RetryGiveupException
190
+ {
191
+ String message = String.format("GCS put request failed. Retrying %d/%d after %d seconds. Message: %s: %s",
192
+ retryCount, retryLimit, retryWait / 1000, exception.getClass(), exception.getMessage());
193
+ if (retryCount % 3 == 0) {
194
+ logger.warn(message, exception);
195
+ }
196
+ else {
197
+ logger.warn(message);
198
+ }
199
+ }
200
+
201
+ @Override
202
+ public void onGiveup(Exception firstException, Exception lastException) throws RetryGiveupException
203
+ {
204
+ }
205
+ });
206
+ }
207
+ catch (RetryGiveupException ex) {
208
+ throw Throwables.propagate(ex.getCause());
209
+ }
210
+ catch (InterruptedException ex) {
211
+ throw new InterruptedIOException();
212
+ }
213
+ }
214
+
215
+ /*
216
+ MD5 hash sum on GCS bucket is encoded with base64.
217
+ You can get same hash with following commands.
218
+ $ openssl dgst -md5 -binary /path/to/file.txt | openssl enc -base64
219
+ or
220
+ $ gsutil hash -m /path/to/file.txt
221
+ */
222
+ private String getLocalMd5hash(String filePath) throws IOException
223
+ {
224
+ try {
225
+ MessageDigest md = MessageDigest.getInstance("MD5");
226
+ try (BufferedInputStream input = new BufferedInputStream(new FileInputStream(new File(filePath)))) {
227
+ byte[] buffer = new byte[256];
228
+ int len;
229
+ while ((len = input.read(buffer, 0, buffer.length)) >= 0) {
230
+ md.update(buffer, 0, len);
231
+ }
232
+ return new String(Base64.encodeBase64(md.digest()));
233
+ }
234
+ }
235
+ catch (NoSuchAlgorithmException ex) {
236
+ throw new ConfigException("MD5 algorism not found");
237
+ }
238
+ }
239
+
240
+ /**
241
+ * GCS has character limitation in object names.
242
+ * @see https://cloud.google.com/storage/docs/naming#objectnames
243
+ * Although "." isn't listed at above pages, we can't access "./" path from GUI console.
244
+ * And in many cases, user don't intend of creating "/" directory under the bucket.
245
+ * This method normalizes path when it contains "./" and "/" and its variations at the beginning
246
+ */
247
+ private static String generateRemotePath(String pathPrefix, String sequenceFormat, int taskIndex, int fileIndex, String pathSuffix)
248
+ {
249
+ String path = pathPrefix + String.format(sequenceFormat, taskIndex, fileIndex) + pathSuffix;
250
+ return path.replaceFirst("^\\.*/*", "");
251
+ }
252
+ }
@@ -0,0 +1,57 @@
1
+ package org.embulk.output;
2
+
3
+ import com.google.common.base.Optional;
4
+ import org.embulk.config.Config;
5
+ import org.embulk.config.ConfigDefault;
6
+ import org.embulk.config.Task;
7
+ import org.embulk.spi.unit.LocalFile;
8
+
9
+ public interface PluginTask extends Task
10
+ {
11
+ @Config("bucket")
12
+ String getBucket();
13
+
14
+ @Config("path_prefix")
15
+ String getPathPrefix();
16
+
17
+ @Config("file_ext")
18
+ String getFileNameExtension();
19
+
20
+ @Config("sequence_format")
21
+ @ConfigDefault("\".%03d.%02d\"")
22
+ String getSequenceFormat();
23
+
24
+ @Config("content_type")
25
+ @ConfigDefault("\"application/octet-stream\"")
26
+ String getContentType();
27
+
28
+ @Config("auth_method")
29
+ @ConfigDefault("\"private_key\"")
30
+ AuthMethod getAuthMethod();
31
+
32
+ @Config("service_account_email")
33
+ @ConfigDefault("null")
34
+ Optional<String> getServiceAccountEmail();
35
+
36
+ // kept for backward compatibility
37
+ @Config("p12_keyfile_path")
38
+ @ConfigDefault("null")
39
+ Optional<String> getP12KeyfilePath();
40
+
41
+ @Config("p12_keyfile")
42
+ @ConfigDefault("null")
43
+ Optional<LocalFile> getP12Keyfile();
44
+ void setP12Keyfile(Optional<LocalFile> p12Keyfile);
45
+
46
+ @Config("json_keyfile")
47
+ @ConfigDefault("null")
48
+ Optional<LocalFile> getJsonKeyfile();
49
+
50
+ @Config("application_name")
51
+ @ConfigDefault("\"embulk-output-gcs\"")
52
+ String getApplicationName();
53
+
54
+ @Config("max_connection_retry")
55
+ @ConfigDefault("10") // 10 times retry to connect GCS server if failed.
56
+ int getMaxConnectionRetry();
57
+ }
@@ -11,7 +11,6 @@ import org.embulk.config.ConfigException;
11
11
  import org.embulk.config.ConfigSource;
12
12
  import org.embulk.config.TaskReport;
13
13
  import org.embulk.config.TaskSource;
14
- import org.embulk.output.GcsOutputPlugin.PluginTask;
15
14
  import org.embulk.spi.Buffer;
16
15
  import org.embulk.spi.Exec;
17
16
  import org.embulk.spi.FileOutputPlugin;
@@ -98,7 +97,7 @@ public class TestGcsOutputPlugin
98
97
  .set("file_ext", ".csv")
99
98
  .set("formatter", formatterConfig());
100
99
 
101
- GcsOutputPlugin.PluginTask task = config.loadConfig(PluginTask.class);
100
+ PluginTask task = config.loadConfig(PluginTask.class);
102
101
  assertEquals("private_key", task.getAuthMethod().toString());
103
102
  }
104
103
 
@@ -284,7 +283,7 @@ public class TestGcsOutputPlugin
284
283
  {
285
284
  ConfigSource configSource = config();
286
285
  PluginTask task = configSource.loadConfig(PluginTask.class);
287
- Method method = GcsOutputPlugin.class.getDeclaredMethod("generateRemotePath", String.class, String.class, int.class, int.class, String.class);
286
+ Method method = GcsTransactionalFileOutput.class.getDeclaredMethod("generateRemotePath", String.class, String.class, int.class, int.class, String.class);
288
287
  method.setAccessible(true);
289
288
  assertEquals("sample.000.01.csv", method.invoke(plugin, "/sample", task.getSequenceFormat(), 0, 1, ".csv"));
290
289
  assertEquals("sample.000.01.csv", method.invoke(plugin, "./sample", task.getSequenceFormat(), 0, 1, ".csv"));
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-output-gcs
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.1
4
+ version: 0.4.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kazuyuki Honda
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-05-29 00:00:00.000000000 Z
11
+ date: 2018-07-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -62,24 +62,27 @@ files:
62
62
  - gradlew
63
63
  - gradlew.bat
64
64
  - lib/embulk/output/gcs.rb
65
+ - src/main/java/org/embulk/output/AuthMethod.java
65
66
  - src/main/java/org/embulk/output/GcsAuthentication.java
66
67
  - src/main/java/org/embulk/output/GcsOutputPlugin.java
68
+ - src/main/java/org/embulk/output/GcsTransactionalFileOutput.java
69
+ - src/main/java/org/embulk/output/PluginTask.java
67
70
  - src/test/java/org/embulk/output/TestGcsAuthentication.java
68
71
  - src/test/java/org/embulk/output/TestGcsOutputPlugin.java
69
72
  - src/test/resources/keys.tar.enc
70
73
  - src/test/resources/sample_01.csv
71
74
  - src/test/resources/sample_02.csv
72
- - classpath/commons-codec-1.3.jar
73
- - classpath/commons-logging-1.1.1.jar
74
- - classpath/embulk-output-gcs-0.4.1.jar
75
- - classpath/google-api-client-1.19.1.jar
76
75
  - classpath/google-api-services-storage-v1-rev28-1.19.1.jar
76
+ - classpath/httpclient-4.0.1.jar
77
+ - classpath/jsr305-1.3.9.jar
78
+ - classpath/embulk-output-gcs-0.4.2.jar
79
+ - classpath/commons-logging-1.1.1.jar
77
80
  - classpath/google-http-client-1.19.0.jar
81
+ - classpath/google-api-client-1.19.1.jar
82
+ - classpath/commons-codec-1.3.jar
83
+ - classpath/httpcore-4.0.1.jar
78
84
  - classpath/google-http-client-jackson2-1.19.0.jar
79
85
  - classpath/google-oauth-client-1.19.0.jar
80
- - classpath/httpclient-4.0.1.jar
81
- - classpath/httpcore-4.0.1.jar
82
- - classpath/jsr305-1.3.9.jar
83
86
  homepage: https://github.com/hakobera/embulk-output-gcs
84
87
  licenses:
85
88
  - MIT