embulk-output-larges3 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f84d3811ca05bdbd6a2ec5e15ed8f454538482c5
4
- data.tar.gz: b905b1d625e3bc5e8ec48c249152d9d969db8748
3
+ metadata.gz: d645c8ac94cb9dcbdf6a4d95b78e860d1d74dbf8
4
+ data.tar.gz: debcdf0a002be4394b7a4d5a4dd124d5cc32f3ac
5
5
  SHA512:
6
- metadata.gz: 2a4d47467b0eec06e010b19c19a65b84a5ade9312b9638756cc842435da0de6ac610398880c448176cabf92642e6e2ab7dcad6f15265132ddbdc229223214dad
7
- data.tar.gz: 97f5c770a102e8827a3b196f24fb72df1267614a711d4484098595b2f809a055f46c21e3d7afa42879f801d2a03adf0d002cb5c0d43f3b0975ad6a317bd80473
6
+ metadata.gz: 54395b570321c91ddd267ed75925350afd09ec5ed6764d729bbc103ed49d9bd66d27f1ca1a964b74c6cc11589cc7c022e2a9281defa387c7777df3f97ebce2cb
7
+ data.tar.gz: d95a396cbc798a2c6537eeb72cb0f092ea7deb442154b2a7d054b726d107ad12f6d054dde28c17d67008613f1e5fbd40604b9f531c5b12ecb0ddf337cb4a239f
data/README.md CHANGED
@@ -1,6 +1,9 @@
1
- # Embulk Output Large S3 output plugin for Embulk
1
+ # Large S3 File Output Plugin for Embulk
2
+ Embulk file output plugin that uploads very large files to AWS S3 using multipart upload.
3
+ This plugin is an extension of the classic S3 output plugin, [embulk-output-s3](https://github.com/llibra/embulk-output-s3).
4
+ ## Developers
2
5
 
3
- TODO: Write short description here and build.gradle file.
6
+ * Angelos Alexopoulos <alexopoulos7@gmail.com>
4
7
 
5
8
  ## Overview
6
9
 
@@ -11,22 +14,52 @@ TODO: Write short description here and build.gradle file.
11
14
 
12
15
  ## Configuration
13
16
 
14
- - **option1**: description (integer, required)
15
- - **option2**: description (string, default: `"myvalue"`)
16
- - **option3**: description (string, default: `null`)
17
+ - **path_prefix**: prefix of target keys (string, required)
18
+ - **file_ext**: suffix of target keys (string, required)
19
+ - **sequence_format**: format for sequence part of target keys (string, default: '.%03d.%02d')
20
+ - **bucket**: S3 bucket name (string, required)
21
+ - **endpoint**: S3 endpoint host name, e.g. `s3-us-west-1.amazonaws.com` (string, optional)
22
+ - **access_key_id**: AWS access key id. This parameter is required when your agent is not running on EC2 instance with an IAM Role. (string, default: null)
23
+ - **secret_access_key**: AWS secret key. This parameter is required when your agent is not running on EC2 instance with an IAM Role. (string, default: null)
24
+ - **tmp_path**: temporary file directory. If null, it is associated with the default FileSystem. (string, default: null)
25
+ - **tmp_path_prefix**: prefix of temporary files (string, default: 'embulk-output-s3-')
26
+ - **canned_acl**: canned access control list for created objects ([enum](#cannedaccesscontrollist), default: null)
27
+ - **proxy_host**: proxy host to use when accessing AWS S3 via proxy. (string, default: null )
28
+ - **proxy_port**: proxy port to use when accessing AWS S3 via proxy. (string, default: null )
29
+ - **part_size**: Size in Bytes of each part for multipart upload to S3, defaults to 50 MB (int, default: 52428800 )
30
+
31
+ ### CannedAccessControlList
32
+ You can choose one of the values listed below.
33
+
34
+ - AuthenticatedRead
35
+ - AwsExecRead
36
+ - BucketOwnerFullControl
37
+ - BucketOwnerRead
38
+ - LogDeliveryWrite
39
+ - Private
40
+ - PublicRead
41
+ - PublicReadWrite
42
+
43
+ cf. http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/services/s3/model/CannedAccessControlList.html
17
44
 
18
45
  ## Example
19
46
 
20
47
  ```yaml
21
48
  out:
22
- type: embulk_output_larges3
23
- option1: example1
24
- option2: example2
49
+ type: larges3
50
+ path_prefix: logs/out
51
+ file_ext: .csv
52
+ bucket: my-s3-bucket
53
+ endpoint: s3-us-west-1.amazonaws.com
54
+ access_key_id: ABCXYZ123ABCXYZ123
55
+ secret_access_key: AbCxYz123aBcXyZ123
56
+ formatter:
57
+ type: csv
25
58
  ```
26
59
 
27
60
 
28
61
  ## Build
29
62
 
30
63
  ```
31
- $ ./gradlew gem # -t to watch change of files and rebuild continuously
32
- ```
64
+ $ ./gradlew gem
65
+ ```
data/build.gradle CHANGED
@@ -13,7 +13,7 @@ configurations {
13
13
  provided
14
14
  }
15
15
 
16
- version = "0.1.2"
16
+ version = "0.1.3"
17
17
 
18
18
  sourceCompatibility = 1.8
19
19
  targetCompatibility = 1.8
@@ -10,6 +10,7 @@ import java.util.IllegalFormatException;
10
10
  import java.util.List;
11
11
  import java.util.ArrayList;
12
12
  import java.util.Locale;
13
+ import java.lang.Long;
13
14
 
14
15
  import org.embulk.config.Config;
15
16
  import org.embulk.config.ConfigDefault;
@@ -89,6 +90,10 @@ public class Larges3FileOutputPlugin
89
90
  @Config("canned_acl")
90
91
  @ConfigDefault("null")
91
92
  Optional<CannedAccessControlList> getCannedAccessControlList();
93
+
94
+ @Config("part_size")
95
+ @ConfigDefault("52428800")
96
+ Optional<String> getPartSize();
92
97
  }
93
98
 
94
99
  public static class S3FileOutput
@@ -102,6 +107,7 @@ public class Larges3FileOutputPlugin
102
107
  private final String sequenceFormat;
103
108
  private final String fileNameExtension;
104
109
  private final String tempPathPrefix;
110
+ private final String partSize;
105
111
  private final Optional<CannedAccessControlList> cannedAccessControlListOptional;
106
112
 
107
113
  private int taskIndex;
@@ -153,6 +159,7 @@ public class Larges3FileOutputPlugin
153
159
  this.sequenceFormat = task.getSequenceFormat();
154
160
  this.fileNameExtension = task.getFileNameExtension();
155
161
  this.tempPathPrefix = task.getTempPathPrefix();
162
+ this.partSize = task.getPartSize().get();
156
163
  if (task.getTempPath().isPresent()) {
157
164
  this.tempPath = task.getTempPath().get();
158
165
  }
@@ -201,13 +208,13 @@ public class Larges3FileOutputPlugin
201
208
 
202
209
  File file = new File(from.toString());
203
210
  long contentLength = file.length();
204
- long partSize = 52428800; // Set part size to 50 MB.
211
+ long partSize = Long.parseLong(this.partSize); // Set part size to 50 MB.
205
212
 
206
213
  try {
207
214
  // Step 2: Upload parts.
208
215
  long filePosition = 0;
209
216
  for (int i = 1; filePosition < contentLength; i++) {
210
- // Last part can be less than 5 MB. Adjust part size.
217
+ // Last part can be less than 50 MB. Adjust part size.
211
218
  partSize = Math.min(partSize, (contentLength - filePosition));
212
219
 
213
220
  // Create request to upload a part.
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-output-larges3
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Angelos Alexopoulos
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-04-18 00:00:00.000000000 Z
11
+ date: 2018-04-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -67,9 +67,9 @@ files:
67
67
  - classpath/httpclient-4.5.2.jar
68
68
  - classpath/commons-codec-1.9.jar
69
69
  - classpath/aws-java-sdk-s3-1.11.271.jar
70
+ - classpath/embulk-output-larges3-0.1.3.jar
70
71
  - classpath/jmespath-java-1.11.271.jar
71
72
  - classpath/jackson-dataformat-cbor-2.6.7.jar
72
- - classpath/embulk-output-larges3-0.1.2.jar
73
73
  - classpath/jackson-databind-2.6.7.1.jar
74
74
  - classpath/httpcore-4.4.4.jar
75
75
  - classpath/aws-java-sdk-kms-1.11.271.jar