embulk-output-larges3 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d645c8ac94cb9dcbdf6a4d95b78e860d1d74dbf8
|
4
|
+
data.tar.gz: debcdf0a002be4394b7a4d5a4dd124d5cc32f3ac
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 54395b570321c91ddd267ed75925350afd09ec5ed6764d729bbc103ed49d9bd66d27f1ca1a964b74c6cc11589cc7c022e2a9281defa387c7777df3f97ebce2cb
|
7
|
+
data.tar.gz: d95a396cbc798a2c6537eeb72cb0f092ea7deb442154b2a7d054b726d107ad12f6d054dde28c17d67008613f1e5fbd40604b9f531c5b12ecb0ddf337cb4a239f
|
data/README.md
CHANGED
@@ -1,6 +1,9 @@
|
|
1
|
-
#
|
1
|
+
# Large S3 File Output Plugin for Embulk
|
2
|
+
Embulk File Output Plugin: Handle and Upload really large files to AWS S3 using multipart upload.
|
3
|
+
This plugin is an extension of the classic S3 output plugin: https://github.com/llibra/embulk-output-s3
|
4
|
+
## Developers
|
2
5
|
|
3
|
-
|
6
|
+
* Angelos Alexopoulos <alexopoulos7@gmail.com>
|
4
7
|
|
5
8
|
## Overview
|
6
9
|
|
@@ -11,22 +14,52 @@ TODO: Write short description here and build.gradle file.
|
|
11
14
|
|
12
15
|
## Configuration
|
13
16
|
|
14
|
-
- **
|
15
|
-
- **
|
16
|
-
- **
|
17
|
+
- **path_prefix**: prefix of target keys (string, required)
|
18
|
+
- **file_ext**: suffix of target keys (string, required)
|
19
|
+
- **sequence_format**: format for sequence part of target keys (string, default: '.%03d.%02d')
|
20
|
+
- **bucket**: S3 bucket name (string, required)
|
21
|
+
- **endpoint**: S3 endpoint host name, e.g. `s3-us-west-1.amazonaws.com` (string, optional)
|
22
|
+
- **access_key_id**: AWS access key id. This parameter is required when your agent is not running on an EC2 instance with an IAM Role. (string, default: null)
|
23
|
+
- **secret_access_key**: AWS secret key. This parameter is required when your agent is not running on an EC2 instance with an IAM Role. (string, default: null)
|
24
|
+
- **tmp_path**: temporary file directory. If null, it is associated with the default FileSystem. (string, default: null)
|
25
|
+
- **tmp_path_prefix**: prefix of temporary files (string, default: 'embulk-output-s3-')
|
26
|
+
- **canned_acl**: canned access control list for created objects ([enum](#cannedaccesscontrollist), default: null)
|
27
|
+
- **proxy_host**: proxy host to use when accessing AWS S3 via proxy. (string, default: null )
|
28
|
+
- **proxy_port**: proxy port to use when accessing AWS S3 via proxy. (string, default: null )
|
29
|
+
- **part_size**: Size in Bytes of each part for multipart upload to S3, defaults to 50 MB (int, default: 52428800 )
|
30
|
+
|
31
|
+
### CannedAccessControlList
|
32
|
+
You can choose one of the values listed below.
|
33
|
+
|
34
|
+
- AuthenticatedRead
|
35
|
+
- AwsExecRead
|
36
|
+
- BucketOwnerFullControl
|
37
|
+
- BucketOwnerRead
|
38
|
+
- LogDeliveryWrite
|
39
|
+
- Private
|
40
|
+
- PublicRead
|
41
|
+
- PublicReadWrite
|
42
|
+
|
43
|
+
cf. http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/services/s3/model/CannedAccessControlList.html
|
17
44
|
|
18
45
|
## Example
|
19
46
|
|
20
47
|
```yaml
|
21
48
|
out:
|
22
|
-
type:
|
23
|
-
|
24
|
-
|
49
|
+
type: larges3
|
50
|
+
path_prefix: logs/out
|
51
|
+
file_ext: .csv
|
52
|
+
bucket: my-s3-bucket
|
53
|
+
endpoint: s3-us-west-1.amazonaws.com
|
54
|
+
access_key_id: ABCXYZ123ABCXYZ123
|
55
|
+
secret_access_key: AbCxYz123aBcXyZ123
|
56
|
+
formatter:
|
57
|
+
type: csv
|
25
58
|
```
|
26
59
|
|
27
60
|
|
28
61
|
## Build
|
29
62
|
|
30
63
|
```
|
31
|
-
$ ./gradlew gem
|
32
|
-
```
|
64
|
+
$ ./gradlew gem
|
65
|
+
```
|
data/build.gradle
CHANGED
Binary file
|
@@ -10,6 +10,7 @@ import java.util.IllegalFormatException;
|
|
10
10
|
import java.util.List;
|
11
11
|
import java.util.ArrayList;
|
12
12
|
import java.util.Locale;
|
13
|
+
import java.lang.Long;
|
13
14
|
|
14
15
|
import org.embulk.config.Config;
|
15
16
|
import org.embulk.config.ConfigDefault;
|
@@ -89,6 +90,10 @@ public class Larges3FileOutputPlugin
|
|
89
90
|
@Config("canned_acl")
|
90
91
|
@ConfigDefault("null")
|
91
92
|
Optional<CannedAccessControlList> getCannedAccessControlList();
|
93
|
+
|
94
|
+
@Config("part_size")
|
95
|
+
@ConfigDefault("52428800")
|
96
|
+
Optional<String> getPartSize();
|
92
97
|
}
|
93
98
|
|
94
99
|
public static class S3FileOutput
|
@@ -102,6 +107,7 @@ public class Larges3FileOutputPlugin
|
|
102
107
|
private final String sequenceFormat;
|
103
108
|
private final String fileNameExtension;
|
104
109
|
private final String tempPathPrefix;
|
110
|
+
private final String partSize;
|
105
111
|
private final Optional<CannedAccessControlList> cannedAccessControlListOptional;
|
106
112
|
|
107
113
|
private int taskIndex;
|
@@ -153,6 +159,7 @@ public class Larges3FileOutputPlugin
|
|
153
159
|
this.sequenceFormat = task.getSequenceFormat();
|
154
160
|
this.fileNameExtension = task.getFileNameExtension();
|
155
161
|
this.tempPathPrefix = task.getTempPathPrefix();
|
162
|
+
this.partSize = task.getPartSize().get();
|
156
163
|
if (task.getTempPath().isPresent()) {
|
157
164
|
this.tempPath = task.getTempPath().get();
|
158
165
|
}
|
@@ -201,13 +208,13 @@ public class Larges3FileOutputPlugin
|
|
201
208
|
|
202
209
|
File file = new File(from.toString());
|
203
210
|
long contentLength = file.length();
|
204
|
-
long partSize =
|
211
|
+
long partSize = Long.parseLong(this.partSize); // Set part size to 50 MB.
|
205
212
|
|
206
213
|
try {
|
207
214
|
// Step 2: Upload parts.
|
208
215
|
long filePosition = 0;
|
209
216
|
for (int i = 1; filePosition < contentLength; i++) {
|
210
|
-
// Last part can be less than
|
217
|
+
// Last part can be less than 50 MB. Adjust part size.
|
211
218
|
partSize = Math.min(partSize, (contentLength - filePosition));
|
212
219
|
|
213
220
|
// Create request to upload a part.
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-output-larges3
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Angelos Alexopoulos
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-04-
|
11
|
+
date: 2018-04-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -67,9 +67,9 @@ files:
|
|
67
67
|
- classpath/httpclient-4.5.2.jar
|
68
68
|
- classpath/commons-codec-1.9.jar
|
69
69
|
- classpath/aws-java-sdk-s3-1.11.271.jar
|
70
|
+
- classpath/embulk-output-larges3-0.1.3.jar
|
70
71
|
- classpath/jmespath-java-1.11.271.jar
|
71
72
|
- classpath/jackson-dataformat-cbor-2.6.7.jar
|
72
|
-
- classpath/embulk-output-larges3-0.1.2.jar
|
73
73
|
- classpath/jackson-databind-2.6.7.1.jar
|
74
74
|
- classpath/httpcore-4.4.4.jar
|
75
75
|
- classpath/aws-java-sdk-kms-1.11.271.jar
|
Binary file
|