embulk-output-s3v2 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +37 -2
- data/build.gradle +1 -1
- data/src/main/java/org/embulk/output/s3v2/s3/S3ClientManager.java +14 -21
- metadata +19 -19
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 86426b765f2029544c4377e34d1b53127700c2d61e24b70d50c7ceebb2cd2501
|
4
|
+
data.tar.gz: 1e3baf5a52ea06045bb5964d67736a8820aa8095fecacd74b7fe0d78566f207d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5d2fed62db69b832ed3affe66b66b76f87d04f793ec008d4a56e88c1e3530bfe40b63f7e7d0e3aaa1f0b59ffd80d057facfe92de2bec6608b483037be2230154
|
7
|
+
data.tar.gz: e8af855d04298199185d9718374f69ca67c518bfdcc3c06ff295d5d14589a7f334007dbf01fccc5a2b29b1b4acc69f0e47784a4c9bd749acc724e2ec4e89bb44
|
data/README.md
CHANGED
@@ -13,7 +13,7 @@ Files stores on Amazon S3.
|
|
13
13
|
|
14
14
|
## Configuration
|
15
15
|
- **region**: AWS region name. (string, required)
|
16
|
-
- **enable_profile**: If true, AWS credentials profile will be used when authenticating AWS. (boolean, default: `false`)
|
16
|
+
- **enable_profile**: If true, AWS credentials profile will be used when authenticating AWS. If false, IamRole will be used. (boolean, default: `false`)
|
17
17
|
- Supported in v0.2.0 or later
|
18
18
|
- **profile**: AWS credentials profile name. If `enable_profile` is false, this parameter will be ignored. (string, default: `default`)
|
19
19
|
- Supported in v0.2.0 or later
|
@@ -27,7 +27,7 @@ Files stores on Amazon S3.
|
|
27
27
|
- Maximum size: `2GB`
|
28
28
|
- Enable semantics
|
29
29
|
- Same as that of `multipart_threshold`
|
30
|
-
- **multipart_threshold**: If `enable_multi_part_upload` is false, this parameter will be ignored. (string, default: `8MB`)
|
30
|
+
- **multipart_threshold**: The size threshold the plugin uses for multipart transfers of individual divided bulk-data. If `enable_multi_part_upload` is false, this parameter will be ignored. (string, default: `8MB`)
|
31
31
|
- Enable semantics
|
32
32
|
- `KB`
|
33
33
|
- `MB`
|
@@ -38,6 +38,7 @@ Files stores on Amazon S3.
|
|
38
38
|
- **temp_path**: Directory for temp file output. (string, default: `/tmp`)
|
39
39
|
- **temp_file_prefix**: Prefix of temp file name. (string, default: `embulk-output-s3v2`)
|
40
40
|
### Example
|
41
|
+
#### Basic sample with IAMRole authentication
|
41
42
|
```yaml
|
42
43
|
out:
|
43
44
|
type: s3v2
|
@@ -50,6 +51,40 @@ out:
|
|
50
51
|
type: csv
|
51
52
|
delimeter: ","
|
52
53
|
```
|
54
|
+
#### Basic sample with Credentials-Profile authentication
|
55
|
+
```yaml
|
56
|
+
out:
|
57
|
+
type: s3v2
|
58
|
+
region: ap-northeast-1
|
59
|
+
bucket: s3-bucket-name
|
60
|
+
object_key_prefix: embulk/embulk-output-s3v2
|
61
|
+
temp_path: /tmp
|
62
|
+
enable_profile: true
|
63
|
+
profile: default
|
64
|
+
extension: .csv
|
65
|
+
formatter:
|
66
|
+
type: csv
|
67
|
+
delimeter: ","
|
68
|
+
```
|
69
|
+
#### Multipart Upload Sample with gzip encode
|
70
|
+
```yaml
|
71
|
+
out:
|
72
|
+
type: s3v2
|
73
|
+
region: ap-northeast-1
|
74
|
+
bucket: s3-bucket-name
|
75
|
+
object_key_prefix: embulk/embulk-output-s3v2
|
76
|
+
temp_path: /tmp
|
77
|
+
enable_multi_part_upload: true
|
78
|
+
multipart_chunksize: 10MB
|
79
|
+
max_concurrent_requests: 20
|
80
|
+
extension: csv.gz
|
81
|
+
formatter:
|
82
|
+
type: csv
|
83
|
+
delimeter: ","
|
84
|
+
encoders:
|
85
|
+
- type: gzip
|
86
|
+
level: 1
|
87
|
+
```
|
53
88
|
|
54
89
|
## Usage
|
55
90
|
### Build
|
data/build.gradle
CHANGED
data/src/main/java/org/embulk/output/s3v2/s3/S3ClientManager.java
CHANGED
@@ -1,13 +1,11 @@
|
|
1
1
|
package org.embulk.output.s3v2.s3;
|
2
2
|
|
3
|
-
import java.io.
|
4
|
-
import java.io.
|
3
|
+
import java.io.BufferedInputStream;
|
4
|
+
import java.io.ByteArrayOutputStream;
|
5
|
+
import java.io.FileInputStream;
|
5
6
|
import java.io.IOException;
|
6
7
|
import java.nio.ByteBuffer;
|
7
|
-
import java.nio.channels.FileChannel;
|
8
|
-
import java.nio.file.Files;
|
9
8
|
import java.nio.file.Path;
|
10
|
-
import java.nio.file.Paths;
|
11
9
|
import java.util.ArrayList;
|
12
10
|
import java.util.List;
|
13
11
|
import java.util.concurrent.CompletableFuture;
|
@@ -91,26 +89,23 @@ public class S3ClientManager
|
|
91
89
|
CreateMultipartUploadResponse response = s3.createMultipartUpload(createMultipartUploadRequest);
|
92
90
|
String uploadId = response.uploadId();
|
93
91
|
|
94
|
-
try (
|
95
|
-
|
96
|
-
ByteBuffer buffer = ByteBuffer.allocate(ChunksizeComputation.getChunksizeBytes(multipartChunksize));
|
97
|
-
|
92
|
+
try (BufferedInputStream bufferStream = new BufferedInputStream(
|
93
|
+
new FileInputStream(sourceFile.toFile()))) {
|
98
94
|
ExecutorService es = Executors.newFixedThreadPool(status.getMaxConcurrentRequests());
|
99
95
|
List<CompletableFuture<String>> futureList = new ArrayList<>();
|
96
|
+
|
97
|
+
int multipartChunksize = ChunksizeComputation.getChunksizeBytes(status.getMultipartChunksize());
|
98
|
+
byte[] data = new byte[multipartChunksize];
|
100
99
|
int i = 1;
|
101
100
|
while (true) {
|
102
|
-
|
103
|
-
if (
|
101
|
+
int n = bufferStream.read(data);
|
102
|
+
if (n == -1) {
|
104
103
|
break;
|
105
104
|
}
|
106
|
-
buffer.flip();
|
107
105
|
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
bos.write(buffer.array(), buffer.arrayOffset(), buffer.limit());
|
112
|
-
bos.flush();
|
113
|
-
}
|
106
|
+
ByteArrayOutputStream out = new ByteArrayOutputStream();
|
107
|
+
out.write(data, 0, n);
|
108
|
+
byte[] chunk = out.toByteArray();
|
114
109
|
|
115
110
|
UploadPartRequest uploadPartRequest = UploadPartRequest.builder()
|
116
111
|
.bucket(bucket).key(objectKey).uploadId(uploadId)
|
@@ -119,7 +114,7 @@ public class S3ClientManager
|
|
119
114
|
|
120
115
|
// Async upload to S3
|
121
116
|
CompletableFuture<String> future = CompletableFuture.supplyAsync(() -> {
|
122
|
-
return s3.uploadPart(uploadPartRequest, RequestBody.
|
117
|
+
return s3.uploadPart(uploadPartRequest, RequestBody.fromBytes(chunk)).eTag();
|
123
118
|
}, es);
|
124
119
|
futureList.add(future);
|
125
120
|
|
@@ -131,8 +126,6 @@ public class S3ClientManager
|
|
131
126
|
for (int j = 1; j <= futureList.size(); j++) {
|
132
127
|
CompletedPart part = CompletedPart.builder().partNumber(j).eTag(futureList.get(j - 1).get()).build();
|
133
128
|
partList.add(part);
|
134
|
-
// Remove tmpFile
|
135
|
-
Files.delete(Paths.get(sourceFile + "_" + j));
|
136
129
|
}
|
137
130
|
|
138
131
|
CompletedMultipartUpload completedMultipartUpload = CompletedMultipartUpload.builder()
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-output-s3v2
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Toshihiro Takushima
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-01-
|
11
|
+
date: 2021-01-12 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Stores files on Amazon S3 using aws-sdk-java-v2.
|
14
14
|
email:
|
@@ -23,45 +23,45 @@ files:
|
|
23
23
|
- LICENSE
|
24
24
|
- README.md
|
25
25
|
- build.gradle
|
26
|
-
- classpath/annotations-2.15.
|
27
|
-
- classpath/apache-client-2.15.
|
28
|
-
- classpath/arns-2.15.
|
29
|
-
- classpath/auth-2.15.
|
30
|
-
- classpath/aws-core-2.15.
|
31
|
-
- classpath/aws-query-protocol-2.15.
|
32
|
-
- classpath/aws-xml-protocol-2.15.
|
26
|
+
- classpath/annotations-2.15.62.jar
|
27
|
+
- classpath/apache-client-2.15.62.jar
|
28
|
+
- classpath/arns-2.15.62.jar
|
29
|
+
- classpath/auth-2.15.62.jar
|
30
|
+
- classpath/aws-core-2.15.62.jar
|
31
|
+
- classpath/aws-query-protocol-2.15.62.jar
|
32
|
+
- classpath/aws-xml-protocol-2.15.62.jar
|
33
33
|
- classpath/commons-codec-1.11.jar
|
34
34
|
- classpath/commons-logging-1.2.jar
|
35
|
-
- classpath/embulk-output-s3v2-0.2.0.jar
|
35
|
+
- classpath/embulk-output-s3v2-0.2.1.jar
|
36
36
|
- classpath/eventstream-1.0.1.jar
|
37
|
-
- classpath/http-client-spi-2.15.
|
37
|
+
- classpath/http-client-spi-2.15.62.jar
|
38
38
|
- classpath/httpclient-4.5.13.jar
|
39
39
|
- classpath/httpcore-4.4.13.jar
|
40
40
|
- classpath/jackson-annotations-2.10.5.jar
|
41
41
|
- classpath/jackson-core-2.10.5.jar
|
42
42
|
- classpath/jackson-databind-2.10.5.1.jar
|
43
|
-
- classpath/metrics-spi-2.15.
|
43
|
+
- classpath/metrics-spi-2.15.62.jar
|
44
44
|
- classpath/netty-buffer-4.1.53.Final.jar
|
45
45
|
- classpath/netty-codec-4.1.53.Final.jar
|
46
46
|
- classpath/netty-codec-http-4.1.53.Final.jar
|
47
47
|
- classpath/netty-codec-http2-4.1.53.Final.jar
|
48
48
|
- classpath/netty-common-4.1.53.Final.jar
|
49
49
|
- classpath/netty-handler-4.1.53.Final.jar
|
50
|
-
- classpath/netty-nio-client-2.15.
|
50
|
+
- classpath/netty-nio-client-2.15.62.jar
|
51
51
|
- classpath/netty-reactive-streams-2.0.4.jar
|
52
52
|
- classpath/netty-reactive-streams-http-2.0.4.jar
|
53
53
|
- classpath/netty-resolver-4.1.53.Final.jar
|
54
54
|
- classpath/netty-transport-4.1.53.Final.jar
|
55
55
|
- classpath/netty-transport-native-epoll-4.1.53.Final-linux-x86_64.jar
|
56
56
|
- classpath/netty-transport-native-unix-common-4.1.53.Final.jar
|
57
|
-
- classpath/profiles-2.15.
|
58
|
-
- classpath/protocol-core-2.15.
|
57
|
+
- classpath/profiles-2.15.62.jar
|
58
|
+
- classpath/protocol-core-2.15.62.jar
|
59
59
|
- classpath/reactive-streams-1.0.3.jar
|
60
|
-
- classpath/regions-2.15.
|
61
|
-
- classpath/s3-2.15.
|
62
|
-
- classpath/sdk-core-2.15.
|
60
|
+
- classpath/regions-2.15.62.jar
|
61
|
+
- classpath/s3-2.15.62.jar
|
62
|
+
- classpath/sdk-core-2.15.62.jar
|
63
63
|
- classpath/slf4j-api-1.7.28.jar
|
64
|
-
- classpath/utils-2.15.
|
64
|
+
- classpath/utils-2.15.62.jar
|
65
65
|
- config/checkstyle/checkstyle.xml
|
66
66
|
- config/checkstyle/default.xml
|
67
67
|
- gradle/wrapper/gradle-wrapper.jar
|