embulk-output-s3v2 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/codeql-analysis.yml +71 -0
  3. data/README.md +37 -2
  4. data/build.gradle +3 -1
  5. data/classpath/annotations-2.17.283.jar +0 -0
  6. data/classpath/apache-client-2.17.283.jar +0 -0
  7. data/classpath/arns-2.17.283.jar +0 -0
  8. data/classpath/auth-2.17.283.jar +0 -0
  9. data/classpath/aws-core-2.17.283.jar +0 -0
  10. data/classpath/aws-json-protocol-2.17.282.jar +0 -0
  11. data/classpath/aws-query-protocol-2.17.283.jar +0 -0
  12. data/classpath/aws-xml-protocol-2.17.283.jar +0 -0
  13. data/classpath/{embulk-output-s3v2-0.2.0.jar → embulk-output-s3v2-0.3.0.jar} +0 -0
  14. data/classpath/http-client-spi-2.17.283.jar +0 -0
  15. data/classpath/json-utils-2.17.283.jar +0 -0
  16. data/classpath/metrics-spi-2.17.283.jar +0 -0
  17. data/classpath/netty-buffer-4.1.77.Final.jar +0 -0
  18. data/classpath/netty-codec-4.1.77.Final.jar +0 -0
  19. data/classpath/netty-codec-http-4.1.77.Final.jar +0 -0
  20. data/classpath/netty-codec-http2-4.1.77.Final.jar +0 -0
  21. data/classpath/netty-common-4.1.77.Final.jar +0 -0
  22. data/classpath/netty-handler-4.1.77.Final.jar +0 -0
  23. data/classpath/netty-nio-client-2.17.283.jar +0 -0
  24. data/classpath/{netty-resolver-4.1.53.Final.jar → netty-resolver-4.1.77.Final.jar} +0 -0
  25. data/classpath/netty-transport-4.1.77.Final.jar +0 -0
  26. data/classpath/netty-transport-classes-epoll-4.1.77.Final.jar +0 -0
  27. data/classpath/netty-transport-native-unix-common-4.1.77.Final.jar +0 -0
  28. data/classpath/profiles-2.17.283.jar +0 -0
  29. data/classpath/protocol-core-2.17.283.jar +0 -0
  30. data/classpath/regions-2.17.283.jar +0 -0
  31. data/classpath/s3-2.17.283.jar +0 -0
  32. data/classpath/sdk-core-2.17.283.jar +0 -0
  33. data/classpath/slf4j-api-1.7.30.jar +0 -0
  34. data/classpath/sso-2.17.282.jar +0 -0
  35. data/classpath/third-party-jackson-core-2.17.283.jar +0 -0
  36. data/classpath/utils-2.17.283.jar +0 -0
  37. data/src/main/java/org/embulk/output/s3v2/s3/S3ClientManager.java +14 -21
  38. metadata +35 -35
  39. data/classpath/annotations-2.15.61.jar +0 -0
  40. data/classpath/apache-client-2.15.61.jar +0 -0
  41. data/classpath/arns-2.15.61.jar +0 -0
  42. data/classpath/auth-2.15.61.jar +0 -0
  43. data/classpath/aws-core-2.15.61.jar +0 -0
  44. data/classpath/aws-query-protocol-2.15.61.jar +0 -0
  45. data/classpath/aws-xml-protocol-2.15.61.jar +0 -0
  46. data/classpath/http-client-spi-2.15.61.jar +0 -0
  47. data/classpath/jackson-annotations-2.10.5.jar +0 -0
  48. data/classpath/jackson-core-2.10.5.jar +0 -0
  49. data/classpath/jackson-databind-2.10.5.1.jar +0 -0
  50. data/classpath/metrics-spi-2.15.61.jar +0 -0
  51. data/classpath/netty-buffer-4.1.53.Final.jar +0 -0
  52. data/classpath/netty-codec-4.1.53.Final.jar +0 -0
  53. data/classpath/netty-codec-http-4.1.53.Final.jar +0 -0
  54. data/classpath/netty-codec-http2-4.1.53.Final.jar +0 -0
  55. data/classpath/netty-common-4.1.53.Final.jar +0 -0
  56. data/classpath/netty-handler-4.1.53.Final.jar +0 -0
  57. data/classpath/netty-nio-client-2.15.61.jar +0 -0
  58. data/classpath/netty-reactive-streams-2.0.4.jar +0 -0
  59. data/classpath/netty-reactive-streams-http-2.0.4.jar +0 -0
  60. data/classpath/netty-transport-4.1.53.Final.jar +0 -0
  61. data/classpath/netty-transport-native-epoll-4.1.53.Final-linux-x86_64.jar +0 -0
  62. data/classpath/netty-transport-native-unix-common-4.1.53.Final.jar +0 -0
  63. data/classpath/profiles-2.15.61.jar +0 -0
  64. data/classpath/protocol-core-2.15.61.jar +0 -0
  65. data/classpath/regions-2.15.61.jar +0 -0
  66. data/classpath/s3-2.15.61.jar +0 -0
  67. data/classpath/sdk-core-2.15.61.jar +0 -0
  68. data/classpath/slf4j-api-1.7.28.jar +0 -0
  69. data/classpath/utils-2.15.61.jar +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 38c87b938e84dcad47a5b47f65872b15d6a549877003bc1379bf9ca32c44eddb
4
- data.tar.gz: 3505a9c76d0c385e25d8b1b11676143cca4b3c798d130a0e0c0724abd63dc1a1
3
+ metadata.gz: 5b2d6890502b2b8e6bf2769d5b9952ed18bfdc5f32b49ad863208fbb4acfb65c
4
+ data.tar.gz: 8775e499e5f2f94be1d1d20af588318ce333cd5e7223dc1b3bc8ca01e60decc8
5
5
  SHA512:
6
- metadata.gz: bc999df40b640047ba878092a623b22ad25b8c975600a4615165446703b3827e823d24ed7df17b7bcc1aa64ef181847414ef72381a529b41642573a868e0c045
7
- data.tar.gz: 3d3b4f6360528d541fff6d84963dd96b4d44360c0d9ab0f47f657ec0fee550858ff03faa64f4c0d0f2b30ee8910516a497a43b6a2ba38ae4bf67cd8272caa848
6
+ metadata.gz: 44bdad7736ff31c9e7037d2c0d567f3074eedd53c83865372730963f46a43f126cf7ef9a68a701245c4ead500d8f7071c9ddac7399f98b396ae0af4a803d5665
7
+ data.tar.gz: 1bdf96573eb326f63daa531f9dc2b6b54f455b0f3bf10454bb375b3ffebd1547e13322ab9cb99c590a733c02dd68de24e8fe9268de382be559a10115f801c05a
@@ -0,0 +1,71 @@
1
+ # For most projects, this workflow file will not need changing; you simply need
2
+ # to commit it to your repository.
3
+ #
4
+ # You may wish to alter this file to override the set of languages analyzed,
5
+ # or to provide custom queries or build logic.
6
+ #
7
+ # ******** NOTE ********
8
+ # We have attempted to detect the languages in your repository. Please check
9
+ # the `language` matrix defined below to confirm you have the correct set of
10
+ # supported CodeQL languages.
11
+ #
12
+ name: "CodeQL"
13
+
14
+ on:
15
+ push:
16
+ branches: [ master ]
17
+ pull_request:
18
+ # The branches below must be a subset of the branches above
19
+ branches: [ master ]
20
+ schedule:
21
+ - cron: '36 5 * * 2'
22
+
23
+ jobs:
24
+ analyze:
25
+ name: Analyze
26
+ runs-on: ubuntu-latest
27
+ permissions:
28
+ actions: read
29
+ contents: read
30
+ security-events: write
31
+
32
+ strategy:
33
+ fail-fast: false
34
+ matrix:
35
+ language: [ 'java' ]
36
+ # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ]
37
+ # Learn more:
38
+ # https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed
39
+
40
+ steps:
41
+ - name: Checkout repository
42
+ uses: actions/checkout@v2
43
+
44
+ # Initializes the CodeQL tools for scanning.
45
+ - name: Initialize CodeQL
46
+ uses: github/codeql-action/init@v1
47
+ with:
48
+ languages: ${{ matrix.language }}
49
+ # If you wish to specify custom queries, you can do so here or in a config file.
50
+ # By default, queries listed here will override any specified in a config file.
51
+ # Prefix the list here with "+" to use these queries and those in the config file.
52
+ # queries: ./path/to/local/query, your-org/your-repo/queries@main
53
+
54
+ # Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
55
+ # If this step fails, then you should remove it and run the build manually (see below)
56
+ - name: Autobuild
57
+ uses: github/codeql-action/autobuild@v1
58
+
59
+ # ℹ️ Command-line programs to run using the OS shell.
60
+ # 📚 https://git.io/JvXDl
61
+
62
+ # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines
63
+ # and modify them (or add more) to build your code if your project
64
+ # uses a compiled language
65
+
66
+ #- run: |
67
+ # make bootstrap
68
+ # make release
69
+
70
+ - name: Perform CodeQL Analysis
71
+ uses: github/codeql-action/analyze@v1
data/README.md CHANGED
@@ -13,7 +13,7 @@ Files stores on Amazon S3.
13
13
 
14
14
  ## Configuration
15
15
  - **region**: AWS region name. (string, required)
16
- - **enable_profile**: If true, AWS credentials profile will be used when authenticating AWS. (boolean, default: `false`)
16
+ - **enable_profile**: If true, an AWS credentials profile is used to authenticate with AWS. If false, an IAM role is used. (boolean, default: `false`)
17
17
  - Supported in v0.2.0 or later
18
18
  - **profile**: AWS credentials profile name. If `enable_profile` is false, this parameter will be ignored. (string, default: `default`)
19
19
  - Supported in v0.2.0 or later
@@ -27,7 +27,7 @@ Files stores on Amazon S3.
27
27
  - Maximum size: `2GB`
28
28
  - Enable semantics
29
29
  - Same as that of `multipart_threshold`
30
- - **multipart_threshold**: If `enable_multi_part_upload` is false, this parameter will be ignored. (string, default: `8MB`)
30
+ - **multipart_threshold**: The size threshold the plugin uses for multipart transfers of individual divided bulk-data. If `enable_multi_part_upload` is false, this parameter will be ignored. (string, default: `8MB`)
31
31
  - Enable semantics
32
32
  - `KB`
33
33
  - `MB`
@@ -38,6 +38,7 @@ Files stores on Amazon S3.
38
38
  - **temp_path**: Directory for temp file output. (string, default: `/tmp`)
39
39
  - **temp_file_prefix**: Prefix of temp file name. (string, default: `embulk-output-s3v2`)
40
40
  ### Example
41
+ #### Basic sample with IAM role authentication
41
42
  ```yaml
42
43
  out:
43
44
  type: s3v2
@@ -50,6 +51,40 @@ out:
50
51
  type: csv
51
52
  delimiter: ","
52
53
  ```
54
+ #### Basic sample with credentials profile authentication
55
+ ```yaml
56
+ out:
57
+ type: s3v2
58
+ region: ap-northeast-1
59
+ bucket: s3-bucket-name
60
+ object_key_prefix: embulk/embulk-output-s3v2
61
+ temp_path: /tmp
62
+ enable_profile: true
63
+ profile: default
64
+ extension: .csv
65
+ formatter:
66
+ type: csv
67
+ delimiter: ","
68
+ ```
69
+ #### Multipart upload sample with gzip encoding
70
+ ```yaml
71
+ out:
72
+ type: s3v2
73
+ region: ap-northeast-1
74
+ bucket: s3-bucket-name
75
+ object_key_prefix: embulk/embulk-output-s3v2
76
+ temp_path: /tmp
77
+ enable_multi_part_upload: true
78
+ multipart_chunksize: 10MB
79
+ max_concurrent_requests: 20
80
+ extension: csv.gz
81
+ formatter:
82
+ type: csv
83
+ delimiter: ","
84
+ encoders:
85
+ - type: gzip
86
+ level: 1
87
+ ```
53
88
 
54
89
  ## Usage
55
90
  ### Build
data/build.gradle CHANGED
@@ -6,7 +6,7 @@ plugins {
6
6
  import com.github.jrubygradle.JRubyExec
7
7
 
8
8
  group 'com.github.ttksm'
9
- version '0.2.0'
9
+ version '0.3.0'
10
10
 
11
11
  sourceCompatibility = 1.8
12
12
  targetCompatibility = 1.8
@@ -28,6 +28,8 @@ dependencies {
28
28
  testCompile 'org.mockito:mockito-core:3.+'
29
29
  testCompile 'org.mockito:mockito-junit-jupiter:3.+'
30
30
  testCompile 'org.embulk:embulk-core:0.9.23:tests'
31
+ compile 'software.amazon.awssdk:bom:2.17.282'
32
+ compile 'software.amazon.awssdk:sso:2.17.282'
31
33
  }
32
34
 
33
35
  test {
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
@@ -1,13 +1,11 @@
1
1
  package org.embulk.output.s3v2.s3;
2
2
 
3
- import java.io.BufferedOutputStream;
4
- import java.io.FileOutputStream;
3
+ import java.io.BufferedInputStream;
4
+ import java.io.ByteArrayOutputStream;
5
+ import java.io.FileInputStream;
5
6
  import java.io.IOException;
6
7
  import java.nio.ByteBuffer;
7
- import java.nio.channels.FileChannel;
8
- import java.nio.file.Files;
9
8
  import java.nio.file.Path;
10
- import java.nio.file.Paths;
11
9
  import java.util.ArrayList;
12
10
  import java.util.List;
13
11
  import java.util.concurrent.CompletableFuture;
@@ -91,26 +89,23 @@ public class S3ClientManager
91
89
  CreateMultipartUploadResponse response = s3.createMultipartUpload(createMultipartUploadRequest);
92
90
  String uploadId = response.uploadId();
93
91
 
94
- try (FileChannel fc = FileChannel.open(sourceFile)) {
95
- String multipartChunksize = status.getMultipartChunksize();
96
- ByteBuffer buffer = ByteBuffer.allocate(ChunksizeComputation.getChunksizeBytes(multipartChunksize));
97
-
92
+ try (BufferedInputStream bufferStream = new BufferedInputStream(
93
+ new FileInputStream(sourceFile.toFile()))) {
98
94
  ExecutorService es = Executors.newFixedThreadPool(status.getMaxConcurrentRequests());
99
95
  List<CompletableFuture<String>> futureList = new ArrayList<>();
96
+
97
+ int multipartChunksize = ChunksizeComputation.getChunksizeBytes(status.getMultipartChunksize());
98
+ byte[] data = new byte[multipartChunksize];
100
99
  int i = 1;
101
100
  while (true) {
102
- buffer.clear();
103
- if (fc.read(buffer) == -1) {
101
+ int n = bufferStream.read(data);
102
+ if (n == -1) {
104
103
  break;
105
104
  }
106
- buffer.flip();
107
105
 
108
- // Create tmpFile per size of multipart_chunksize
109
- String tmpFile = sourceFile + "_" + i;
110
- try (BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(tmpFile))) {
111
- bos.write(buffer.array(), buffer.arrayOffset(), buffer.limit());
112
- bos.flush();
113
- }
106
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
107
+ out.write(data, 0, n);
108
+ byte[] chunk = out.toByteArray();
114
109
 
115
110
  UploadPartRequest uploadPartRequest = UploadPartRequest.builder()
116
111
  .bucket(bucket).key(objectKey).uploadId(uploadId)
@@ -119,7 +114,7 @@ public class S3ClientManager
119
114
 
120
115
  // Async upload to S3
121
116
  CompletableFuture<String> future = CompletableFuture.supplyAsync(() -> {
122
- return s3.uploadPart(uploadPartRequest, RequestBody.fromFile(Paths.get(tmpFile))).eTag();
117
+ return s3.uploadPart(uploadPartRequest, RequestBody.fromBytes(chunk)).eTag();
123
118
  }, es);
124
119
  futureList.add(future);
125
120
 
@@ -131,8 +126,6 @@ public class S3ClientManager
131
126
  for (int j = 1; j <= futureList.size(); j++) {
132
127
  CompletedPart part = CompletedPart.builder().partNumber(j).eTag(futureList.get(j - 1).get()).build();
133
128
  partList.add(part);
134
- // Remove tmpFile
135
- Files.delete(Paths.get(sourceFile + "_" + j));
136
129
  }
137
130
 
138
131
  CompletedMultipartUpload completedMultipartUpload = CompletedMultipartUpload.builder()
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-output-s3v2
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Toshihiro Takushima
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-01-11 00:00:00.000000000 Z
11
+ date: 2022-09-29 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Stores files on Amazon S3 using aws-sdk-java-v2.
14
14
  email:
@@ -18,50 +18,50 @@ extensions: []
18
18
  extra_rdoc_files: []
19
19
  files:
20
20
  - ".github/workflows/build.yml"
21
+ - ".github/workflows/codeql-analysis.yml"
21
22
  - ".github/workflows/release.yml"
22
23
  - ".gitignore"
23
24
  - LICENSE
24
25
  - README.md
25
26
  - build.gradle
26
- - classpath/annotations-2.15.61.jar
27
- - classpath/apache-client-2.15.61.jar
28
- - classpath/arns-2.15.61.jar
29
- - classpath/auth-2.15.61.jar
30
- - classpath/aws-core-2.15.61.jar
31
- - classpath/aws-query-protocol-2.15.61.jar
32
- - classpath/aws-xml-protocol-2.15.61.jar
27
+ - classpath/annotations-2.17.283.jar
28
+ - classpath/apache-client-2.17.283.jar
29
+ - classpath/arns-2.17.283.jar
30
+ - classpath/auth-2.17.283.jar
31
+ - classpath/aws-core-2.17.283.jar
32
+ - classpath/aws-json-protocol-2.17.282.jar
33
+ - classpath/aws-query-protocol-2.17.283.jar
34
+ - classpath/aws-xml-protocol-2.17.283.jar
33
35
  - classpath/commons-codec-1.11.jar
34
36
  - classpath/commons-logging-1.2.jar
35
- - classpath/embulk-output-s3v2-0.2.0.jar
37
+ - classpath/embulk-output-s3v2-0.3.0.jar
36
38
  - classpath/eventstream-1.0.1.jar
37
- - classpath/http-client-spi-2.15.61.jar
39
+ - classpath/http-client-spi-2.17.283.jar
38
40
  - classpath/httpclient-4.5.13.jar
39
41
  - classpath/httpcore-4.4.13.jar
40
- - classpath/jackson-annotations-2.10.5.jar
41
- - classpath/jackson-core-2.10.5.jar
42
- - classpath/jackson-databind-2.10.5.1.jar
43
- - classpath/metrics-spi-2.15.61.jar
44
- - classpath/netty-buffer-4.1.53.Final.jar
45
- - classpath/netty-codec-4.1.53.Final.jar
46
- - classpath/netty-codec-http-4.1.53.Final.jar
47
- - classpath/netty-codec-http2-4.1.53.Final.jar
48
- - classpath/netty-common-4.1.53.Final.jar
49
- - classpath/netty-handler-4.1.53.Final.jar
50
- - classpath/netty-nio-client-2.15.61.jar
51
- - classpath/netty-reactive-streams-2.0.4.jar
52
- - classpath/netty-reactive-streams-http-2.0.4.jar
53
- - classpath/netty-resolver-4.1.53.Final.jar
54
- - classpath/netty-transport-4.1.53.Final.jar
55
- - classpath/netty-transport-native-epoll-4.1.53.Final-linux-x86_64.jar
56
- - classpath/netty-transport-native-unix-common-4.1.53.Final.jar
57
- - classpath/profiles-2.15.61.jar
58
- - classpath/protocol-core-2.15.61.jar
42
+ - classpath/json-utils-2.17.283.jar
43
+ - classpath/metrics-spi-2.17.283.jar
44
+ - classpath/netty-buffer-4.1.77.Final.jar
45
+ - classpath/netty-codec-4.1.77.Final.jar
46
+ - classpath/netty-codec-http-4.1.77.Final.jar
47
+ - classpath/netty-codec-http2-4.1.77.Final.jar
48
+ - classpath/netty-common-4.1.77.Final.jar
49
+ - classpath/netty-handler-4.1.77.Final.jar
50
+ - classpath/netty-nio-client-2.17.283.jar
51
+ - classpath/netty-resolver-4.1.77.Final.jar
52
+ - classpath/netty-transport-4.1.77.Final.jar
53
+ - classpath/netty-transport-classes-epoll-4.1.77.Final.jar
54
+ - classpath/netty-transport-native-unix-common-4.1.77.Final.jar
55
+ - classpath/profiles-2.17.283.jar
56
+ - classpath/protocol-core-2.17.283.jar
59
57
  - classpath/reactive-streams-1.0.3.jar
60
- - classpath/regions-2.15.61.jar
61
- - classpath/s3-2.15.61.jar
62
- - classpath/sdk-core-2.15.61.jar
63
- - classpath/slf4j-api-1.7.28.jar
64
- - classpath/utils-2.15.61.jar
58
+ - classpath/regions-2.17.283.jar
59
+ - classpath/s3-2.17.283.jar
60
+ - classpath/sdk-core-2.17.283.jar
61
+ - classpath/slf4j-api-1.7.30.jar
62
+ - classpath/sso-2.17.282.jar
63
+ - classpath/third-party-jackson-core-2.17.283.jar
64
+ - classpath/utils-2.17.283.jar
65
65
  - config/checkstyle/checkstyle.xml
66
66
  - config/checkstyle/default.xml
67
67
  - gradle/wrapper/gradle-wrapper.jar
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file