embulk-output-s3v2 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29) hide show
  1. checksums.yaml +7 -0
  2. data/.github/workflows/build.yml +26 -0
  3. data/.github/workflows/release.yml +37 -0
  4. data/.gitignore +34 -0
  5. data/LICENSE +21 -0
  6. data/README.md +54 -0
  7. data/build.gradle +103 -0
  8. data/config/checkstyle/checkstyle.xml +130 -0
  9. data/config/checkstyle/default.xml +110 -0
  10. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  11. data/gradle/wrapper/gradle-wrapper.properties +5 -0
  12. data/gradlew +172 -0
  13. data/gradlew.bat +84 -0
  14. data/lib/embulk/output/s3v2.rb +5 -0
  15. data/settings.gradle +2 -0
  16. data/src/main/java/org/embulk/output/s3v2/PluginTask.java +53 -0
  17. data/src/main/java/org/embulk/output/s3v2/S3V2FileOutputPlugin.java +49 -0
  18. data/src/main/java/org/embulk/output/s3v2/s3/S3ClientManager.java +161 -0
  19. data/src/main/java/org/embulk/output/s3v2/s3/S3MultiPartStatus.java +30 -0
  20. data/src/main/java/org/embulk/output/s3v2/strategy/AbstractStrategy.java +41 -0
  21. data/src/main/java/org/embulk/output/s3v2/strategy/BufferedStrategy.java +76 -0
  22. data/src/main/java/org/embulk/output/s3v2/strategy/FileOutputStrategy.java +166 -0
  23. data/src/main/java/org/embulk/output/s3v2/util/AbstractUnitComputation.java +39 -0
  24. data/src/main/java/org/embulk/output/s3v2/util/ChunksizeComputation.java +26 -0
  25. data/src/main/java/org/embulk/output/s3v2/util/ThresholdComputation.java +23 -0
  26. data/src/test/java/org/embulk/output/s3v2/strategy/BufferedStrategyTests.java +45 -0
  27. data/src/test/java/org/embulk/output/s3v2/util/ChunksizeComputationTests.java +53 -0
  28. data/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker +1 -0
  29. metadata +110 -0
@@ -0,0 +1,30 @@
1
+ package org.embulk.output.s3v2.s3;
2
+
3
/**
 * Immutable value object bundling three multipart-upload settings.
 *
 * The two size settings are kept as the strings they were given; nothing is parsed
 * or validated in this class.
 */
public class S3MultiPartStatus
{
    private final String multipartThreshold;
    private final String multipartChunksize;
    private final int maxConcurrentRequests;

    /**
     * Stores the given settings as-is.
     *
     * @param maxConcurrentRequests value later returned by {@link #getMaxConcurrentRequests()}
     * @param multipartChunksize value later returned by {@link #getMultipartChunksize()}
     * @param multipartThreshold value later returned by {@link #getMultipartThreshold()}
     */
    public S3MultiPartStatus(int maxConcurrentRequests, String multipartChunksize, String multipartThreshold)
    {
        this.multipartThreshold = multipartThreshold;
        this.multipartChunksize = multipartChunksize;
        this.maxConcurrentRequests = maxConcurrentRequests;
    }

    /**
     * @return the concurrency setting passed to the constructor
     */
    public int getMaxConcurrentRequests()
    {
        return this.maxConcurrentRequests;
    }

    /**
     * @return the chunk size expression passed to the constructor, unmodified
     */
    public String getMultipartChunksize()
    {
        return this.multipartChunksize;
    }

    /**
     * @return the threshold expression passed to the constructor, unmodified
     */
    public String getMultipartThreshold()
    {
        return this.multipartThreshold;
    }
}
@@ -0,0 +1,41 @@
1
+ package org.embulk.output.s3v2.strategy;
2
+
3
+ import org.embulk.output.s3v2.PluginTask;
4
+ import org.embulk.output.s3v2.s3.S3ClientManager;
5
+ import org.embulk.output.s3v2.s3.S3MultiPartStatus;
6
+ import org.embulk.spi.TransactionalFileOutput;
7
+
8
+ abstract class AbstractStrategy implements TransactionalFileOutput
9
+ {
10
+ protected S3ClientManager s3;
11
+ protected PluginTask task;
12
+ protected int taskIndex;
13
+
14
+ public AbstractStrategy(PluginTask task, int taskIndex)
15
+ {
16
+ s3 = new S3ClientManager(task.getRegion());
17
+ this.task = task;
18
+ this.taskIndex = taskIndex;
19
+
20
+ if (!validate()) {
21
+ throw new IllegalArgumentException("Unsupported parameters combination");
22
+ }
23
+ }
24
+
25
+ /**
26
+ * Validation for PluginTask parameters combination.
27
+ * @see PluginTask
28
+ */
29
+ protected abstract boolean validate();
30
+
31
+ protected final String getFileExtension()
32
+ {
33
+ return task.getExtension().startsWith(".") ? task.getExtension() : "." + task.getExtension();
34
+ }
35
+
36
+ protected final S3MultiPartStatus setUpS3MultiPartStatus()
37
+ {
38
+ return new S3MultiPartStatus(task.getMaxConcurrentRequests(), task.getMultipartChunksize(),
39
+ task.getMultipartThreshold());
40
+ }
41
+ }
@@ -0,0 +1,76 @@
1
+ package org.embulk.output.s3v2.strategy;
2
+
3
+ import org.embulk.config.TaskReport;
4
+ import org.embulk.output.s3v2.PluginTask;
5
+ import org.embulk.spi.Buffer;
6
+ import org.embulk.spi.Exec;
7
+ import org.slf4j.Logger;
8
+ import org.slf4j.LoggerFactory;
9
+
10
+ import java.nio.ByteBuffer;
11
+
12
+ public class BufferedStrategy extends AbstractStrategy
13
+ {
14
+ private final Logger logger = LoggerFactory.getLogger(BufferedStrategy.class);
15
+ private final String bucket;
16
+ private String s3ObjectKey;
17
+ private ByteBuffer byteBuffer;
18
+
19
+ public BufferedStrategy(PluginTask task, int taskIndex)
20
+ {
21
+ super(task, taskIndex);
22
+ this.bucket = task.getBucket();
23
+ }
24
+
25
+ @Override
26
+ protected boolean validate()
27
+ {
28
+ if (task.getEnableMultiPartUpload()) {
29
+ throw new UnsupportedOperationException("Buffering strategy does not support S3 multi-part upload.");
30
+ }
31
+ return true;
32
+ }
33
+
34
+ @Override
35
+ public void nextFile()
36
+ {
37
+ String ext = getFileExtension();
38
+ s3ObjectKey = task.getObjectKeyPrefix() + "-" + taskIndex + ext;
39
+ logger.info("[task:" + taskIndex + "] Temporary file is not created.");
40
+ }
41
+
42
+ @Override
43
+ public void add(Buffer buffer)
44
+ {
45
+ byteBuffer = ByteBuffer.wrap(buffer.array(), buffer.offset(), buffer.limit());
46
+ }
47
+
48
+ @Override
49
+ public void finish()
50
+ {
51
+ // Do nothing.
52
+ }
53
+
54
+ @Override
55
+ public void close()
56
+ {
57
+ // Do nothing.
58
+ }
59
+
60
+ @Override
61
+ public void abort()
62
+ {
63
+ if (s3.existsObject(bucket, s3ObjectKey)) {
64
+ s3.deleteObject(bucket, s3ObjectKey);
65
+ logger.info("Delete s3://" + bucket + "/" + s3ObjectKey);
66
+ }
67
+ }
68
+
69
+ @Override
70
+ public TaskReport commit()
71
+ {
72
+ s3.putObject(bucket, s3ObjectKey, byteBuffer);
73
+ logger.info("Put into s3://" + bucket + "/" + s3ObjectKey);
74
+ return Exec.newTaskReport();
75
+ }
76
+ }
@@ -0,0 +1,166 @@
1
+ package org.embulk.output.s3v2.strategy;
2
+
3
+ import org.embulk.config.TaskReport;
4
+ import org.embulk.output.s3v2.PluginTask;
5
+ import org.embulk.output.s3v2.s3.S3MultiPartStatus;
6
+ import org.embulk.output.s3v2.util.ThresholdComputation;
7
+ import org.embulk.spi.Buffer;
8
+ import org.embulk.spi.Exec;
9
+ import org.slf4j.Logger;
10
+ import org.slf4j.LoggerFactory;
11
+
12
+ import java.io.BufferedOutputStream;
13
+ import java.io.FileOutputStream;
14
+ import java.io.IOException;
15
+ import java.nio.file.Files;
16
+ import java.nio.file.Path;
17
+ import java.nio.file.Paths;
18
+ import java.util.Objects;
19
+
20
+ public class FileOutputStrategy extends AbstractStrategy
21
+ {
22
+ private final Logger logger = LoggerFactory.getLogger(FileOutputStrategy.class);
23
+ private final String bucket;
24
+ private String s3ObjectKey;
25
+ private BufferedOutputStream bufferStream;
26
+ private Path tempFilePath;
27
+
28
+ public FileOutputStrategy(PluginTask task, int taskIndex)
29
+ {
30
+ super(task, taskIndex);
31
+ this.bucket = task.getBucket();
32
+ }
33
+
34
+ @Override
35
+ protected boolean validate()
36
+ {
37
+ return true;
38
+ }
39
+
40
+ @Override
41
+ public void nextFile()
42
+ {
43
+ String ext = getFileExtension();
44
+ createTempFile(ext);
45
+ s3ObjectKey = task.getObjectKeyPrefix() + "-" + taskIndex + ext;
46
+ }
47
+
48
+ private void createTempFile(String ext)
49
+ {
50
+ try {
51
+ Path tempPath = Paths.get(task.getTempPath());
52
+ if (Files.notExists(tempPath)) {
53
+ Files.createDirectory(tempPath);
54
+ }
55
+
56
+ String dirSeparator = tempPath.endsWith("/") ? "" : "/";
57
+ tempFilePath = Paths.get(task.getTempPath() + dirSeparator
58
+ + task.getTempFilePrefix() + "-" + taskIndex
59
+ + ext);
60
+ if (Files.exists(tempFilePath)) {
61
+ Files.delete(tempFilePath);
62
+ logger.info("Deleted previous: " + tempFilePath);
63
+ }
64
+ Files.createFile(tempFilePath);
65
+ logger.info("Created: " + tempFilePath);
66
+
67
+ bufferStream = new BufferedOutputStream(new FileOutputStream(tempFilePath.toFile()));
68
+ }
69
+ catch (IOException ex) {
70
+ closeBuffer();
71
+ ex.printStackTrace();
72
+ throw new RuntimeException("Failed to create temp file: " + tempFilePath);
73
+ }
74
+ }
75
+
76
+ @Override
77
+ public void add(Buffer buffer)
78
+ {
79
+ try {
80
+ bufferStream.write(buffer.array(), buffer.offset(), buffer.limit());
81
+ }
82
+ catch (IOException ex) {
83
+ closeBuffer();
84
+ ex.printStackTrace();
85
+ throw new RuntimeException("Failed to buffer data.");
86
+ }
87
+ }
88
+
89
+ @Override
90
+ public void finish()
91
+ {
92
+ try {
93
+ bufferStream.flush();
94
+ }
95
+ catch (IOException ex) {
96
+ closeBuffer();
97
+ ex.printStackTrace();
98
+ throw new RuntimeException("Failed to write out data.");
99
+ }
100
+ }
101
+
102
+ @Override
103
+ public void close()
104
+ {
105
+ closeBuffer();
106
+ }
107
+
108
+ @Override
109
+ public void abort()
110
+ {
111
+ if (s3.existsObject(bucket, s3ObjectKey)) {
112
+ s3.deleteObject(bucket, s3ObjectKey);
113
+ logger.info("Deleted s3://" + bucket + "/" + s3ObjectKey);
114
+ }
115
+
116
+ try {
117
+ Files.delete(tempFilePath);
118
+ logger.info("Deleted " + tempFilePath);
119
+ }
120
+ catch (IOException ex) {
121
+ ex.printStackTrace();
122
+ throw new RuntimeException("Failed to delete: " + tempFilePath);
123
+ }
124
+ finally {
125
+ closeBuffer();
126
+ }
127
+ }
128
+
129
+ @Override
130
+ public TaskReport commit()
131
+ {
132
+ long start = System.currentTimeMillis();
133
+ try {
134
+ if (task.getEnableMultiPartUpload()
135
+ && ThresholdComputation.largerThanThreshold(task.getMultipartThreshold(), Files.size(tempFilePath))) {
136
+ S3MultiPartStatus status = setUpS3MultiPartStatus();
137
+ s3.multiPartUpload(bucket, s3ObjectKey, tempFilePath, status);
138
+ logger.info("[Done] Multipart upload: s3://" + bucket + "/" + s3ObjectKey);
139
+ }
140
+ else {
141
+ s3.putObject(bucket, s3ObjectKey, tempFilePath);
142
+ logger.info("[Done] Put: s3://" + bucket + "/" + s3ObjectKey);
143
+ }
144
+ }
145
+ catch (IOException ex) {
146
+ ex.printStackTrace();
147
+ abort();
148
+ }
149
+ long end = System.currentTimeMillis();
150
+ logger.info("Time taken to upload s3://" + bucket + "/" + s3ObjectKey + ": " + (end - start) + "ms");
151
+ return Exec.newTaskReport();
152
+ }
153
+
154
+ private void closeBuffer()
155
+ {
156
+ if (!Objects.isNull(bufferStream)) {
157
+ try {
158
+ bufferStream.close();
159
+ }
160
+ catch (IOException ex) {
161
+ ex.printStackTrace();
162
+ throw new RuntimeException("Failed to close BufferedOutputStream.");
163
+ }
164
+ }
165
+ }
166
+ }
@@ -0,0 +1,39 @@
1
+ package org.embulk.output.s3v2.util;
2
+
3
/**
 * Helpers for size expressions written as digits followed by a unit, for example {@code 8MB}.
 */
abstract class AbstractUnitComputation
{
    /**
     * Supported units with their byte multipliers.
     *
     * The multipliers are decimal (powers of ten), not binary.
     * NOTE(review): AWS tooling commonly reads these suffixes as powers of 1024; confirm the decimal meaning is intended.
     */
    protected enum ComputeUnits
    {
        KB(1e3),
        MB(1e6),
        GB(1e9),
        TB(1e12);

        private final double unit;

        private ComputeUnits(double unit)
        {
            this.unit = unit;
        }

        /**
         * @return number of bytes represented by one of this unit
         */
        public double getUnit()
        {
            return unit;
        }
    }

    /**
     * Returns the ASCII digits of {@code value}, in order, with everything else removed.
     */
    protected static String getSize(String value)
    {
        return value.replaceAll("[^0-9]", "");
    }

    /**
     * Returns {@code value} with all ASCII digits removed.
     */
    protected static String getUnit(String value)
    {
        return value.replaceAll("[0-9]", "");
    }

    /**
     * Checks that {@code value} is one or more digits immediately followed by a supported unit.
     *
     * The unit is matched case-insensitively against {@link ComputeUnits}. The check uses a
     * fixed pattern rather than one built from the input, so arbitrary text is rejected
     * instead of being interpreted as a regular expression.
     *
     * @param value expression to check, may be {@code null}
     * @return {@code true} only for expressions such as {@code 5MB} or {@code 16gb}
     */
    protected static boolean validateValue(String value)
    {
        if (value == null || !value.matches("[0-9]+[A-Za-z]+")) {
            return false;
        }

        String unit = getUnit(value);
        for (ComputeUnits candidate : ComputeUnits.values()) {
            if (candidate.name().equalsIgnoreCase(unit)) {
                return true;
            }
        }
        return false;
    }
}
@@ -0,0 +1,26 @@
1
+ package org.embulk.output.s3v2.util;
2
+
3
+ public class ChunksizeComputation extends AbstractUnitComputation
4
+ {
5
+ private ChunksizeComputation()
6
+ {
7
+ // Do nothing.
8
+ }
9
+
10
+ public static int getChunksizeBytes(String chunksize)
11
+ {
12
+ if (!validateValue(chunksize)) {
13
+ throw new IllegalArgumentException("Unrecognized value of multipart_chunksize: " + chunksize);
14
+ }
15
+
16
+ String sizePartOfChunksize = getSize(chunksize);
17
+ String unitPartOfChunksize = getUnit(chunksize);
18
+ ComputeUnits e = ComputeUnits.valueOf(unitPartOfChunksize.toUpperCase());
19
+ double chunksizeValue = (Double.valueOf(sizePartOfChunksize) * e.getUnit());
20
+ if (chunksizeValue < 5.0 * ComputeUnits.MB.getUnit() || 2.0 * ComputeUnits.GB.getUnit() < chunksizeValue) {
21
+ throw new IllegalArgumentException("Unrecognized range of value multipart_chunksize: " + chunksize);
22
+ }
23
+
24
+ return (int) chunksizeValue;
25
+ }
26
+ }
@@ -0,0 +1,23 @@
1
+ package org.embulk.output.s3v2.util;
2
+
3
+ public class ThresholdComputation extends AbstractUnitComputation
4
+ {
5
+ private ThresholdComputation()
6
+ {
7
+ // Do nothing.
8
+ }
9
+
10
+ public static boolean largerThanThreshold(String threshold, long computationTargetSize)
11
+ {
12
+ if (!validateValue(threshold)) {
13
+ throw new IllegalArgumentException("Unrecognized value of multipart_threshold: " + threshold);
14
+ }
15
+
16
+ String thresholdSize = getSize(threshold);
17
+ String thresholdUnit = getUnit(threshold);
18
+ ComputeUnits e = ComputeUnits.valueOf(thresholdUnit.toUpperCase());
19
+ double thresholdValue = (Double.valueOf(thresholdSize) * e.getUnit());
20
+
21
+ return (double) computationTargetSize > thresholdValue;
22
+ }
23
+ }
@@ -0,0 +1,45 @@
1
+ package org.embulk.output.s3v2.strategy;
2
+
3
+ import org.embulk.output.s3v2.PluginTask;
4
+ import org.junit.jupiter.api.Assertions;
5
+ import org.junit.jupiter.api.BeforeEach;
6
+ import org.junit.jupiter.api.DisplayName;
7
+ import org.junit.jupiter.api.Test;
8
+ import org.junit.jupiter.api.extension.ExtendWith;
9
+ import org.mockito.Mockito;
10
+ import org.mockito.junit.jupiter.MockitoExtension;
11
+
12
+ /**
13
+ * @see BufferedStrategy
14
+ */
15
+ @ExtendWith(MockitoExtension.class)
16
+ public class BufferedStrategyTests
17
+ {
18
+ private PluginTask task;
19
+
20
+ @BeforeEach
21
+ public void setUp()
22
+ {
23
+ task = Mockito.mock(PluginTask.class);
24
+ Mockito.doReturn("ap-northeast-1").when(task).getRegion();
25
+ }
26
+
27
+ @Test
28
+ @DisplayName("Test validate true")
29
+ public void testValidateTrue() throws Exception
30
+ {
31
+ Mockito.doReturn(false).when(task).getEnableMultiPartUpload();
32
+ BufferedStrategy output = new BufferedStrategy(task, 0);
33
+ Assertions.assertTrue(output.validate());
34
+ }
35
+
36
+ @Test
37
+ @DisplayName("Test validate false")
38
+ public void testValidateInvalidCase() throws Exception
39
+ {
40
+ Mockito.doReturn(true).when(task).getEnableMultiPartUpload();
41
+ UnsupportedOperationException ex = Assertions.assertThrows(UnsupportedOperationException.class,
42
+ () -> new BufferedStrategy(task, 0));
43
+ Assertions.assertEquals("Buffering strategy does not support S3 multi-part upload.", ex.getMessage());
44
+ }
45
+ }