embulk-output-s3v2 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/workflows/build.yml +26 -0
- data/.github/workflows/release.yml +37 -0
- data/.gitignore +34 -0
- data/LICENSE +21 -0
- data/README.md +54 -0
- data/build.gradle +103 -0
- data/config/checkstyle/checkstyle.xml +130 -0
- data/config/checkstyle/default.xml +110 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +5 -0
- data/gradlew +172 -0
- data/gradlew.bat +84 -0
- data/lib/embulk/output/s3v2.rb +5 -0
- data/settings.gradle +2 -0
- data/src/main/java/org/embulk/output/s3v2/PluginTask.java +53 -0
- data/src/main/java/org/embulk/output/s3v2/S3V2FileOutputPlugin.java +49 -0
- data/src/main/java/org/embulk/output/s3v2/s3/S3ClientManager.java +161 -0
- data/src/main/java/org/embulk/output/s3v2/s3/S3MultiPartStatus.java +30 -0
- data/src/main/java/org/embulk/output/s3v2/strategy/AbstractStrategy.java +41 -0
- data/src/main/java/org/embulk/output/s3v2/strategy/BufferedStrategy.java +76 -0
- data/src/main/java/org/embulk/output/s3v2/strategy/FileOutputStrategy.java +166 -0
- data/src/main/java/org/embulk/output/s3v2/util/AbstractUnitComputation.java +39 -0
- data/src/main/java/org/embulk/output/s3v2/util/ChunksizeComputation.java +26 -0
- data/src/main/java/org/embulk/output/s3v2/util/ThresholdComputation.java +23 -0
- data/src/test/java/org/embulk/output/s3v2/strategy/BufferedStrategyTests.java +45 -0
- data/src/test/java/org/embulk/output/s3v2/util/ChunksizeComputationTests.java +53 -0
- data/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker +1 -0
- metadata +110 -0
@@ -0,0 +1,30 @@
|
|
1
|
+
package org.embulk.output.s3v2.s3;
|
2
|
+
|
3
|
+
/**
 * Immutable holder for the settings that control an S3 multipart upload.
 *
 * <p>The chunk size and the threshold are kept as the raw strings they were
 * constructed with; this class does not parse or validate them.
 */
public class S3MultiPartStatus
{
    private final int maxConcurrentRequests;
    private final String multipartChunksize;
    private final String multipartThreshold;

    /**
     * @param maxConcurrentRequests value returned by {@link #getMaxConcurrentRequests()}
     * @param multipartChunksize value returned by {@link #getMultipartChunksize()}
     * @param multipartThreshold value returned by {@link #getMultipartThreshold()}
     */
    public S3MultiPartStatus(int maxConcurrentRequests, String multipartChunksize, String multipartThreshold)
    {
        this.multipartThreshold = multipartThreshold;
        this.multipartChunksize = multipartChunksize;
        this.maxConcurrentRequests = maxConcurrentRequests;
    }

    /**
     * @return the configured number of concurrent requests
     */
    public int getMaxConcurrentRequests()
    {
        return this.maxConcurrentRequests;
    }

    /**
     * @return the chunk size exactly as it was passed to the constructor
     */
    public String getMultipartChunksize()
    {
        return this.multipartChunksize;
    }

    /**
     * @return the threshold exactly as it was passed to the constructor
     */
    public String getMultipartThreshold()
    {
        return this.multipartThreshold;
    }
}
|
@@ -0,0 +1,41 @@
|
|
1
|
+
package org.embulk.output.s3v2.strategy;
|
2
|
+
|
3
|
+
import org.embulk.output.s3v2.PluginTask;
|
4
|
+
import org.embulk.output.s3v2.s3.S3ClientManager;
|
5
|
+
import org.embulk.output.s3v2.s3.S3MultiPartStatus;
|
6
|
+
import org.embulk.spi.TransactionalFileOutput;
|
7
|
+
|
8
|
+
abstract class AbstractStrategy implements TransactionalFileOutput
|
9
|
+
{
|
10
|
+
protected S3ClientManager s3;
|
11
|
+
protected PluginTask task;
|
12
|
+
protected int taskIndex;
|
13
|
+
|
14
|
+
public AbstractStrategy(PluginTask task, int taskIndex)
|
15
|
+
{
|
16
|
+
s3 = new S3ClientManager(task.getRegion());
|
17
|
+
this.task = task;
|
18
|
+
this.taskIndex = taskIndex;
|
19
|
+
|
20
|
+
if (!validate()) {
|
21
|
+
throw new IllegalArgumentException("Unsupported parameters combination");
|
22
|
+
}
|
23
|
+
}
|
24
|
+
|
25
|
+
/**
|
26
|
+
* Validation for PluginTask parameters combination.
|
27
|
+
* @see PluginTask
|
28
|
+
*/
|
29
|
+
protected abstract boolean validate();
|
30
|
+
|
31
|
+
protected final String getFileExtension()
|
32
|
+
{
|
33
|
+
return task.getExtension().startsWith(".") ? task.getExtension() : "." + task.getExtension();
|
34
|
+
}
|
35
|
+
|
36
|
+
protected final S3MultiPartStatus setUpS3MultiPartStatus()
|
37
|
+
{
|
38
|
+
return new S3MultiPartStatus(task.getMaxConcurrentRequests(), task.getMultipartChunksize(),
|
39
|
+
task.getMultipartThreshold());
|
40
|
+
}
|
41
|
+
}
|
@@ -0,0 +1,76 @@
|
|
1
|
+
package org.embulk.output.s3v2.strategy;
|
2
|
+
|
3
|
+
import org.embulk.config.TaskReport;
|
4
|
+
import org.embulk.output.s3v2.PluginTask;
|
5
|
+
import org.embulk.spi.Buffer;
|
6
|
+
import org.embulk.spi.Exec;
|
7
|
+
import org.slf4j.Logger;
|
8
|
+
import org.slf4j.LoggerFactory;
|
9
|
+
|
10
|
+
import java.nio.ByteBuffer;
|
11
|
+
|
12
|
+
public class BufferedStrategy extends AbstractStrategy
|
13
|
+
{
|
14
|
+
private final Logger logger = LoggerFactory.getLogger(BufferedStrategy.class);
|
15
|
+
private final String bucket;
|
16
|
+
private String s3ObjectKey;
|
17
|
+
private ByteBuffer byteBuffer;
|
18
|
+
|
19
|
+
public BufferedStrategy(PluginTask task, int taskIndex)
|
20
|
+
{
|
21
|
+
super(task, taskIndex);
|
22
|
+
this.bucket = task.getBucket();
|
23
|
+
}
|
24
|
+
|
25
|
+
@Override
|
26
|
+
protected boolean validate()
|
27
|
+
{
|
28
|
+
if (task.getEnableMultiPartUpload()) {
|
29
|
+
throw new UnsupportedOperationException("Buffering strategy does not support S3 multi-part upload.");
|
30
|
+
}
|
31
|
+
return true;
|
32
|
+
}
|
33
|
+
|
34
|
+
@Override
|
35
|
+
public void nextFile()
|
36
|
+
{
|
37
|
+
String ext = getFileExtension();
|
38
|
+
s3ObjectKey = task.getObjectKeyPrefix() + "-" + taskIndex + ext;
|
39
|
+
logger.info("[task:" + taskIndex + "] Temporary file is not created.");
|
40
|
+
}
|
41
|
+
|
42
|
+
@Override
|
43
|
+
public void add(Buffer buffer)
|
44
|
+
{
|
45
|
+
byteBuffer = ByteBuffer.wrap(buffer.array(), buffer.offset(), buffer.limit());
|
46
|
+
}
|
47
|
+
|
48
|
+
@Override
|
49
|
+
public void finish()
|
50
|
+
{
|
51
|
+
// Do nothing.
|
52
|
+
}
|
53
|
+
|
54
|
+
@Override
|
55
|
+
public void close()
|
56
|
+
{
|
57
|
+
// Do nothing.
|
58
|
+
}
|
59
|
+
|
60
|
+
@Override
|
61
|
+
public void abort()
|
62
|
+
{
|
63
|
+
if (s3.existsObject(bucket, s3ObjectKey)) {
|
64
|
+
s3.deleteObject(bucket, s3ObjectKey);
|
65
|
+
logger.info("Delete s3://" + bucket + "/" + s3ObjectKey);
|
66
|
+
}
|
67
|
+
}
|
68
|
+
|
69
|
+
@Override
|
70
|
+
public TaskReport commit()
|
71
|
+
{
|
72
|
+
s3.putObject(bucket, s3ObjectKey, byteBuffer);
|
73
|
+
logger.info("Put into s3://" + bucket + "/" + s3ObjectKey);
|
74
|
+
return Exec.newTaskReport();
|
75
|
+
}
|
76
|
+
}
|
@@ -0,0 +1,166 @@
|
|
1
|
+
package org.embulk.output.s3v2.strategy;
|
2
|
+
|
3
|
+
import org.embulk.config.TaskReport;
|
4
|
+
import org.embulk.output.s3v2.PluginTask;
|
5
|
+
import org.embulk.output.s3v2.s3.S3MultiPartStatus;
|
6
|
+
import org.embulk.output.s3v2.util.ThresholdComputation;
|
7
|
+
import org.embulk.spi.Buffer;
|
8
|
+
import org.embulk.spi.Exec;
|
9
|
+
import org.slf4j.Logger;
|
10
|
+
import org.slf4j.LoggerFactory;
|
11
|
+
|
12
|
+
import java.io.BufferedOutputStream;
|
13
|
+
import java.io.FileOutputStream;
|
14
|
+
import java.io.IOException;
|
15
|
+
import java.nio.file.Files;
|
16
|
+
import java.nio.file.Path;
|
17
|
+
import java.nio.file.Paths;
|
18
|
+
import java.util.Objects;
|
19
|
+
|
20
|
+
public class FileOutputStrategy extends AbstractStrategy
|
21
|
+
{
|
22
|
+
private final Logger logger = LoggerFactory.getLogger(FileOutputStrategy.class);
|
23
|
+
private final String bucket;
|
24
|
+
private String s3ObjectKey;
|
25
|
+
private BufferedOutputStream bufferStream;
|
26
|
+
private Path tempFilePath;
|
27
|
+
|
28
|
+
public FileOutputStrategy(PluginTask task, int taskIndex)
|
29
|
+
{
|
30
|
+
super(task, taskIndex);
|
31
|
+
this.bucket = task.getBucket();
|
32
|
+
}
|
33
|
+
|
34
|
+
@Override
|
35
|
+
protected boolean validate()
|
36
|
+
{
|
37
|
+
return true;
|
38
|
+
}
|
39
|
+
|
40
|
+
@Override
|
41
|
+
public void nextFile()
|
42
|
+
{
|
43
|
+
String ext = getFileExtension();
|
44
|
+
createTempFile(ext);
|
45
|
+
s3ObjectKey = task.getObjectKeyPrefix() + "-" + taskIndex + ext;
|
46
|
+
}
|
47
|
+
|
48
|
+
private void createTempFile(String ext)
|
49
|
+
{
|
50
|
+
try {
|
51
|
+
Path tempPath = Paths.get(task.getTempPath());
|
52
|
+
if (Files.notExists(tempPath)) {
|
53
|
+
Files.createDirectory(tempPath);
|
54
|
+
}
|
55
|
+
|
56
|
+
String dirSeparator = tempPath.endsWith("/") ? "" : "/";
|
57
|
+
tempFilePath = Paths.get(task.getTempPath() + dirSeparator
|
58
|
+
+ task.getTempFilePrefix() + "-" + taskIndex
|
59
|
+
+ ext);
|
60
|
+
if (Files.exists(tempFilePath)) {
|
61
|
+
Files.delete(tempFilePath);
|
62
|
+
logger.info("Deleted previous: " + tempFilePath);
|
63
|
+
}
|
64
|
+
Files.createFile(tempFilePath);
|
65
|
+
logger.info("Created: " + tempFilePath);
|
66
|
+
|
67
|
+
bufferStream = new BufferedOutputStream(new FileOutputStream(tempFilePath.toFile()));
|
68
|
+
}
|
69
|
+
catch (IOException ex) {
|
70
|
+
closeBuffer();
|
71
|
+
ex.printStackTrace();
|
72
|
+
throw new RuntimeException("Failed to create temp file: " + tempFilePath);
|
73
|
+
}
|
74
|
+
}
|
75
|
+
|
76
|
+
@Override
|
77
|
+
public void add(Buffer buffer)
|
78
|
+
{
|
79
|
+
try {
|
80
|
+
bufferStream.write(buffer.array(), buffer.offset(), buffer.limit());
|
81
|
+
}
|
82
|
+
catch (IOException ex) {
|
83
|
+
closeBuffer();
|
84
|
+
ex.printStackTrace();
|
85
|
+
throw new RuntimeException("Failed to buffer data.");
|
86
|
+
}
|
87
|
+
}
|
88
|
+
|
89
|
+
@Override
|
90
|
+
public void finish()
|
91
|
+
{
|
92
|
+
try {
|
93
|
+
bufferStream.flush();
|
94
|
+
}
|
95
|
+
catch (IOException ex) {
|
96
|
+
closeBuffer();
|
97
|
+
ex.printStackTrace();
|
98
|
+
throw new RuntimeException("Failed to write out data.");
|
99
|
+
}
|
100
|
+
}
|
101
|
+
|
102
|
+
@Override
|
103
|
+
public void close()
|
104
|
+
{
|
105
|
+
closeBuffer();
|
106
|
+
}
|
107
|
+
|
108
|
+
@Override
|
109
|
+
public void abort()
|
110
|
+
{
|
111
|
+
if (s3.existsObject(bucket, s3ObjectKey)) {
|
112
|
+
s3.deleteObject(bucket, s3ObjectKey);
|
113
|
+
logger.info("Deleted s3://" + bucket + "/" + s3ObjectKey);
|
114
|
+
}
|
115
|
+
|
116
|
+
try {
|
117
|
+
Files.delete(tempFilePath);
|
118
|
+
logger.info("Deleted " + tempFilePath);
|
119
|
+
}
|
120
|
+
catch (IOException ex) {
|
121
|
+
ex.printStackTrace();
|
122
|
+
throw new RuntimeException("Failed to delete: " + tempFilePath);
|
123
|
+
}
|
124
|
+
finally {
|
125
|
+
closeBuffer();
|
126
|
+
}
|
127
|
+
}
|
128
|
+
|
129
|
+
@Override
|
130
|
+
public TaskReport commit()
|
131
|
+
{
|
132
|
+
long start = System.currentTimeMillis();
|
133
|
+
try {
|
134
|
+
if (task.getEnableMultiPartUpload()
|
135
|
+
&& ThresholdComputation.largerThanThreshold(task.getMultipartThreshold(), Files.size(tempFilePath))) {
|
136
|
+
S3MultiPartStatus status = setUpS3MultiPartStatus();
|
137
|
+
s3.multiPartUpload(bucket, s3ObjectKey, tempFilePath, status);
|
138
|
+
logger.info("[Done] Multipart upload: s3://" + bucket + "/" + s3ObjectKey);
|
139
|
+
}
|
140
|
+
else {
|
141
|
+
s3.putObject(bucket, s3ObjectKey, tempFilePath);
|
142
|
+
logger.info("[Done] Put: s3://" + bucket + "/" + s3ObjectKey);
|
143
|
+
}
|
144
|
+
}
|
145
|
+
catch (IOException ex) {
|
146
|
+
ex.printStackTrace();
|
147
|
+
abort();
|
148
|
+
}
|
149
|
+
long end = System.currentTimeMillis();
|
150
|
+
logger.info("Time taken to upload s3://" + bucket + "/" + s3ObjectKey + ": " + (end - start) + "ms");
|
151
|
+
return Exec.newTaskReport();
|
152
|
+
}
|
153
|
+
|
154
|
+
private void closeBuffer()
|
155
|
+
{
|
156
|
+
if (!Objects.isNull(bufferStream)) {
|
157
|
+
try {
|
158
|
+
bufferStream.close();
|
159
|
+
}
|
160
|
+
catch (IOException ex) {
|
161
|
+
ex.printStackTrace();
|
162
|
+
throw new RuntimeException("Failed to close BufferedOutputStream.");
|
163
|
+
}
|
164
|
+
}
|
165
|
+
}
|
166
|
+
}
|
@@ -0,0 +1,39 @@
|
|
1
|
+
package org.embulk.output.s3v2.util;
|
2
|
+
|
3
|
+
/**
 * Shared helpers for size settings written as a number directly followed by a
 * unit, for example {@code 5MB}.
 */
abstract class AbstractUnitComputation
{
    /**
     * Supported units with their multiplier in bytes. The multipliers are powers
     * of ten (KB is 1,000 bytes), not powers of two.
     */
    protected enum ComputeUnits
    {
        KB(Math.pow(10, 3)),
        MB(Math.pow(10, 6)),
        GB(Math.pow(10, 9)),
        TB(Math.pow(10, 12));

        private final double unit;

        private ComputeUnits(double unit)
        {
            this.unit = unit;
        }

        /**
         * @return the number of bytes one of this unit stands for
         */
        public double getUnit()
        {
            return unit;
        }
    }

    /**
     * @param value setting to inspect
     * @return the ASCII digits of {@code value}, in their original order
     */
    protected static String getSize(String value)
    {
        return value.replaceAll("[^0-9]", "");
    }

    /**
     * @param value setting to inspect
     * @return {@code value} with every ASCII digit removed
     */
    protected static String getUnit(String value)
    {
        return value.replaceAll("[0-9]", "");
    }

    /**
     * Checks that {@code value} is one or more digits directly followed by the
     * name of a {@link ComputeUnits} constant, in any letter case.
     *
     * <p>The unit text comes from user configuration, so it is compared as plain
     * text and never compiled as a regular expression. Values without a number,
     * without a unit, with an unknown unit or with extra characters are rejected
     * here, which lets callers report their own error message for them.
     *
     * @param value setting to check, may be {@code null}
     * @return {@code true} if the value can be split into a size and a known unit
     */
    protected static boolean validateValue(String value)
    {
        if (value == null) {
            return false;
        }

        String size = getSize(value);
        String unit = getUnit(value);
        // size + unit equals value only when every digit precedes every non-digit.
        if (size.isEmpty() || !value.equals(size + unit)) {
            return false;
        }

        // Callers look the unit up with ComputeUnits.valueOf(unit.toUpperCase()),
        // so the same normalization is applied here.
        String normalizedUnit = unit.toUpperCase();
        for (ComputeUnits known : ComputeUnits.values()) {
            if (known.name().equals(normalizedUnit)) {
                return true;
            }
        }
        return false;
    }
}
|
@@ -0,0 +1,26 @@
|
|
1
|
+
package org.embulk.output.s3v2.util;
|
2
|
+
|
3
|
+
public class ChunksizeComputation extends AbstractUnitComputation
|
4
|
+
{
|
5
|
+
private ChunksizeComputation()
|
6
|
+
{
|
7
|
+
// Do nothing.
|
8
|
+
}
|
9
|
+
|
10
|
+
public static int getChunksizeBytes(String chunksize)
|
11
|
+
{
|
12
|
+
if (!validateValue(chunksize)) {
|
13
|
+
throw new IllegalArgumentException("Unrecognized value of multipart_chunksize: " + chunksize);
|
14
|
+
}
|
15
|
+
|
16
|
+
String sizePartOfChunksize = getSize(chunksize);
|
17
|
+
String unitPartOfChunksize = getUnit(chunksize);
|
18
|
+
ComputeUnits e = ComputeUnits.valueOf(unitPartOfChunksize.toUpperCase());
|
19
|
+
double chunksizeValue = (Double.valueOf(sizePartOfChunksize) * e.getUnit());
|
20
|
+
if (chunksizeValue < 5.0 * ComputeUnits.MB.getUnit() || 2.0 * ComputeUnits.GB.getUnit() < chunksizeValue) {
|
21
|
+
throw new IllegalArgumentException("Unrecognized range of value multipart_chunksize: " + chunksize);
|
22
|
+
}
|
23
|
+
|
24
|
+
return (int) chunksizeValue;
|
25
|
+
}
|
26
|
+
}
|
@@ -0,0 +1,23 @@
|
|
1
|
+
package org.embulk.output.s3v2.util;
|
2
|
+
|
3
|
+
public class ThresholdComputation extends AbstractUnitComputation
|
4
|
+
{
|
5
|
+
private ThresholdComputation()
|
6
|
+
{
|
7
|
+
// Do nothing.
|
8
|
+
}
|
9
|
+
|
10
|
+
public static boolean largerThanThreshold(String threshold, long computationTargetSize)
|
11
|
+
{
|
12
|
+
if (!validateValue(threshold)) {
|
13
|
+
throw new IllegalArgumentException("Unrecognized value of multipart_threshold: " + threshold);
|
14
|
+
}
|
15
|
+
|
16
|
+
String thresholdSize = getSize(threshold);
|
17
|
+
String thresholdUnit = getUnit(threshold);
|
18
|
+
ComputeUnits e = ComputeUnits.valueOf(thresholdUnit.toUpperCase());
|
19
|
+
double thresholdValue = (Double.valueOf(thresholdSize) * e.getUnit());
|
20
|
+
|
21
|
+
return (double) computationTargetSize > thresholdValue;
|
22
|
+
}
|
23
|
+
}
|
@@ -0,0 +1,45 @@
|
|
1
|
+
package org.embulk.output.s3v2.strategy;
|
2
|
+
|
3
|
+
import org.embulk.output.s3v2.PluginTask;
|
4
|
+
import org.junit.jupiter.api.Assertions;
|
5
|
+
import org.junit.jupiter.api.BeforeEach;
|
6
|
+
import org.junit.jupiter.api.DisplayName;
|
7
|
+
import org.junit.jupiter.api.Test;
|
8
|
+
import org.junit.jupiter.api.extension.ExtendWith;
|
9
|
+
import org.mockito.Mockito;
|
10
|
+
import org.mockito.junit.jupiter.MockitoExtension;
|
11
|
+
|
12
|
+
/**
|
13
|
+
* @see BufferedStrategy
|
14
|
+
*/
|
15
|
+
@ExtendWith(MockitoExtension.class)
|
16
|
+
public class BufferedStrategyTests
|
17
|
+
{
|
18
|
+
private PluginTask task;
|
19
|
+
|
20
|
+
@BeforeEach
|
21
|
+
public void setUp()
|
22
|
+
{
|
23
|
+
task = Mockito.mock(PluginTask.class);
|
24
|
+
Mockito.doReturn("ap-northeast-1").when(task).getRegion();
|
25
|
+
}
|
26
|
+
|
27
|
+
@Test
|
28
|
+
@DisplayName("Test validate true")
|
29
|
+
public void testValidateTrue() throws Exception
|
30
|
+
{
|
31
|
+
Mockito.doReturn(false).when(task).getEnableMultiPartUpload();
|
32
|
+
BufferedStrategy output = new BufferedStrategy(task, 0);
|
33
|
+
Assertions.assertTrue(output.validate());
|
34
|
+
}
|
35
|
+
|
36
|
+
@Test
|
37
|
+
@DisplayName("Test validate false")
|
38
|
+
public void testValidateInvalidCase() throws Exception
|
39
|
+
{
|
40
|
+
Mockito.doReturn(true).when(task).getEnableMultiPartUpload();
|
41
|
+
UnsupportedOperationException ex = Assertions.assertThrows(UnsupportedOperationException.class,
|
42
|
+
() -> new BufferedStrategy(task, 0));
|
43
|
+
Assertions.assertEquals("Buffering strategy does not support S3 multi-part upload.", ex.getMessage());
|
44
|
+
}
|
45
|
+
}
|