embulk-input-s3 0.2.21 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/classpath/{aws-java-sdk-core-1.11.253.jar → aws-java-sdk-core-1.11.466.jar} +0 -0
- data/classpath/aws-java-sdk-kms-1.11.466.jar +0 -0
- data/classpath/aws-java-sdk-s3-1.11.466.jar +0 -0
- data/classpath/{commons-codec-1.9.jar → commons-codec-1.10.jar} +0 -0
- data/classpath/embulk-input-s3-0.3.0.jar +0 -0
- data/classpath/embulk-util-aws-credentials-0.3.0.jar +0 -0
- data/classpath/httpclient-4.5.5.jar +0 -0
- data/classpath/httpcore-4.4.9.jar +0 -0
- data/classpath/jackson-databind-2.6.7.2.jar +0 -0
- data/classpath/{jmespath-java-1.11.253.jar → jmespath-java-1.11.466.jar} +0 -0
- data/src/main/java/org/embulk/input/s3/AbstractS3FileInputPlugin.java +34 -24
- data/src/main/java/org/embulk/input/s3/FileList.java +2 -2
- data/src/main/java/org/embulk/input/s3/HttpProxy.java +7 -6
- data/src/main/java/org/embulk/input/s3/S3FileInputPlugin.java +4 -3
- data/src/test/java/org/embulk/input/s3/TestAbstractS3FileInputPlugin.java +2 -1
- data/src/test/java/org/embulk/input/s3/TestHttpProxy.java +6 -5
- data/src/test/java/org/embulk/input/s3/TestS3FileInputPlugin.java +2 -2
- metadata +12 -14
- data/classpath/aws-java-sdk-kms-1.11.253.jar +0 -0
- data/classpath/aws-java-sdk-s3-1.11.253.jar +0 -0
- data/classpath/embulk-input-s3-0.2.21.jar +0 -0
- data/classpath/embulk-util-aws-credentials-0.2.21.jar +0 -0
- data/classpath/httpclient-4.5.2.jar +0 -0
- data/classpath/httpcore-4.4.4.jar +0 -0
- data/classpath/jackson-annotations-2.6.0.jar +0 -0
- data/classpath/jackson-core-2.6.7.jar +0 -0
- data/classpath/jackson-databind-2.6.7.1.jar +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7aef07b030009a6352551d4b7ebd78be174dbf53
|
4
|
+
data.tar.gz: ec9ecba65e22bc04e73819d145ccb2a6a4d8115e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 867c41e167c9addeeabc893781c912880756b9ef5a16554a16440972acd30c425500b927a17736d2892cff167d64c352e11c87386808f756453290beb0b738a7
|
7
|
+
data.tar.gz: 07f0fea1f0716c3b74d384eb8174f6193d84e0714dabfd8fa4bf8cdaaba6b25f27f1a26aa5a1746115a89a095ae3799d3000350ffdf890be4255bed29a45d369
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
@@ -17,7 +17,6 @@ import com.amazonaws.services.s3.model.S3ObjectInputStream;
|
|
17
17
|
import com.amazonaws.services.s3.model.S3ObjectSummary;
|
18
18
|
import com.amazonaws.services.s3.model.StorageClass;
|
19
19
|
import com.google.common.annotations.VisibleForTesting;
|
20
|
-
import com.google.common.base.Optional;
|
21
20
|
import org.embulk.config.Config;
|
22
21
|
import org.embulk.config.ConfigDefault;
|
23
22
|
import org.embulk.config.ConfigDiff;
|
@@ -32,6 +31,7 @@ import org.embulk.spi.Exec;
|
|
32
31
|
import org.embulk.spi.FileInputPlugin;
|
33
32
|
import org.embulk.spi.TransactionalFileInput;
|
34
33
|
import org.embulk.spi.util.InputStreamFileInput;
|
34
|
+
import org.embulk.spi.util.InputStreamFileInput.InputStreamWithHints;
|
35
35
|
import org.embulk.spi.util.ResumableInputStream;
|
36
36
|
import org.embulk.spi.util.RetryExecutor;
|
37
37
|
import org.embulk.util.aws.credentials.AwsCredentials;
|
@@ -42,7 +42,9 @@ import java.io.IOException;
|
|
42
42
|
import java.io.InputStream;
|
43
43
|
import java.util.Iterator;
|
44
44
|
import java.util.List;
|
45
|
+
import java.util.Optional;
|
45
46
|
|
47
|
+
import static java.lang.String.format;
|
46
48
|
import static org.embulk.spi.util.RetryExecutor.retryExecutor;
|
47
49
|
|
48
50
|
public abstract class AbstractS3FileInputPlugin
|
@@ -54,46 +56,46 @@ public abstract class AbstractS3FileInputPlugin
|
|
54
56
|
extends AwsCredentialsTask, FileList.Task, RetrySupportPluginTask, Task
|
55
57
|
{
|
56
58
|
@Config("bucket")
|
57
|
-
|
59
|
+
String getBucket();
|
58
60
|
|
59
61
|
@Config("path_prefix")
|
60
62
|
@ConfigDefault("null")
|
61
|
-
|
63
|
+
Optional<String> getPathPrefix();
|
62
64
|
|
63
65
|
@Config("path")
|
64
66
|
@ConfigDefault("null")
|
65
|
-
|
67
|
+
Optional<String> getPath();
|
66
68
|
|
67
69
|
@Config("last_path")
|
68
70
|
@ConfigDefault("null")
|
69
|
-
|
71
|
+
Optional<String> getLastPath();
|
70
72
|
|
71
73
|
@Config("access_key_id")
|
72
74
|
@ConfigDefault("null")
|
73
|
-
|
75
|
+
Optional<String> getAccessKeyId();
|
74
76
|
|
75
77
|
@Config("http_proxy")
|
76
78
|
@ConfigDefault("null")
|
77
|
-
|
79
|
+
Optional<HttpProxy> getHttpProxy();
|
78
80
|
|
79
|
-
|
81
|
+
void setHttpProxy(Optional<HttpProxy> httpProxy);
|
80
82
|
|
81
83
|
@Config("incremental")
|
82
84
|
@ConfigDefault("true")
|
83
|
-
|
85
|
+
boolean getIncremental();
|
84
86
|
|
85
87
|
@Config("skip_glacier_objects")
|
86
88
|
@ConfigDefault("false")
|
87
|
-
|
89
|
+
boolean getSkipGlacierObjects();
|
88
90
|
|
89
91
|
// TODO timeout, ssl, etc
|
90
92
|
|
91
|
-
|
93
|
+
FileList getFiles();
|
92
94
|
|
93
|
-
|
95
|
+
void setFiles(FileList files);
|
94
96
|
|
95
97
|
@ConfigInject
|
96
|
-
|
98
|
+
BufferAllocator getBufferAllocator();
|
97
99
|
}
|
98
100
|
|
99
101
|
protected abstract Class<? extends PluginTask> getTaskClass();
|
@@ -129,7 +131,7 @@ public abstract class AbstractS3FileInputPlugin
|
|
129
131
|
// last_path
|
130
132
|
if (task.getIncremental()) {
|
131
133
|
Optional<String> lastPath = task.getFiles().getLastPath(task.getLastPath());
|
132
|
-
LOGGER.info("Incremental job, setting last_path to [{}]", lastPath.
|
134
|
+
LOGGER.info("Incremental job, setting last_path to [{}]", lastPath.orElse(""));
|
133
135
|
configDiff.set("last_path", lastPath);
|
134
136
|
}
|
135
137
|
return configDiff;
|
@@ -335,7 +337,7 @@ public abstract class AbstractS3FileInputPlugin
|
|
335
337
|
boolean skipGlacierObjects,
|
336
338
|
RetryExecutor retryExec)
|
337
339
|
{
|
338
|
-
String lastKey = lastPath.
|
340
|
+
String lastKey = lastPath.orElse(null);
|
339
341
|
do {
|
340
342
|
final String finalLastKey = lastKey;
|
341
343
|
final ListObjectsRequest req = new ListObjectsRequest(bucketName, prefix, finalLastKey, null, 1024);
|
@@ -402,10 +404,10 @@ public abstract class AbstractS3FileInputPlugin
|
|
402
404
|
@Override
|
403
405
|
public InputStream reopen(final long offset, final Exception closedCause) throws IOException
|
404
406
|
{
|
405
|
-
log.warn(
|
407
|
+
log.warn(format("S3 read failed. Retrying GET request with %,d bytes offset", offset), closedCause);
|
406
408
|
request.setRange(offset, contentLength - 1); // [first, last]
|
407
409
|
|
408
|
-
return new DefaultRetryable<S3ObjectInputStream>("
|
410
|
+
return new DefaultRetryable<S3ObjectInputStream>(format("Getting object '%s'", request.getKey())) {
|
409
411
|
@Override
|
410
412
|
public S3ObjectInputStream call()
|
411
413
|
{
|
@@ -457,17 +459,25 @@ public abstract class AbstractS3FileInputPlugin
|
|
457
459
|
}
|
458
460
|
|
459
461
|
@Override
|
460
|
-
public
|
462
|
+
public InputStreamWithHints openNextWithHints() throws IOException
|
461
463
|
{
|
462
464
|
if (!iterator.hasNext()) {
|
463
465
|
return null;
|
464
466
|
}
|
465
|
-
String key = iterator.next();
|
466
|
-
GetObjectRequest request = new GetObjectRequest(bucket, key);
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
467
|
+
final String key = iterator.next();
|
468
|
+
final GetObjectRequest request = new GetObjectRequest(bucket, key);
|
469
|
+
|
470
|
+
S3Object object = new DefaultRetryable<S3Object>(format("Getting object '%s'", request.getKey())) {
|
471
|
+
@Override
|
472
|
+
public S3Object call()
|
473
|
+
{
|
474
|
+
return client.getObject(request);
|
475
|
+
}
|
476
|
+
}.executeWithCheckedException(retryExec, IOException.class);
|
477
|
+
|
478
|
+
long objectSize = object.getObjectMetadata().getContentLength();
|
479
|
+
InputStream inputStream = new ResumableInputStream(object.getObjectContent(), new S3InputStreamReopener(client, request, objectSize, retryExec));
|
480
|
+
return new InputStreamWithHints(inputStream, String.format("s3://%s/%s", bucket, key));
|
471
481
|
}
|
472
482
|
|
473
483
|
@Override
|
@@ -3,7 +3,6 @@ package org.embulk.input.s3;
|
|
3
3
|
import com.fasterxml.jackson.annotation.JsonCreator;
|
4
4
|
import com.fasterxml.jackson.annotation.JsonIgnore;
|
5
5
|
import com.fasterxml.jackson.annotation.JsonProperty;
|
6
|
-
import com.google.common.base.Optional;
|
7
6
|
import com.google.common.base.Throwables;
|
8
7
|
|
9
8
|
import org.embulk.config.Config;
|
@@ -23,6 +22,7 @@ import java.nio.charset.StandardCharsets;
|
|
23
22
|
import java.util.AbstractList;
|
24
23
|
import java.util.ArrayList;
|
25
24
|
import java.util.List;
|
25
|
+
import java.util.Optional;
|
26
26
|
import java.util.regex.Pattern;
|
27
27
|
import java.util.zip.GZIPInputStream;
|
28
28
|
import java.util.zip.GZIPOutputStream;
|
@@ -179,7 +179,7 @@ public class FileList
|
|
179
179
|
catch (IOException ex) {
|
180
180
|
throw Throwables.propagate(ex);
|
181
181
|
}
|
182
|
-
return new FileList(binary.toByteArray(), getSplits(entries), Optional.
|
182
|
+
return new FileList(binary.toByteArray(), getSplits(entries), Optional.ofNullable(last));
|
183
183
|
}
|
184
184
|
|
185
185
|
private List<List<Entry>> getSplits(List<Entry> all)
|
@@ -1,10 +1,11 @@
|
|
1
1
|
package org.embulk.input.s3;
|
2
2
|
|
3
|
-
import com.google.common.base.Optional;
|
4
3
|
import org.embulk.config.Config;
|
5
4
|
import org.embulk.config.ConfigDefault;
|
6
5
|
import org.embulk.config.Task;
|
7
6
|
|
7
|
+
import java.util.Optional;
|
8
|
+
|
8
9
|
/**
|
9
10
|
* HttpProxy is config unit for Input/Output plugins' configs.
|
10
11
|
*
|
@@ -15,21 +16,21 @@ public interface HttpProxy
|
|
15
16
|
extends Task
|
16
17
|
{
|
17
18
|
@Config("host")
|
18
|
-
|
19
|
+
String getHost();
|
19
20
|
|
20
21
|
@Config("port")
|
21
22
|
@ConfigDefault("null")
|
22
|
-
|
23
|
+
Optional<Integer> getPort();
|
23
24
|
|
24
25
|
@Config("https")
|
25
26
|
@ConfigDefault("true")
|
26
|
-
|
27
|
+
boolean getHttps();
|
27
28
|
|
28
29
|
@Config("user")
|
29
30
|
@ConfigDefault("null")
|
30
|
-
|
31
|
+
Optional<String> getUser();
|
31
32
|
|
32
33
|
@Config("password")
|
33
34
|
@ConfigDefault("null")
|
34
|
-
|
35
|
+
Optional<String> getPassword();
|
35
36
|
}
|
@@ -3,12 +3,13 @@ package org.embulk.input.s3;
|
|
3
3
|
import com.amazonaws.client.builder.AwsClientBuilder.EndpointConfiguration;
|
4
4
|
import com.amazonaws.services.s3.AmazonS3;
|
5
5
|
import com.amazonaws.services.s3.AmazonS3ClientBuilder;
|
6
|
-
import com.google.common.base.Optional;
|
7
6
|
import org.embulk.config.Config;
|
8
7
|
import org.embulk.config.ConfigDefault;
|
9
8
|
import org.embulk.spi.Exec;
|
10
9
|
import org.slf4j.Logger;
|
11
10
|
|
11
|
+
import java.util.Optional;
|
12
|
+
|
12
13
|
public class S3FileInputPlugin
|
13
14
|
extends AbstractS3FileInputPlugin
|
14
15
|
{
|
@@ -17,11 +18,11 @@ public class S3FileInputPlugin
|
|
17
18
|
{
|
18
19
|
@Config("endpoint")
|
19
20
|
@ConfigDefault("null")
|
20
|
-
|
21
|
+
Optional<String> getEndpoint();
|
21
22
|
|
22
23
|
@Config("region")
|
23
24
|
@ConfigDefault("null")
|
24
|
-
|
25
|
+
Optional<String> getRegion();
|
25
26
|
}
|
26
27
|
|
27
28
|
private static final Logger log = Exec.getLogger(S3FileInputPlugin.class);
|
@@ -6,7 +6,6 @@ import com.amazonaws.services.s3.model.GetObjectMetadataRequest;
|
|
6
6
|
import com.amazonaws.services.s3.model.ListObjectsRequest;
|
7
7
|
import com.amazonaws.services.s3.model.ObjectListing;
|
8
8
|
import com.amazonaws.services.s3.model.ObjectMetadata;
|
9
|
-
import com.google.common.base.Optional;
|
10
9
|
import org.apache.http.HttpStatus;
|
11
10
|
import org.embulk.EmbulkTestRuntime;
|
12
11
|
import org.embulk.spi.util.RetryExecutor;
|
@@ -14,6 +13,8 @@ import org.junit.Before;
|
|
14
13
|
import org.junit.Rule;
|
15
14
|
import org.junit.Test;
|
16
15
|
|
16
|
+
import java.util.Optional;
|
17
|
+
|
17
18
|
import static org.mockito.Matchers.any;
|
18
19
|
import static org.mockito.Mockito.doReturn;
|
19
20
|
import static org.mockito.Mockito.doThrow;
|
@@ -1,6 +1,5 @@
|
|
1
1
|
package org.embulk.input.s3;
|
2
2
|
|
3
|
-
import com.google.common.base.Optional;
|
4
3
|
import org.embulk.EmbulkTestRuntime;
|
5
4
|
import org.embulk.config.ConfigSource;
|
6
5
|
import org.embulk.input.s3.S3FileInputPlugin.S3PluginTask;
|
@@ -8,6 +7,8 @@ import org.junit.Before;
|
|
8
7
|
import org.junit.Rule;
|
9
8
|
import org.junit.Test;
|
10
9
|
|
10
|
+
import java.util.Optional;
|
11
|
+
|
11
12
|
import static org.junit.Assert.assertEquals;
|
12
13
|
import static org.junit.Assert.assertTrue;
|
13
14
|
|
@@ -41,7 +42,7 @@ public class TestHttpProxy
|
|
41
42
|
String host = "my_host";
|
42
43
|
ConfigSource conf = config.deepCopy().set("host", host);
|
43
44
|
HttpProxy httpProxy = conf.loadConfig(HttpProxy.class);
|
44
|
-
assertHttpProxy(host, Optional
|
45
|
+
assertHttpProxy(host, Optional.empty(), true, Optional.empty(), Optional.empty(),
|
45
46
|
httpProxy);
|
46
47
|
}
|
47
48
|
|
@@ -51,7 +52,7 @@ public class TestHttpProxy
|
|
51
52
|
.set("host", host)
|
52
53
|
.set("https", true);
|
53
54
|
HttpProxy httpProxy = conf.loadConfig(HttpProxy.class);
|
54
|
-
assertHttpProxy(host, Optional
|
55
|
+
assertHttpProxy(host, Optional.empty(), true, Optional.empty(), Optional.empty(),
|
55
56
|
httpProxy);
|
56
57
|
}
|
57
58
|
|
@@ -61,7 +62,7 @@ public class TestHttpProxy
|
|
61
62
|
.set("host", host)
|
62
63
|
.set("https", false);
|
63
64
|
HttpProxy httpProxy = conf.loadConfig(HttpProxy.class);
|
64
|
-
assertHttpProxy(host, Optional
|
65
|
+
assertHttpProxy(host, Optional.empty(), false, Optional.empty(), Optional.empty(),
|
65
66
|
httpProxy);
|
66
67
|
}
|
67
68
|
|
@@ -72,7 +73,7 @@ public class TestHttpProxy
|
|
72
73
|
.set("host", host)
|
73
74
|
.set("port", port);
|
74
75
|
HttpProxy httpProxy = conf.loadConfig(HttpProxy.class);
|
75
|
-
assertHttpProxy(host, Optional.of(port), true, Optional
|
76
|
+
assertHttpProxy(host, Optional.of(port), true, Optional.empty(), Optional.empty(),
|
76
77
|
httpProxy);
|
77
78
|
}
|
78
79
|
|
@@ -6,7 +6,6 @@ import com.amazonaws.services.s3.model.ObjectListing;
|
|
6
6
|
import com.amazonaws.services.s3.model.Region;
|
7
7
|
import com.amazonaws.services.s3.model.S3ObjectSummary;
|
8
8
|
import com.amazonaws.services.s3.model.StorageClass;
|
9
|
-
import com.google.common.base.Optional;
|
10
9
|
import com.google.common.collect.ImmutableList;
|
11
10
|
import com.google.common.collect.ImmutableMap;
|
12
11
|
import org.embulk.EmbulkTestRuntime;
|
@@ -31,6 +30,7 @@ import org.mockito.Mockito;
|
|
31
30
|
import java.lang.reflect.Field;
|
32
31
|
import java.util.ArrayList;
|
33
32
|
import java.util.List;
|
33
|
+
import java.util.Optional;
|
34
34
|
|
35
35
|
import static org.embulk.input.s3.S3FileInputPlugin.S3PluginTask;
|
36
36
|
import static org.junit.Assert.assertEquals;
|
@@ -235,7 +235,7 @@ public class TestS3FileInputPlugin
|
|
235
235
|
doReturn(s3objectList("in/aa/a", StorageClass.Glacier)).when(client).listObjects(any(ListObjectsRequest.class));
|
236
236
|
|
237
237
|
AbstractS3FileInputPlugin plugin = Mockito.mock(AbstractS3FileInputPlugin.class, Mockito.CALLS_REAL_METHODS);
|
238
|
-
plugin.listS3FilesByPrefix(newFileList(config, "sample_00", 100L), client, "test_bucket", "test_prefix", Optional
|
238
|
+
plugin.listS3FilesByPrefix(newFileList(config, "sample_00", 100L), client, "test_bucket", "test_prefix", Optional.empty(), false);
|
239
239
|
}
|
240
240
|
|
241
241
|
private FileList.Builder newFileList(ConfigSource config, Object... nameAndSize)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-s3
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-12-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -61,21 +61,19 @@ files:
|
|
61
61
|
- src/test/java/org/embulk/input/s3/TestS3FileInputPlugin.java
|
62
62
|
- src/test/java/org/embulk/input/s3/TestS3InputStreamReopener.java
|
63
63
|
- src/test/resources/sample_01.csv
|
64
|
-
- classpath/
|
65
|
-
- classpath/
|
66
|
-
- classpath/
|
64
|
+
- classpath/embulk-util-aws-credentials-0.3.0.jar
|
65
|
+
- classpath/httpcore-4.4.9.jar
|
66
|
+
- classpath/httpclient-4.5.5.jar
|
67
67
|
- classpath/ion-java-1.0.2.jar
|
68
|
-
- classpath/
|
69
|
-
- classpath/
|
70
|
-
- classpath/commons-codec-1.9.jar
|
71
|
-
- classpath/jmespath-java-1.11.253.jar
|
68
|
+
- classpath/embulk-input-s3-0.3.0.jar
|
69
|
+
- classpath/aws-java-sdk-core-1.11.466.jar
|
72
70
|
- classpath/jcl-over-slf4j-1.7.12.jar
|
73
|
-
- classpath/
|
74
|
-
- classpath/
|
71
|
+
- classpath/commons-codec-1.10.jar
|
72
|
+
- classpath/jmespath-java-1.11.466.jar
|
73
|
+
- classpath/jackson-databind-2.6.7.2.jar
|
75
74
|
- classpath/jackson-dataformat-cbor-2.6.7.jar
|
76
|
-
- classpath/
|
77
|
-
- classpath/
|
78
|
-
- classpath/jackson-annotations-2.6.0.jar
|
75
|
+
- classpath/aws-java-sdk-s3-1.11.466.jar
|
76
|
+
- classpath/aws-java-sdk-kms-1.11.466.jar
|
79
77
|
homepage: https://github.com/embulk/embulk-input-s3
|
80
78
|
licenses:
|
81
79
|
- Apache 2.0
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|