embulk-input-s3 0.2.21 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/classpath/{aws-java-sdk-core-1.11.253.jar → aws-java-sdk-core-1.11.466.jar} +0 -0
- data/classpath/aws-java-sdk-kms-1.11.466.jar +0 -0
- data/classpath/aws-java-sdk-s3-1.11.466.jar +0 -0
- data/classpath/{commons-codec-1.9.jar → commons-codec-1.10.jar} +0 -0
- data/classpath/embulk-input-s3-0.3.0.jar +0 -0
- data/classpath/embulk-util-aws-credentials-0.3.0.jar +0 -0
- data/classpath/httpclient-4.5.5.jar +0 -0
- data/classpath/httpcore-4.4.9.jar +0 -0
- data/classpath/jackson-databind-2.6.7.2.jar +0 -0
- data/classpath/{jmespath-java-1.11.253.jar → jmespath-java-1.11.466.jar} +0 -0
- data/src/main/java/org/embulk/input/s3/AbstractS3FileInputPlugin.java +34 -24
- data/src/main/java/org/embulk/input/s3/FileList.java +2 -2
- data/src/main/java/org/embulk/input/s3/HttpProxy.java +7 -6
- data/src/main/java/org/embulk/input/s3/S3FileInputPlugin.java +4 -3
- data/src/test/java/org/embulk/input/s3/TestAbstractS3FileInputPlugin.java +2 -1
- data/src/test/java/org/embulk/input/s3/TestHttpProxy.java +6 -5
- data/src/test/java/org/embulk/input/s3/TestS3FileInputPlugin.java +2 -2
- metadata +12 -14
- data/classpath/aws-java-sdk-kms-1.11.253.jar +0 -0
- data/classpath/aws-java-sdk-s3-1.11.253.jar +0 -0
- data/classpath/embulk-input-s3-0.2.21.jar +0 -0
- data/classpath/embulk-util-aws-credentials-0.2.21.jar +0 -0
- data/classpath/httpclient-4.5.2.jar +0 -0
- data/classpath/httpcore-4.4.4.jar +0 -0
- data/classpath/jackson-annotations-2.6.0.jar +0 -0
- data/classpath/jackson-core-2.6.7.jar +0 -0
- data/classpath/jackson-databind-2.6.7.1.jar +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7aef07b030009a6352551d4b7ebd78be174dbf53
|
4
|
+
data.tar.gz: ec9ecba65e22bc04e73819d145ccb2a6a4d8115e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 867c41e167c9addeeabc893781c912880756b9ef5a16554a16440972acd30c425500b927a17736d2892cff167d64c352e11c87386808f756453290beb0b738a7
|
7
|
+
data.tar.gz: 07f0fea1f0716c3b74d384eb8174f6193d84e0714dabfd8fa4bf8cdaaba6b25f27f1a26aa5a1746115a89a095ae3799d3000350ffdf890be4255bed29a45d369
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
@@ -17,7 +17,6 @@ import com.amazonaws.services.s3.model.S3ObjectInputStream;
|
|
17
17
|
import com.amazonaws.services.s3.model.S3ObjectSummary;
|
18
18
|
import com.amazonaws.services.s3.model.StorageClass;
|
19
19
|
import com.google.common.annotations.VisibleForTesting;
|
20
|
-
import com.google.common.base.Optional;
|
21
20
|
import org.embulk.config.Config;
|
22
21
|
import org.embulk.config.ConfigDefault;
|
23
22
|
import org.embulk.config.ConfigDiff;
|
@@ -32,6 +31,7 @@ import org.embulk.spi.Exec;
|
|
32
31
|
import org.embulk.spi.FileInputPlugin;
|
33
32
|
import org.embulk.spi.TransactionalFileInput;
|
34
33
|
import org.embulk.spi.util.InputStreamFileInput;
|
34
|
+
import org.embulk.spi.util.InputStreamFileInput.InputStreamWithHints;
|
35
35
|
import org.embulk.spi.util.ResumableInputStream;
|
36
36
|
import org.embulk.spi.util.RetryExecutor;
|
37
37
|
import org.embulk.util.aws.credentials.AwsCredentials;
|
@@ -42,7 +42,9 @@ import java.io.IOException;
|
|
42
42
|
import java.io.InputStream;
|
43
43
|
import java.util.Iterator;
|
44
44
|
import java.util.List;
|
45
|
+
import java.util.Optional;
|
45
46
|
|
47
|
+
import static java.lang.String.format;
|
46
48
|
import static org.embulk.spi.util.RetryExecutor.retryExecutor;
|
47
49
|
|
48
50
|
public abstract class AbstractS3FileInputPlugin
|
@@ -54,46 +56,46 @@ public abstract class AbstractS3FileInputPlugin
|
|
54
56
|
extends AwsCredentialsTask, FileList.Task, RetrySupportPluginTask, Task
|
55
57
|
{
|
56
58
|
@Config("bucket")
|
57
|
-
|
59
|
+
String getBucket();
|
58
60
|
|
59
61
|
@Config("path_prefix")
|
60
62
|
@ConfigDefault("null")
|
61
|
-
|
63
|
+
Optional<String> getPathPrefix();
|
62
64
|
|
63
65
|
@Config("path")
|
64
66
|
@ConfigDefault("null")
|
65
|
-
|
67
|
+
Optional<String> getPath();
|
66
68
|
|
67
69
|
@Config("last_path")
|
68
70
|
@ConfigDefault("null")
|
69
|
-
|
71
|
+
Optional<String> getLastPath();
|
70
72
|
|
71
73
|
@Config("access_key_id")
|
72
74
|
@ConfigDefault("null")
|
73
|
-
|
75
|
+
Optional<String> getAccessKeyId();
|
74
76
|
|
75
77
|
@Config("http_proxy")
|
76
78
|
@ConfigDefault("null")
|
77
|
-
|
79
|
+
Optional<HttpProxy> getHttpProxy();
|
78
80
|
|
79
|
-
|
81
|
+
void setHttpProxy(Optional<HttpProxy> httpProxy);
|
80
82
|
|
81
83
|
@Config("incremental")
|
82
84
|
@ConfigDefault("true")
|
83
|
-
|
85
|
+
boolean getIncremental();
|
84
86
|
|
85
87
|
@Config("skip_glacier_objects")
|
86
88
|
@ConfigDefault("false")
|
87
|
-
|
89
|
+
boolean getSkipGlacierObjects();
|
88
90
|
|
89
91
|
// TODO timeout, ssl, etc
|
90
92
|
|
91
|
-
|
93
|
+
FileList getFiles();
|
92
94
|
|
93
|
-
|
95
|
+
void setFiles(FileList files);
|
94
96
|
|
95
97
|
@ConfigInject
|
96
|
-
|
98
|
+
BufferAllocator getBufferAllocator();
|
97
99
|
}
|
98
100
|
|
99
101
|
protected abstract Class<? extends PluginTask> getTaskClass();
|
@@ -129,7 +131,7 @@ public abstract class AbstractS3FileInputPlugin
|
|
129
131
|
// last_path
|
130
132
|
if (task.getIncremental()) {
|
131
133
|
Optional<String> lastPath = task.getFiles().getLastPath(task.getLastPath());
|
132
|
-
LOGGER.info("Incremental job, setting last_path to [{}]", lastPath.or(""));
|
134
|
+
LOGGER.info("Incremental job, setting last_path to [{}]", lastPath.orElse(""));
|
133
135
|
configDiff.set("last_path", lastPath);
|
134
136
|
}
|
135
137
|
return configDiff;
|
@@ -335,7 +337,7 @@ public abstract class AbstractS3FileInputPlugin
|
|
335
337
|
boolean skipGlacierObjects,
|
336
338
|
RetryExecutor retryExec)
|
337
339
|
{
|
338
|
-
String lastKey = lastPath.orNull();
|
340
|
+
String lastKey = lastPath.orElse(null);
|
339
341
|
do {
|
340
342
|
final String finalLastKey = lastKey;
|
341
343
|
final ListObjectsRequest req = new ListObjectsRequest(bucketName, prefix, finalLastKey, null, 1024);
|
@@ -402,10 +404,10 @@ public abstract class AbstractS3FileInputPlugin
|
|
402
404
|
@Override
|
403
405
|
public InputStream reopen(final long offset, final Exception closedCause) throws IOException
|
404
406
|
{
|
405
|
-
log.warn(
|
407
|
+
log.warn(format("S3 read failed. Retrying GET request with %,d bytes offset", offset), closedCause);
|
406
408
|
request.setRange(offset, contentLength - 1); // [first, last]
|
407
409
|
|
408
|
-
return new DefaultRetryable<S3ObjectInputStream>("
|
410
|
+
return new DefaultRetryable<S3ObjectInputStream>(format("Getting object '%s'", request.getKey())) {
|
409
411
|
@Override
|
410
412
|
public S3ObjectInputStream call()
|
411
413
|
{
|
@@ -457,17 +459,25 @@ public abstract class AbstractS3FileInputPlugin
|
|
457
459
|
}
|
458
460
|
|
459
461
|
@Override
|
460
|
-
public
|
462
|
+
public InputStreamWithHints openNextWithHints() throws IOException
|
461
463
|
{
|
462
464
|
if (!iterator.hasNext()) {
|
463
465
|
return null;
|
464
466
|
}
|
465
|
-
String key = iterator.next();
|
466
|
-
GetObjectRequest request = new GetObjectRequest(bucket, key);
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
467
|
+
final String key = iterator.next();
|
468
|
+
final GetObjectRequest request = new GetObjectRequest(bucket, key);
|
469
|
+
|
470
|
+
S3Object object = new DefaultRetryable<S3Object>(format("Getting object '%s'", request.getKey())) {
|
471
|
+
@Override
|
472
|
+
public S3Object call()
|
473
|
+
{
|
474
|
+
return client.getObject(request);
|
475
|
+
}
|
476
|
+
}.executeWithCheckedException(retryExec, IOException.class);
|
477
|
+
|
478
|
+
long objectSize = object.getObjectMetadata().getContentLength();
|
479
|
+
InputStream inputStream = new ResumableInputStream(object.getObjectContent(), new S3InputStreamReopener(client, request, objectSize, retryExec));
|
480
|
+
return new InputStreamWithHints(inputStream, String.format("s3://%s/%s", bucket, key));
|
471
481
|
}
|
472
482
|
|
473
483
|
@Override
|
@@ -3,7 +3,6 @@ package org.embulk.input.s3;
|
|
3
3
|
import com.fasterxml.jackson.annotation.JsonCreator;
|
4
4
|
import com.fasterxml.jackson.annotation.JsonIgnore;
|
5
5
|
import com.fasterxml.jackson.annotation.JsonProperty;
|
6
|
-
import com.google.common.base.Optional;
|
7
6
|
import com.google.common.base.Throwables;
|
8
7
|
|
9
8
|
import org.embulk.config.Config;
|
@@ -23,6 +22,7 @@ import java.nio.charset.StandardCharsets;
|
|
23
22
|
import java.util.AbstractList;
|
24
23
|
import java.util.ArrayList;
|
25
24
|
import java.util.List;
|
25
|
+
import java.util.Optional;
|
26
26
|
import java.util.regex.Pattern;
|
27
27
|
import java.util.zip.GZIPInputStream;
|
28
28
|
import java.util.zip.GZIPOutputStream;
|
@@ -179,7 +179,7 @@ public class FileList
|
|
179
179
|
catch (IOException ex) {
|
180
180
|
throw Throwables.propagate(ex);
|
181
181
|
}
|
182
|
-
return new FileList(binary.toByteArray(), getSplits(entries), Optional.fromNullable(last));
|
182
|
+
return new FileList(binary.toByteArray(), getSplits(entries), Optional.ofNullable(last));
|
183
183
|
}
|
184
184
|
|
185
185
|
private List<List<Entry>> getSplits(List<Entry> all)
|
@@ -1,10 +1,11 @@
|
|
1
1
|
package org.embulk.input.s3;
|
2
2
|
|
3
|
-
import com.google.common.base.Optional;
|
4
3
|
import org.embulk.config.Config;
|
5
4
|
import org.embulk.config.ConfigDefault;
|
6
5
|
import org.embulk.config.Task;
|
7
6
|
|
7
|
+
import java.util.Optional;
|
8
|
+
|
8
9
|
/**
|
9
10
|
* HttpProxy is config unit for Input/Output plugins' configs.
|
10
11
|
*
|
@@ -15,21 +16,21 @@ public interface HttpProxy
|
|
15
16
|
extends Task
|
16
17
|
{
|
17
18
|
@Config("host")
|
18
|
-
|
19
|
+
String getHost();
|
19
20
|
|
20
21
|
@Config("port")
|
21
22
|
@ConfigDefault("null")
|
22
|
-
|
23
|
+
Optional<Integer> getPort();
|
23
24
|
|
24
25
|
@Config("https")
|
25
26
|
@ConfigDefault("true")
|
26
|
-
|
27
|
+
boolean getHttps();
|
27
28
|
|
28
29
|
@Config("user")
|
29
30
|
@ConfigDefault("null")
|
30
|
-
|
31
|
+
Optional<String> getUser();
|
31
32
|
|
32
33
|
@Config("password")
|
33
34
|
@ConfigDefault("null")
|
34
|
-
|
35
|
+
Optional<String> getPassword();
|
35
36
|
}
|
@@ -3,12 +3,13 @@ package org.embulk.input.s3;
|
|
3
3
|
import com.amazonaws.client.builder.AwsClientBuilder.EndpointConfiguration;
|
4
4
|
import com.amazonaws.services.s3.AmazonS3;
|
5
5
|
import com.amazonaws.services.s3.AmazonS3ClientBuilder;
|
6
|
-
import com.google.common.base.Optional;
|
7
6
|
import org.embulk.config.Config;
|
8
7
|
import org.embulk.config.ConfigDefault;
|
9
8
|
import org.embulk.spi.Exec;
|
10
9
|
import org.slf4j.Logger;
|
11
10
|
|
11
|
+
import java.util.Optional;
|
12
|
+
|
12
13
|
public class S3FileInputPlugin
|
13
14
|
extends AbstractS3FileInputPlugin
|
14
15
|
{
|
@@ -17,11 +18,11 @@ public class S3FileInputPlugin
|
|
17
18
|
{
|
18
19
|
@Config("endpoint")
|
19
20
|
@ConfigDefault("null")
|
20
|
-
|
21
|
+
Optional<String> getEndpoint();
|
21
22
|
|
22
23
|
@Config("region")
|
23
24
|
@ConfigDefault("null")
|
24
|
-
|
25
|
+
Optional<String> getRegion();
|
25
26
|
}
|
26
27
|
|
27
28
|
private static final Logger log = Exec.getLogger(S3FileInputPlugin.class);
|
@@ -6,7 +6,6 @@ import com.amazonaws.services.s3.model.GetObjectMetadataRequest;
|
|
6
6
|
import com.amazonaws.services.s3.model.ListObjectsRequest;
|
7
7
|
import com.amazonaws.services.s3.model.ObjectListing;
|
8
8
|
import com.amazonaws.services.s3.model.ObjectMetadata;
|
9
|
-
import com.google.common.base.Optional;
|
10
9
|
import org.apache.http.HttpStatus;
|
11
10
|
import org.embulk.EmbulkTestRuntime;
|
12
11
|
import org.embulk.spi.util.RetryExecutor;
|
@@ -14,6 +13,8 @@ import org.junit.Before;
|
|
14
13
|
import org.junit.Rule;
|
15
14
|
import org.junit.Test;
|
16
15
|
|
16
|
+
import java.util.Optional;
|
17
|
+
|
17
18
|
import static org.mockito.Matchers.any;
|
18
19
|
import static org.mockito.Mockito.doReturn;
|
19
20
|
import static org.mockito.Mockito.doThrow;
|
@@ -1,6 +1,5 @@
|
|
1
1
|
package org.embulk.input.s3;
|
2
2
|
|
3
|
-
import com.google.common.base.Optional;
|
4
3
|
import org.embulk.EmbulkTestRuntime;
|
5
4
|
import org.embulk.config.ConfigSource;
|
6
5
|
import org.embulk.input.s3.S3FileInputPlugin.S3PluginTask;
|
@@ -8,6 +7,8 @@ import org.junit.Before;
|
|
8
7
|
import org.junit.Rule;
|
9
8
|
import org.junit.Test;
|
10
9
|
|
10
|
+
import java.util.Optional;
|
11
|
+
|
11
12
|
import static org.junit.Assert.assertEquals;
|
12
13
|
import static org.junit.Assert.assertTrue;
|
13
14
|
|
@@ -41,7 +42,7 @@ public class TestHttpProxy
|
|
41
42
|
String host = "my_host";
|
42
43
|
ConfigSource conf = config.deepCopy().set("host", host);
|
43
44
|
HttpProxy httpProxy = conf.loadConfig(HttpProxy.class);
|
44
|
-
assertHttpProxy(host, Optional
|
45
|
+
assertHttpProxy(host, Optional.empty(), true, Optional.empty(), Optional.empty(),
|
45
46
|
httpProxy);
|
46
47
|
}
|
47
48
|
|
@@ -51,7 +52,7 @@ public class TestHttpProxy
|
|
51
52
|
.set("host", host)
|
52
53
|
.set("https", true);
|
53
54
|
HttpProxy httpProxy = conf.loadConfig(HttpProxy.class);
|
54
|
-
assertHttpProxy(host, Optional
|
55
|
+
assertHttpProxy(host, Optional.empty(), true, Optional.empty(), Optional.empty(),
|
55
56
|
httpProxy);
|
56
57
|
}
|
57
58
|
|
@@ -61,7 +62,7 @@ public class TestHttpProxy
|
|
61
62
|
.set("host", host)
|
62
63
|
.set("https", false);
|
63
64
|
HttpProxy httpProxy = conf.loadConfig(HttpProxy.class);
|
64
|
-
assertHttpProxy(host, Optional
|
65
|
+
assertHttpProxy(host, Optional.empty(), false, Optional.empty(), Optional.empty(),
|
65
66
|
httpProxy);
|
66
67
|
}
|
67
68
|
|
@@ -72,7 +73,7 @@ public class TestHttpProxy
|
|
72
73
|
.set("host", host)
|
73
74
|
.set("port", port);
|
74
75
|
HttpProxy httpProxy = conf.loadConfig(HttpProxy.class);
|
75
|
-
assertHttpProxy(host, Optional.of(port), true, Optional
|
76
|
+
assertHttpProxy(host, Optional.of(port), true, Optional.empty(), Optional.empty(),
|
76
77
|
httpProxy);
|
77
78
|
}
|
78
79
|
|
@@ -6,7 +6,6 @@ import com.amazonaws.services.s3.model.ObjectListing;
|
|
6
6
|
import com.amazonaws.services.s3.model.Region;
|
7
7
|
import com.amazonaws.services.s3.model.S3ObjectSummary;
|
8
8
|
import com.amazonaws.services.s3.model.StorageClass;
|
9
|
-
import com.google.common.base.Optional;
|
10
9
|
import com.google.common.collect.ImmutableList;
|
11
10
|
import com.google.common.collect.ImmutableMap;
|
12
11
|
import org.embulk.EmbulkTestRuntime;
|
@@ -31,6 +30,7 @@ import org.mockito.Mockito;
|
|
31
30
|
import java.lang.reflect.Field;
|
32
31
|
import java.util.ArrayList;
|
33
32
|
import java.util.List;
|
33
|
+
import java.util.Optional;
|
34
34
|
|
35
35
|
import static org.embulk.input.s3.S3FileInputPlugin.S3PluginTask;
|
36
36
|
import static org.junit.Assert.assertEquals;
|
@@ -235,7 +235,7 @@ public class TestS3FileInputPlugin
|
|
235
235
|
doReturn(s3objectList("in/aa/a", StorageClass.Glacier)).when(client).listObjects(any(ListObjectsRequest.class));
|
236
236
|
|
237
237
|
AbstractS3FileInputPlugin plugin = Mockito.mock(AbstractS3FileInputPlugin.class, Mockito.CALLS_REAL_METHODS);
|
238
|
-
plugin.listS3FilesByPrefix(newFileList(config, "sample_00", 100L), client, "test_bucket", "test_prefix", Optional
|
238
|
+
plugin.listS3FilesByPrefix(newFileList(config, "sample_00", 100L), client, "test_bucket", "test_prefix", Optional.empty(), false);
|
239
239
|
}
|
240
240
|
|
241
241
|
private FileList.Builder newFileList(ConfigSource config, Object... nameAndSize)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-s3
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.21
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-12-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -61,21 +61,19 @@ files:
|
|
61
61
|
- src/test/java/org/embulk/input/s3/TestS3FileInputPlugin.java
|
62
62
|
- src/test/java/org/embulk/input/s3/TestS3InputStreamReopener.java
|
63
63
|
- src/test/resources/sample_01.csv
|
64
|
-
- classpath/
|
65
|
-
- classpath/
|
66
|
-
- classpath/
|
64
|
+
- classpath/embulk-util-aws-credentials-0.3.0.jar
|
65
|
+
- classpath/httpcore-4.4.9.jar
|
66
|
+
- classpath/httpclient-4.5.5.jar
|
67
67
|
- classpath/ion-java-1.0.2.jar
|
68
|
-
- classpath/
|
69
|
-
- classpath/
|
70
|
-
- classpath/commons-codec-1.9.jar
|
71
|
-
- classpath/jmespath-java-1.11.253.jar
|
68
|
+
- classpath/embulk-input-s3-0.3.0.jar
|
69
|
+
- classpath/aws-java-sdk-core-1.11.466.jar
|
72
70
|
- classpath/jcl-over-slf4j-1.7.12.jar
|
73
|
-
- classpath/
|
74
|
-
- classpath/
|
71
|
+
- classpath/commons-codec-1.10.jar
|
72
|
+
- classpath/jmespath-java-1.11.466.jar
|
73
|
+
- classpath/jackson-databind-2.6.7.2.jar
|
75
74
|
- classpath/jackson-dataformat-cbor-2.6.7.jar
|
76
|
-
- classpath/
|
77
|
-
- classpath/
|
78
|
-
- classpath/jackson-annotations-2.6.0.jar
|
75
|
+
- classpath/aws-java-sdk-s3-1.11.466.jar
|
76
|
+
- classpath/aws-java-sdk-kms-1.11.466.jar
|
79
77
|
homepage: https://github.com/embulk/embulk-input-s3
|
80
78
|
licenses:
|
81
79
|
- Apache 2.0
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|