embulk-input-s3 0.2.4 → 0.2.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/classpath/embulk-input-s3-0.2.5.jar +0 -0
- data/src/main/java/org/embulk/input/s3/AbstractS3FileInputPlugin.java +23 -9
- data/src/main/java/org/embulk/input/s3/FileList.java +20 -2
- data/src/test/java/org/embulk/input/s3/TestAwsCredentials.java +173 -0
- data/src/test/java/org/embulk/input/s3/TestS3FileInputPlugin.java +151 -167
- data/src/test/java/org/embulk/input/s3/TestS3InputStreamReopener.java +14 -1
- data/src/test/resources/sample_01.csv +3 -0
- metadata +5 -3
- data/classpath/embulk-input-s3-0.2.4.jar +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 857c5f811620e1273a6fc5ec69a4eeefb658f1b0
|
4
|
+
data.tar.gz: deb852435fb0cee1d2db0391cd3a8d0f30255638
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f5dd533e7e7d5fd366268496118c110689bde80d4f5c98469eb9ff1be77734a168080a24f52d3415dcda158ce5cd1ace35c9025fc4b654663c9726f3994b9bc3
|
7
|
+
data.tar.gz: 1d5cab1c9c166a6352dbab3a30e65d058740e3b1883bb0776fac7bc6b677b6fe2bc8e047dba6e006f75cae722e0416504e8d6f493c6466dd7d54be3e484c79a7
|
Binary file
|
@@ -22,6 +22,7 @@ import com.amazonaws.services.s3.model.ObjectListing;
|
|
22
22
|
import com.amazonaws.services.s3.model.GetObjectRequest;
|
23
23
|
import com.amazonaws.services.s3.model.S3Object;
|
24
24
|
import com.amazonaws.ClientConfiguration;
|
25
|
+
import com.amazonaws.AmazonServiceException;
|
25
26
|
import com.amazonaws.Protocol;
|
26
27
|
import org.embulk.config.Config;
|
27
28
|
import org.embulk.config.ConfigInject;
|
@@ -31,6 +32,7 @@ import org.embulk.config.TaskSource;
|
|
31
32
|
import org.embulk.config.ConfigSource;
|
32
33
|
import org.embulk.config.ConfigDiff;
|
33
34
|
import org.embulk.config.TaskReport;
|
35
|
+
import org.embulk.config.ConfigException;
|
34
36
|
import org.embulk.spi.BufferAllocator;
|
35
37
|
import org.embulk.spi.Exec;
|
36
38
|
import org.embulk.spi.FileInputPlugin;
|
@@ -139,17 +141,29 @@ public abstract class AbstractS3FileInputPlugin
|
|
139
141
|
|
140
142
|
private FileList listFiles(PluginTask task)
|
141
143
|
{
|
142
|
-
|
143
|
-
|
144
|
+
try {
|
145
|
+
AmazonS3Client client = newS3Client(task);
|
146
|
+
String bucketName = task.getBucket();
|
144
147
|
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
+
if (task.getPathPrefix().equals("/")) {
|
149
|
+
log.info("Listing files with prefix \"/\". This doesn't mean all files in a bucket. If you intend to read all files, use \"path_prefix: ''\" (empty string) instead.");
|
150
|
+
}
|
148
151
|
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
152
|
+
FileList.Builder builder = new FileList.Builder(task);
|
153
|
+
listS3FilesByPrefix(builder, client, bucketName,
|
154
|
+
task.getPathPrefix(), task.getLastPath());
|
155
|
+
return builder.build();
|
156
|
+
}
|
157
|
+
catch (AmazonServiceException ex) {
|
158
|
+
if (ex.getErrorType().equals(AmazonServiceException.ErrorType.Client)) {
|
159
|
+
// HTTP 40x errors. auth error, bucket doesn't exist, etc. See AWS document for the full list:
|
160
|
+
// http://docs.aws.amazon.com/AmazonS3/latest/API/ErrorResponses.html
|
161
|
+
if (ex.getStatusCode() != 400) { // 400 Bad Request is an unexpected error
|
162
|
+
throw new ConfigException(ex);
|
163
|
+
}
|
164
|
+
}
|
165
|
+
throw ex;
|
166
|
+
}
|
153
167
|
}
|
154
168
|
|
155
169
|
/**
|
@@ -5,6 +5,8 @@ import java.util.AbstractList;
|
|
5
5
|
import java.util.ArrayList;
|
6
6
|
import java.util.zip.GZIPInputStream;
|
7
7
|
import java.util.zip.GZIPOutputStream;
|
8
|
+
import java.util.regex.Pattern;
|
9
|
+
import java.util.regex.Matcher;
|
8
10
|
import java.io.InputStream;
|
9
11
|
import java.io.OutputStream;
|
10
12
|
import java.io.BufferedOutputStream;
|
@@ -29,6 +31,10 @@ public class FileList
|
|
29
31
|
{
|
30
32
|
public interface Task
|
31
33
|
{
|
34
|
+
@Config("path_match_pattern")
|
35
|
+
@ConfigDefault("\".*\"")
|
36
|
+
String getPathMatchPattern();
|
37
|
+
|
32
38
|
@Config("total_file_count_limit")
|
33
39
|
@ConfigDefault("2147483647")
|
34
40
|
int getTotalFileCountLimit();
|
@@ -63,17 +69,21 @@ public class FileList
|
|
63
69
|
private String last = null;
|
64
70
|
|
65
71
|
private int limitCount = Integer.MAX_VALUE;
|
72
|
+
private Pattern pathMatchPattern;
|
73
|
+
|
66
74
|
private final ByteBuffer castBuffer = ByteBuffer.allocate(4);
|
67
75
|
|
68
76
|
public Builder(Task task)
|
69
77
|
{
|
70
78
|
this();
|
71
79
|
this.limitCount = task.getTotalFileCountLimit();
|
80
|
+
this.pathMatchPattern = Pattern.compile(task.getPathMatchPattern());
|
72
81
|
}
|
73
82
|
|
74
83
|
public Builder(ConfigSource config)
|
75
84
|
{
|
76
85
|
this();
|
86
|
+
this.pathMatchPattern = Pattern.compile(config.get(String.class, "path_match_pattern", ".*"));
|
77
87
|
this.limitCount = config.get(int.class, "total_file_count_limit", Integer.MAX_VALUE);
|
78
88
|
}
|
79
89
|
|
@@ -94,6 +104,12 @@ public class FileList
|
|
94
104
|
return this;
|
95
105
|
}
|
96
106
|
|
107
|
+
public Builder pathMatchPattern(String pattern)
|
108
|
+
{
|
109
|
+
this.pathMatchPattern = Pattern.compile(pattern);
|
110
|
+
return this;
|
111
|
+
}
|
112
|
+
|
97
113
|
public int size()
|
98
114
|
{
|
99
115
|
return entries.size();
|
@@ -104,6 +120,7 @@ public class FileList
|
|
104
120
|
return size() < limitCount;
|
105
121
|
}
|
106
122
|
|
123
|
+
// returns true if this file is used
|
107
124
|
public synchronized boolean add(String path, long size)
|
108
125
|
{
|
109
126
|
// TODO throw IllegalStateException if stream is already closed
|
@@ -112,8 +129,9 @@ public class FileList
|
|
112
129
|
return false;
|
113
130
|
}
|
114
131
|
|
115
|
-
|
116
|
-
|
132
|
+
if (!pathMatchPattern.matcher(path).matches()) {
|
133
|
+
return false;
|
134
|
+
}
|
117
135
|
|
118
136
|
int index = entries.size();
|
119
137
|
entries.add(new Entry(index, size));
|
@@ -0,0 +1,173 @@
|
|
1
|
+
package org.embulk.input.s3;
|
2
|
+
|
3
|
+
import com.amazonaws.auth.BasicAWSCredentials;
|
4
|
+
import com.amazonaws.auth.BasicSessionCredentials;
|
5
|
+
import com.amazonaws.auth.policy.Policy;
|
6
|
+
import com.amazonaws.auth.policy.Resource;
|
7
|
+
import com.amazonaws.auth.policy.Statement;
|
8
|
+
import com.amazonaws.auth.policy.actions.S3Actions;
|
9
|
+
import com.amazonaws.internal.StaticCredentialsProvider;
|
10
|
+
import com.amazonaws.services.securitytoken.AWSSecurityTokenServiceClient;
|
11
|
+
import com.amazonaws.services.securitytoken.model.Credentials;
|
12
|
+
import com.amazonaws.services.securitytoken.model.GetFederationTokenRequest;
|
13
|
+
import com.amazonaws.services.securitytoken.model.GetFederationTokenResult;
|
14
|
+
import org.embulk.EmbulkTestRuntime;
|
15
|
+
import org.embulk.config.ConfigDiff;
|
16
|
+
import org.embulk.config.ConfigSource;
|
17
|
+
import org.embulk.input.s3.TestS3FileInputPlugin.Control;
|
18
|
+
import org.embulk.spi.FileInputRunner;
|
19
|
+
import org.embulk.spi.TestPageBuilderReader;
|
20
|
+
import org.junit.Before;
|
21
|
+
import org.junit.BeforeClass;
|
22
|
+
import org.junit.Rule;
|
23
|
+
import org.junit.Test;
|
24
|
+
|
25
|
+
import static org.embulk.input.s3.TestS3FileInputPlugin.assertRecords;
|
26
|
+
import static org.embulk.input.s3.TestS3FileInputPlugin.parserConfig;
|
27
|
+
import static org.embulk.input.s3.TestS3FileInputPlugin.schemaConfig;
|
28
|
+
import static org.junit.Assert.assertEquals;
|
29
|
+
import static org.junit.Assume.assumeNotNull;
|
30
|
+
|
31
|
+
public class TestAwsCredentials
|
32
|
+
{
|
33
|
+
private static String EMBULK_S3_TEST_BUCKET;
|
34
|
+
private static String EMBULK_S3_TEST_ACCESS_KEY_ID;
|
35
|
+
private static String EMBULK_S3_TEST_SECRET_ACCESS_KEY;
|
36
|
+
private static final String EMBULK_S3_TEST_PATH_PREFIX = "embulk_input_s3_test";
|
37
|
+
|
38
|
+
/*
|
39
|
+
* This test case requires environment variables:
|
40
|
+
* EMBULK_S3_TEST_BUCKET
|
41
|
+
* EMBULK_S3_TEST_ACCESS_KEY_ID
|
42
|
+
* EMBULK_S3_TEST_SECRET_ACCESS_KEY
|
43
|
+
* If the variables are not set, the test case is skipped.
|
44
|
+
*/
|
45
|
+
@BeforeClass
|
46
|
+
public static void initializeConstantVariables()
|
47
|
+
{
|
48
|
+
EMBULK_S3_TEST_BUCKET = System.getenv("EMBULK_S3_TEST_BUCKET");
|
49
|
+
EMBULK_S3_TEST_ACCESS_KEY_ID = System.getenv("EMBULK_S3_TEST_ACCESS_KEY_ID");
|
50
|
+
EMBULK_S3_TEST_SECRET_ACCESS_KEY = System.getenv("EMBULK_S3_TEST_SECRET_ACCESS_KEY");
|
51
|
+
assumeNotNull(EMBULK_S3_TEST_BUCKET, EMBULK_S3_TEST_ACCESS_KEY_ID, EMBULK_S3_TEST_SECRET_ACCESS_KEY);
|
52
|
+
}
|
53
|
+
|
54
|
+
@Rule
|
55
|
+
public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
|
56
|
+
|
57
|
+
private ConfigSource config;
|
58
|
+
private FileInputRunner runner;
|
59
|
+
private TestPageBuilderReader.MockPageOutput output;
|
60
|
+
|
61
|
+
@Before
|
62
|
+
public void createResources()
|
63
|
+
{
|
64
|
+
config = runtime.getExec().newConfigSource()
|
65
|
+
.set("type", "s3")
|
66
|
+
.set("bucket", EMBULK_S3_TEST_BUCKET)
|
67
|
+
.set("path_prefix", EMBULK_S3_TEST_PATH_PREFIX)
|
68
|
+
.set("parser", parserConfig(schemaConfig()));
|
69
|
+
runner = new FileInputRunner(runtime.getInstance(S3FileInputPlugin.class));
|
70
|
+
output = new TestPageBuilderReader.MockPageOutput();
|
71
|
+
}
|
72
|
+
|
73
|
+
private void doTest(ConfigSource config)
|
74
|
+
{
|
75
|
+
ConfigDiff configDiff = runner.transaction(config, new Control(runner, output));
|
76
|
+
|
77
|
+
assertEquals(EMBULK_S3_TEST_PATH_PREFIX + "/sample_01.csv", configDiff.get(String.class, "last_path"));
|
78
|
+
assertRecords(config, output);
|
79
|
+
}
|
80
|
+
|
81
|
+
@Test
|
82
|
+
public void useBasic()
|
83
|
+
{
|
84
|
+
ConfigSource config = this.config.deepCopy()
|
85
|
+
.set("auth_method", "basic")
|
86
|
+
.set("access_key_id", EMBULK_S3_TEST_ACCESS_KEY_ID)
|
87
|
+
.set("secret_access_key", EMBULK_S3_TEST_SECRET_ACCESS_KEY);
|
88
|
+
doTest(config);
|
89
|
+
}
|
90
|
+
|
91
|
+
@Test
|
92
|
+
public void useEnv()
|
93
|
+
{
|
94
|
+
// TODO
|
95
|
+
}
|
96
|
+
|
97
|
+
@Test
|
98
|
+
public void useInstance()
|
99
|
+
{
|
100
|
+
// TODO
|
101
|
+
}
|
102
|
+
|
103
|
+
@Test
|
104
|
+
public void useProfile()
|
105
|
+
{
|
106
|
+
// TODO
|
107
|
+
}
|
108
|
+
|
109
|
+
@Test
|
110
|
+
public void useProperties()
|
111
|
+
{
|
112
|
+
String origAccessKeyId = System.getProperty("aws.accessKeyId");
|
113
|
+
String origSecretKey = System.getProperty("aws.secretKey");
|
114
|
+
try {
|
115
|
+
|
116
|
+
ConfigSource config = this.config.deepCopy().set("auth_method", "properties");
|
117
|
+
System.setProperty("aws.accessKeyId", EMBULK_S3_TEST_ACCESS_KEY_ID);
|
118
|
+
System.setProperty("aws.secretKey", EMBULK_S3_TEST_SECRET_ACCESS_KEY);
|
119
|
+
doTest(config);
|
120
|
+
}
|
121
|
+
finally {
|
122
|
+
if (origAccessKeyId != null) {
|
123
|
+
System.setProperty("aws.accessKeyId", origAccessKeyId);
|
124
|
+
}
|
125
|
+
if (origSecretKey != null) {
|
126
|
+
System.setProperty("aws.secretKey", origAccessKeyId);
|
127
|
+
}
|
128
|
+
}
|
129
|
+
}
|
130
|
+
|
131
|
+
@Test
|
132
|
+
public void useAnonymous()
|
133
|
+
{
|
134
|
+
// TODO
|
135
|
+
}
|
136
|
+
|
137
|
+
@Test
|
138
|
+
public void useSession()
|
139
|
+
{
|
140
|
+
BasicSessionCredentials sessionCredentials = getSessionCredentials();
|
141
|
+
ConfigSource config = this.config.deepCopy()
|
142
|
+
.set("auth_method", "session")
|
143
|
+
.set("access_key_id", sessionCredentials.getAWSAccessKeyId())
|
144
|
+
.set("secret_access_key", sessionCredentials.getAWSSecretKey())
|
145
|
+
.set("session_token", sessionCredentials.getSessionToken());
|
146
|
+
doTest(config);
|
147
|
+
}
|
148
|
+
|
149
|
+
private static BasicSessionCredentials getSessionCredentials()
|
150
|
+
{
|
151
|
+
AWSSecurityTokenServiceClient stsClient = new AWSSecurityTokenServiceClient(
|
152
|
+
new StaticCredentialsProvider(new BasicAWSCredentials(EMBULK_S3_TEST_ACCESS_KEY_ID, EMBULK_S3_TEST_SECRET_ACCESS_KEY)));
|
153
|
+
|
154
|
+
GetFederationTokenRequest getFederationTokenRequest = new GetFederationTokenRequest();
|
155
|
+
getFederationTokenRequest.setDurationSeconds(7200);
|
156
|
+
getFederationTokenRequest.setName("dummy");
|
157
|
+
|
158
|
+
Policy policy = new Policy().withStatements(new Statement(Statement.Effect.Allow)
|
159
|
+
.withActions(S3Actions.ListObjects, S3Actions.GetObject)
|
160
|
+
.withResources(
|
161
|
+
new Resource("arn:aws:s3:::" + EMBULK_S3_TEST_BUCKET + "/" + EMBULK_S3_TEST_PATH_PREFIX + "/*"),
|
162
|
+
new Resource("arn:aws:s3:::" + EMBULK_S3_TEST_BUCKET)));
|
163
|
+
getFederationTokenRequest.setPolicy(policy.toJson());
|
164
|
+
|
165
|
+
GetFederationTokenResult federationTokenResult = stsClient.getFederationToken(getFederationTokenRequest);
|
166
|
+
Credentials sessionCredentials = federationTokenResult.getCredentials();
|
167
|
+
|
168
|
+
return new BasicSessionCredentials(
|
169
|
+
sessionCredentials.getAccessKeyId(),
|
170
|
+
sessionCredentials.getSecretAccessKey(),
|
171
|
+
sessionCredentials.getSessionToken());
|
172
|
+
}
|
173
|
+
}
|
@@ -1,236 +1,220 @@
|
|
1
1
|
package org.embulk.input.s3;
|
2
2
|
|
3
|
-
import com.amazonaws.services.s3.AmazonS3Client;
|
4
|
-
import com.amazonaws.services.s3.model.GetObjectRequest;
|
5
|
-
import com.amazonaws.services.s3.model.ListObjectsRequest;
|
6
|
-
import com.amazonaws.services.s3.model.ObjectListing;
|
7
|
-
import com.amazonaws.services.s3.model.ObjectMetadata;
|
8
|
-
import com.amazonaws.services.s3.model.S3Object;
|
9
|
-
import com.amazonaws.services.s3.model.S3ObjectInputStream;
|
10
|
-
import com.amazonaws.services.s3.model.S3ObjectSummary;
|
11
|
-
import com.google.common.base.Optional;
|
12
3
|
import com.google.common.collect.ImmutableList;
|
4
|
+
import com.google.common.collect.ImmutableMap;
|
13
5
|
import org.embulk.EmbulkTestRuntime;
|
14
6
|
import org.embulk.config.ConfigDiff;
|
15
7
|
import org.embulk.config.ConfigSource;
|
16
8
|
import org.embulk.config.TaskReport;
|
17
9
|
import org.embulk.config.TaskSource;
|
18
|
-
import org.embulk.
|
19
|
-
import org.embulk.
|
20
|
-
import org.embulk.
|
21
|
-
import org.embulk.spi.
|
22
|
-
import org.embulk.spi.
|
23
|
-
import org.embulk.spi.util.
|
10
|
+
import org.embulk.spi.FileInputRunner;
|
11
|
+
import org.embulk.spi.InputPlugin;
|
12
|
+
import org.embulk.spi.PageOutput;
|
13
|
+
import org.embulk.spi.Schema;
|
14
|
+
import org.embulk.spi.TestPageBuilderReader.MockPageOutput;
|
15
|
+
import org.embulk.spi.util.Pages;
|
16
|
+
import org.embulk.standards.CsvParserPlugin;
|
24
17
|
import org.junit.Before;
|
18
|
+
import org.junit.BeforeClass;
|
25
19
|
import org.junit.Rule;
|
26
20
|
import org.junit.Test;
|
27
21
|
|
28
|
-
import java.
|
29
|
-
import java.util.Arrays;
|
22
|
+
import java.util.ArrayList;
|
30
23
|
import java.util.List;
|
31
24
|
|
32
|
-
import static org.junit.Assert
|
33
|
-
import static org.
|
34
|
-
import static org.
|
35
|
-
import static org.mockito.Mockito.mock;
|
36
|
-
import static org.mockito.Mockito.spy;
|
25
|
+
import static org.junit.Assert.assertEquals;
|
26
|
+
import static org.junit.Assert.assertNull;
|
27
|
+
import static org.junit.Assume.assumeNotNull;
|
37
28
|
|
38
29
|
public class TestS3FileInputPlugin
|
39
30
|
{
|
31
|
+
private static String EMBULK_S3_TEST_BUCKET;
|
32
|
+
private static String EMBULK_S3_TEST_ACCESS_KEY_ID;
|
33
|
+
private static String EMBULK_S3_TEST_SECRET_ACCESS_KEY;
|
34
|
+
private static final String EMBULK_S3_TEST_PATH_PREFIX = "embulk_input_s3_test";
|
35
|
+
|
36
|
+
/*
|
37
|
+
* This test case requires environment variables:
|
38
|
+
* EMBULK_S3_TEST_BUCKET
|
39
|
+
* EMBULK_S3_TEST_ACCESS_KEY_ID
|
40
|
+
* EMBULK_S3_TEST_SECRET_ACCESS_KEY
|
41
|
+
* If the variables are not set, the test case is skipped.
|
42
|
+
*/
|
43
|
+
@BeforeClass
|
44
|
+
public static void initializeConstantVariables()
|
45
|
+
{
|
46
|
+
EMBULK_S3_TEST_BUCKET = System.getenv("EMBULK_S3_TEST_BUCKET");
|
47
|
+
EMBULK_S3_TEST_ACCESS_KEY_ID = System.getenv("EMBULK_S3_TEST_ACCESS_KEY_ID");
|
48
|
+
EMBULK_S3_TEST_SECRET_ACCESS_KEY = System.getenv("EMBULK_S3_TEST_SECRET_ACCESS_KEY");
|
49
|
+
assumeNotNull(EMBULK_S3_TEST_BUCKET, EMBULK_S3_TEST_ACCESS_KEY_ID, EMBULK_S3_TEST_SECRET_ACCESS_KEY);
|
50
|
+
}
|
51
|
+
|
40
52
|
@Rule
|
41
53
|
public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
|
42
54
|
|
43
55
|
private ConfigSource config;
|
44
|
-
private
|
45
|
-
private
|
56
|
+
private FileInputRunner runner;
|
57
|
+
private MockPageOutput output;
|
46
58
|
|
47
59
|
@Before
|
48
60
|
public void createResources()
|
49
61
|
{
|
50
|
-
config =
|
51
|
-
|
52
|
-
|
62
|
+
config = runtime.getExec().newConfigSource()
|
63
|
+
.set("type", "s3")
|
64
|
+
.set("bucket", EMBULK_S3_TEST_BUCKET)
|
65
|
+
.set("access_key_id", EMBULK_S3_TEST_ACCESS_KEY_ID)
|
66
|
+
.set("secret_access_key", EMBULK_S3_TEST_SECRET_ACCESS_KEY)
|
67
|
+
.set("path_prefix", EMBULK_S3_TEST_PATH_PREFIX)
|
68
|
+
.set("parser", parserConfig(schemaConfig()));
|
69
|
+
runner = new FileInputRunner(runtime.getInstance(S3FileInputPlugin.class));
|
70
|
+
output = new MockPageOutput();
|
53
71
|
}
|
54
72
|
|
55
73
|
@Test
|
56
|
-
public void
|
74
|
+
public void simpleTest()
|
57
75
|
{
|
58
|
-
|
59
|
-
|
76
|
+
ConfigSource config = this.config.deepCopy();
|
77
|
+
ConfigDiff configDiff = runner.transaction(config, new Control(runner, output));
|
78
|
+
|
79
|
+
assertEquals(EMBULK_S3_TEST_PATH_PREFIX + "/sample_01.csv", configDiff.get(String.class, "last_path"));
|
80
|
+
assertRecords(config, output);
|
60
81
|
}
|
61
82
|
|
62
83
|
@Test
|
63
|
-
public void
|
84
|
+
public void useLastPath()
|
85
|
+
throws Exception
|
64
86
|
{
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
doReturn(list1).doReturn(list2).when(ol).getObjectSummaries();
|
71
|
-
doReturn(ol).when(client).listObjects(any(ListObjectsRequest.class));
|
72
|
-
doReturn("in/file/").doReturn(null).when(ol).getNextMarker();
|
73
|
-
|
74
|
-
// It counts only size != 0 files.
|
75
|
-
FileList.Builder builder = new FileList.Builder();
|
76
|
-
S3FileInputPlugin.listS3FilesByPrefix(builder, client, "bucketName", "prefix", Optional.<String>absent());
|
77
|
-
assertEquals(1, builder.size());
|
87
|
+
ConfigSource config = this.config.deepCopy().set("last_path", EMBULK_S3_TEST_PATH_PREFIX + "/sample_01.csv");
|
88
|
+
ConfigDiff configDiff = runner.transaction(config, new Control(runner, output));
|
89
|
+
|
90
|
+
assertEquals(EMBULK_S3_TEST_PATH_PREFIX + "/sample_01.csv", configDiff.get(String.class, "last_path"));
|
91
|
+
assertEquals(0, getRecords(config, output).size());
|
78
92
|
}
|
79
93
|
|
80
94
|
@Test
|
81
|
-
public void
|
95
|
+
public void emptyFilesWithLastPath()
|
96
|
+
throws Exception
|
82
97
|
{
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
ObjectListing listing = listing("in/aa", 0L, "in/aa/a", 3L, "in/aa/b", 2L, "in/aa/c", 1L);
|
88
|
-
doReturn(listing).when(client).listObjects(any(ListObjectsRequest.class));
|
89
|
-
|
90
|
-
ConfigDiff configDiff = plugin.transaction(config, new FileInputPlugin.Control() {
|
91
|
-
@Override
|
92
|
-
public List<TaskReport> run(TaskSource taskSource, int taskCount)
|
93
|
-
{
|
94
|
-
assertEquals(3, taskCount);
|
95
|
-
List<String> files = fileListToList(taskSource.loadTask(S3PluginTask.class).getFiles());
|
96
|
-
assertArrayEquals(new String[]{"in/aa/a", "in/aa/b", "in/aa/c"}, files.toArray(new String[files.size()]));
|
97
|
-
return emptyTaskReports(taskCount);
|
98
|
-
}
|
99
|
-
});
|
100
|
-
|
101
|
-
assertEquals("in/aa/c", configDiff.get(String.class, "last_path"));
|
102
|
-
}
|
98
|
+
ConfigSource config = this.config.deepCopy()
|
99
|
+
.set("path_prefix", "empty_files_prefix")
|
100
|
+
.set("last_path", EMBULK_S3_TEST_PATH_PREFIX + "/sample_01.csv");
|
101
|
+
ConfigDiff configDiff = runner.transaction(config, new Control(runner, output));
|
103
102
|
|
104
|
-
|
105
|
-
|
106
|
-
doReturn(listing).when(client).listObjects(any(ListObjectsRequest.class));
|
107
|
-
|
108
|
-
ConfigDiff configDiff = plugin.transaction(config, new FileInputPlugin.Control() {
|
109
|
-
@Override
|
110
|
-
public List<TaskReport> run(TaskSource taskSource, int taskCount)
|
111
|
-
{
|
112
|
-
assertEquals(0, taskCount);
|
113
|
-
assertTrue(fileListToList(taskSource.loadTask(S3PluginTask.class).getFiles()).isEmpty());
|
114
|
-
return emptyTaskReports(taskCount);
|
115
|
-
}
|
116
|
-
});
|
117
|
-
|
118
|
-
assertEquals(null, configDiff.get(String.class, "last_path", null));
|
119
|
-
}
|
120
|
-
|
121
|
-
{ // if files are empty, keep the previous last_path.
|
122
|
-
config.set("last_path", "in/bb");
|
123
|
-
|
124
|
-
ObjectListing listing = listing("in/aa", 0L);
|
125
|
-
doReturn(listing).when(client).listObjects(any(ListObjectsRequest.class));
|
126
|
-
|
127
|
-
ConfigDiff configDiff = plugin.transaction(config, new FileInputPlugin.Control() {
|
128
|
-
@Override
|
129
|
-
public List<TaskReport> run(TaskSource taskSource, int taskCount) {
|
130
|
-
assertEquals(0, taskCount);
|
131
|
-
assertTrue(fileListToList(taskSource.loadTask(S3PluginTask.class).getFiles()).isEmpty());
|
132
|
-
return emptyTaskReports(taskCount);
|
133
|
-
}
|
134
|
-
});
|
135
|
-
|
136
|
-
assertEquals("in/bb", configDiff.get(String.class, "last_path"));
|
137
|
-
}
|
103
|
+
assertEquals(EMBULK_S3_TEST_PATH_PREFIX + "/sample_01.csv", configDiff.get(String.class, "last_path")); // keep the last_path
|
104
|
+
assertEquals(0, getRecords(config, output).size());
|
138
105
|
}
|
139
106
|
|
140
107
|
@Test
|
141
|
-
public void
|
108
|
+
public void useTotalFileCountLimit()
|
142
109
|
throws Exception
|
143
110
|
{
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
PluginTask task = config.loadConfig(plugin.getTaskClass());
|
148
|
-
FileList.Builder builder = new FileList.Builder();
|
149
|
-
builder.add("in/aa/a", 100);
|
150
|
-
task.setFiles(builder.build());
|
151
|
-
|
152
|
-
StringBuilder sbuf = new StringBuilder();
|
153
|
-
try (S3FileInput input = (S3FileInput) plugin.open(task.dump(), 0)) {
|
154
|
-
LineDecoder d = new LineDecoder(input, config.loadConfig(LineDecoder.DecoderTask.class));
|
155
|
-
while (d.nextFile()) {
|
156
|
-
sbuf.append(d.poll());
|
157
|
-
}
|
158
|
-
}
|
159
|
-
assertEquals("aa", sbuf.toString());
|
160
|
-
}
|
111
|
+
ConfigSource config = this.config.deepCopy().set("total_file_count_limit", 0);
|
112
|
+
ConfigDiff configDiff = runner.transaction(config, new Control(runner, output));
|
161
113
|
|
162
|
-
|
163
|
-
|
164
|
-
return Exec.newConfigSource()
|
165
|
-
.set("bucket", "my_bucket")
|
166
|
-
.set("path_prefix", "my_path_prefix")
|
167
|
-
.set("access_key_id", "my_access_key_id")
|
168
|
-
.set("secret_access_key", "my_secret_access_key");
|
114
|
+
assertNull(configDiff.get(String.class, "last_path"));
|
115
|
+
assertEquals(0, getRecords(config, output).size());
|
169
116
|
}
|
170
117
|
|
171
|
-
|
118
|
+
@Test
|
119
|
+
public void usePathMatchPattern()
|
120
|
+
throws Exception
|
172
121
|
{
|
173
|
-
|
174
|
-
|
175
|
-
return listing;
|
176
|
-
}
|
122
|
+
ConfigSource config = this.config.deepCopy().set("path_match_pattern", "/match/");
|
123
|
+
ConfigDiff configDiff = runner.transaction(config, new Control(runner, output));
|
177
124
|
|
178
|
-
|
179
|
-
|
180
|
-
doReturn(null).when(listing).getNextMarker();
|
181
|
-
return listing;
|
125
|
+
assertNull(configDiff.get(String.class, "last_path"));
|
126
|
+
assertEquals(0, getRecords(config, output).size());
|
182
127
|
}
|
183
128
|
|
184
|
-
static
|
129
|
+
static class Control
|
130
|
+
implements InputPlugin.Control
|
185
131
|
{
|
186
|
-
|
187
|
-
|
188
|
-
|
132
|
+
private FileInputRunner runner;
|
133
|
+
private PageOutput output;
|
134
|
+
|
135
|
+
Control(FileInputRunner runner, PageOutput output)
|
136
|
+
{
|
137
|
+
this.runner = runner;
|
138
|
+
this.output = output;
|
189
139
|
}
|
190
140
|
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
141
|
+
@Override
|
142
|
+
public List<TaskReport> run(TaskSource taskSource, Schema schema, int taskCount)
|
143
|
+
{
|
144
|
+
List<TaskReport> reports = new ArrayList<>();
|
145
|
+
for (int i = 0; i < taskCount; i++) {
|
146
|
+
reports.add(runner.run(taskSource, schema, i, output));
|
147
|
+
}
|
148
|
+
return reports;
|
195
149
|
}
|
196
|
-
return builder.build();
|
197
150
|
}
|
198
151
|
|
199
|
-
static
|
152
|
+
static ImmutableMap<String, Object> parserConfig(ImmutableList<Object> schemaConfig)
|
200
153
|
{
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
154
|
+
ImmutableMap.Builder<String, Object> builder = new ImmutableMap.Builder<>();
|
155
|
+
builder.put("type", "csv");
|
156
|
+
builder.put("newline", "CRLF");
|
157
|
+
builder.put("delimiter", ",");
|
158
|
+
builder.put("quote", "\"");
|
159
|
+
builder.put("escape", "\"");
|
160
|
+
builder.put("trim_if_not_quoted", false);
|
161
|
+
builder.put("skip_header_lines", 0);
|
162
|
+
builder.put("allow_extra_columns", false);
|
163
|
+
builder.put("allow_optional_columns", false);
|
164
|
+
builder.put("columns", schemaConfig);
|
165
|
+
return builder.build();
|
205
166
|
}
|
206
167
|
|
207
|
-
static
|
168
|
+
static ImmutableList<Object> schemaConfig()
|
208
169
|
{
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
170
|
+
ImmutableList.Builder<Object> builder = new ImmutableList.Builder<>();
|
171
|
+
builder.add(ImmutableMap.of("name", "timestamp", "type", "timestamp", "format", "%Y-%m-%d %H:%M:%S"));
|
172
|
+
builder.add(ImmutableMap.of("name", "host", "type", "string"));
|
173
|
+
builder.add(ImmutableMap.of("name", "path", "type", "string"));
|
174
|
+
builder.add(ImmutableMap.of("name", "method", "type", "string"));
|
175
|
+
builder.add(ImmutableMap.of("name", "referer", "type", "string"));
|
176
|
+
builder.add(ImmutableMap.of("name", "code", "type", "long"));
|
177
|
+
builder.add(ImmutableMap.of("name", "agent", "type", "string"));
|
178
|
+
builder.add(ImmutableMap.of("name", "user", "type", "string"));
|
179
|
+
builder.add(ImmutableMap.of("name", "size", "type", "long"));
|
180
|
+
return builder.build();
|
215
181
|
}
|
216
182
|
|
217
|
-
static
|
183
|
+
static void assertRecords(ConfigSource config, MockPageOutput output)
|
218
184
|
{
|
219
|
-
|
220
|
-
|
221
|
-
|
185
|
+
List<Object[]> records = getRecords(config, output);
|
186
|
+
|
187
|
+
assertEquals(2, records.size());
|
188
|
+
{
|
189
|
+
Object[] record = records.get(0);
|
190
|
+
assertEquals("2014-10-02 22:15:39 UTC", record[0].toString());
|
191
|
+
assertEquals("84.186.29.187", record[1]);
|
192
|
+
assertEquals("/category/electronics", record[2]);
|
193
|
+
assertEquals("GET", record[3]);
|
194
|
+
assertEquals("/category/music", record[4]);
|
195
|
+
assertEquals(200L, record[5]);
|
196
|
+
assertEquals("Mozilla/5.0", record[6]);
|
197
|
+
assertEquals("-", record[7]);
|
198
|
+
assertEquals(136L, record[8]);
|
199
|
+
}
|
200
|
+
|
201
|
+
{
|
202
|
+
Object[] record = records.get(1);
|
203
|
+
assertEquals("2014-10-02 22:15:01 UTC", record[0].toString());
|
204
|
+
assertEquals("140.36.216.47", record[1]);
|
205
|
+
assertEquals("/category/music?from=10", record[2]);
|
206
|
+
assertEquals("GET", record[3]);
|
207
|
+
assertEquals("-", record[4]);
|
208
|
+
assertEquals(200L, record[5]);
|
209
|
+
assertEquals("Mozilla/5.0", record[6]);
|
210
|
+
assertEquals("-", record[7]);
|
211
|
+
assertEquals(70L, record[8]);
|
222
212
|
}
|
223
|
-
return reports.build();
|
224
213
|
}
|
225
214
|
|
226
|
-
|
215
|
+
static List<Object[]> getRecords(ConfigSource config, MockPageOutput output)
|
227
216
|
{
|
228
|
-
|
229
|
-
|
230
|
-
for (String path : list.get(i)) {
|
231
|
-
builder.add(path);
|
232
|
-
}
|
233
|
-
}
|
234
|
-
return builder.build();
|
217
|
+
Schema schema = config.getNested("parser").loadConfig(CsvParserPlugin.PluginTask.class).getSchemaConfig().toSchema();
|
218
|
+
return Pages.toObjects(schema, output.pages);
|
235
219
|
}
|
236
220
|
}
|
@@ -2,6 +2,9 @@ package org.embulk.input.s3;
|
|
2
2
|
|
3
3
|
import com.amazonaws.services.s3.AmazonS3Client;
|
4
4
|
import com.amazonaws.services.s3.model.GetObjectRequest;
|
5
|
+
import com.amazonaws.services.s3.model.ObjectMetadata;
|
6
|
+
import com.amazonaws.services.s3.model.S3Object;
|
7
|
+
import com.amazonaws.services.s3.model.S3ObjectInputStream;
|
5
8
|
import org.embulk.EmbulkTestRuntime;
|
6
9
|
import org.embulk.input.s3.AbstractS3FileInputPlugin.S3InputStreamReopener;
|
7
10
|
import org.junit.Before;
|
@@ -9,11 +12,11 @@ import org.junit.Rule;
|
|
9
12
|
import org.junit.Test;
|
10
13
|
|
11
14
|
import java.io.BufferedReader;
|
15
|
+
import java.io.ByteArrayInputStream;
|
12
16
|
import java.io.InputStream;
|
13
17
|
import java.io.InputStreamReader;
|
14
18
|
|
15
19
|
import static org.junit.Assert.assertEquals;
|
16
|
-
import static org.embulk.input.s3.TestS3FileInputPlugin.s3object;
|
17
20
|
import static org.mockito.Matchers.any;
|
18
21
|
import static org.mockito.Mockito.doReturn;
|
19
22
|
import static org.mockito.Mockito.doThrow;
|
@@ -60,4 +63,14 @@ public class TestS3InputStreamReopener
|
|
60
63
|
}
|
61
64
|
}
|
62
65
|
}
|
66
|
+
|
67
|
+
static S3Object s3object(String key, String value)
|
68
|
+
{
|
69
|
+
S3Object o = new S3Object();
|
70
|
+
o.setObjectContent(new S3ObjectInputStream(new ByteArrayInputStream(value.getBytes()), null));
|
71
|
+
ObjectMetadata om = new ObjectMetadata();
|
72
|
+
om.setContentLength(value.length());
|
73
|
+
o.setObjectMetadata(om);
|
74
|
+
return o;
|
75
|
+
}
|
63
76
|
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-s3
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-12-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -52,13 +52,15 @@ files:
|
|
52
52
|
- src/main/java/org/embulk/input/s3/AwsCredentialsTask.java
|
53
53
|
- src/main/java/org/embulk/input/s3/FileList.java
|
54
54
|
- src/main/java/org/embulk/input/s3/S3FileInputPlugin.java
|
55
|
+
- src/test/java/org/embulk/input/s3/TestAwsCredentials.java
|
55
56
|
- src/test/java/org/embulk/input/s3/TestS3FileInputPlugin.java
|
56
57
|
- src/test/java/org/embulk/input/s3/TestS3InputStreamReopener.java
|
58
|
+
- src/test/resources/sample_01.csv
|
57
59
|
- classpath/aws-java-sdk-core-1.10.33.jar
|
58
60
|
- classpath/aws-java-sdk-kms-1.10.33.jar
|
59
61
|
- classpath/aws-java-sdk-s3-1.10.33.jar
|
60
62
|
- classpath/commons-codec-1.6.jar
|
61
|
-
- classpath/embulk-input-s3-0.2.
|
63
|
+
- classpath/embulk-input-s3-0.2.5.jar
|
62
64
|
- classpath/httpclient-4.3.6.jar
|
63
65
|
- classpath/httpcore-4.3.3.jar
|
64
66
|
- classpath/jcl-over-slf4j-1.7.12.jar
|
Binary file
|