embulk-input-s3 0.2.4 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/classpath/embulk-input-s3-0.2.5.jar +0 -0
- data/src/main/java/org/embulk/input/s3/AbstractS3FileInputPlugin.java +23 -9
- data/src/main/java/org/embulk/input/s3/FileList.java +20 -2
- data/src/test/java/org/embulk/input/s3/TestAwsCredentials.java +173 -0
- data/src/test/java/org/embulk/input/s3/TestS3FileInputPlugin.java +151 -167
- data/src/test/java/org/embulk/input/s3/TestS3InputStreamReopener.java +14 -1
- data/src/test/resources/sample_01.csv +3 -0
- metadata +5 -3
- data/classpath/embulk-input-s3-0.2.4.jar +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 857c5f811620e1273a6fc5ec69a4eeefb658f1b0
|
4
|
+
data.tar.gz: deb852435fb0cee1d2db0391cd3a8d0f30255638
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f5dd533e7e7d5fd366268496118c110689bde80d4f5c98469eb9ff1be77734a168080a24f52d3415dcda158ce5cd1ace35c9025fc4b654663c9726f3994b9bc3
|
7
|
+
data.tar.gz: 1d5cab1c9c166a6352dbab3a30e65d058740e3b1883bb0776fac7bc6b677b6fe2bc8e047dba6e006f75cae722e0416504e8d6f493c6466dd7d54be3e484c79a7
|
Binary file
|
@@ -22,6 +22,7 @@ import com.amazonaws.services.s3.model.ObjectListing;
|
|
22
22
|
import com.amazonaws.services.s3.model.GetObjectRequest;
|
23
23
|
import com.amazonaws.services.s3.model.S3Object;
|
24
24
|
import com.amazonaws.ClientConfiguration;
|
25
|
+
import com.amazonaws.AmazonServiceException;
|
25
26
|
import com.amazonaws.Protocol;
|
26
27
|
import org.embulk.config.Config;
|
27
28
|
import org.embulk.config.ConfigInject;
|
@@ -31,6 +32,7 @@ import org.embulk.config.TaskSource;
|
|
31
32
|
import org.embulk.config.ConfigSource;
|
32
33
|
import org.embulk.config.ConfigDiff;
|
33
34
|
import org.embulk.config.TaskReport;
|
35
|
+
import org.embulk.config.ConfigException;
|
34
36
|
import org.embulk.spi.BufferAllocator;
|
35
37
|
import org.embulk.spi.Exec;
|
36
38
|
import org.embulk.spi.FileInputPlugin;
|
@@ -139,17 +141,29 @@ public abstract class AbstractS3FileInputPlugin
|
|
139
141
|
|
140
142
|
private FileList listFiles(PluginTask task)
|
141
143
|
{
|
142
|
-
|
143
|
-
|
144
|
+
try {
|
145
|
+
AmazonS3Client client = newS3Client(task);
|
146
|
+
String bucketName = task.getBucket();
|
144
147
|
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
+
if (task.getPathPrefix().equals("/")) {
|
149
|
+
log.info("Listing files with prefix \"/\". This doesn't mean all files in a bucket. If you intend to read all files, use \"path_prefix: ''\" (empty string) instead.");
|
150
|
+
}
|
148
151
|
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
152
|
+
FileList.Builder builder = new FileList.Builder(task);
|
153
|
+
listS3FilesByPrefix(builder, client, bucketName,
|
154
|
+
task.getPathPrefix(), task.getLastPath());
|
155
|
+
return builder.build();
|
156
|
+
}
|
157
|
+
catch (AmazonServiceException ex) {
|
158
|
+
if (ex.getErrorType().equals(AmazonServiceException.ErrorType.Client)) {
|
159
|
+
// HTTP 40x errors. auth error, bucket doesn't exist, etc. See AWS document for the full list:
|
160
|
+
// http://docs.aws.amazon.com/AmazonS3/latest/API/ErrorResponses.html
|
161
|
+
if (ex.getStatusCode() != 400) { // 400 Bad Request is an unexpected error
|
162
|
+
throw new ConfigException(ex);
|
163
|
+
}
|
164
|
+
}
|
165
|
+
throw ex;
|
166
|
+
}
|
153
167
|
}
|
154
168
|
|
155
169
|
/**
|
@@ -5,6 +5,8 @@ import java.util.AbstractList;
|
|
5
5
|
import java.util.ArrayList;
|
6
6
|
import java.util.zip.GZIPInputStream;
|
7
7
|
import java.util.zip.GZIPOutputStream;
|
8
|
+
import java.util.regex.Pattern;
|
9
|
+
import java.util.regex.Matcher;
|
8
10
|
import java.io.InputStream;
|
9
11
|
import java.io.OutputStream;
|
10
12
|
import java.io.BufferedOutputStream;
|
@@ -29,6 +31,10 @@ public class FileList
|
|
29
31
|
{
|
30
32
|
public interface Task
|
31
33
|
{
|
34
|
+
@Config("path_match_pattern")
|
35
|
+
@ConfigDefault("\".*\"")
|
36
|
+
String getPathMatchPattern();
|
37
|
+
|
32
38
|
@Config("total_file_count_limit")
|
33
39
|
@ConfigDefault("2147483647")
|
34
40
|
int getTotalFileCountLimit();
|
@@ -63,17 +69,21 @@ public class FileList
|
|
63
69
|
private String last = null;
|
64
70
|
|
65
71
|
private int limitCount = Integer.MAX_VALUE;
|
72
|
+
private Pattern pathMatchPattern;
|
73
|
+
|
66
74
|
private final ByteBuffer castBuffer = ByteBuffer.allocate(4);
|
67
75
|
|
68
76
|
public Builder(Task task)
|
69
77
|
{
|
70
78
|
this();
|
71
79
|
this.limitCount = task.getTotalFileCountLimit();
|
80
|
+
this.pathMatchPattern = Pattern.compile(task.getPathMatchPattern());
|
72
81
|
}
|
73
82
|
|
74
83
|
public Builder(ConfigSource config)
|
75
84
|
{
|
76
85
|
this();
|
86
|
+
this.pathMatchPattern = Pattern.compile(config.get(String.class, "path_match_pattern", ".*"));
|
77
87
|
this.limitCount = config.get(int.class, "total_file_count_limit", Integer.MAX_VALUE);
|
78
88
|
}
|
79
89
|
|
@@ -94,6 +104,12 @@ public class FileList
|
|
94
104
|
return this;
|
95
105
|
}
|
96
106
|
|
107
|
+
public Builder pathMatchPattern(String pattern)
|
108
|
+
{
|
109
|
+
this.pathMatchPattern = Pattern.compile(pattern);
|
110
|
+
return this;
|
111
|
+
}
|
112
|
+
|
97
113
|
public int size()
|
98
114
|
{
|
99
115
|
return entries.size();
|
@@ -104,6 +120,7 @@ public class FileList
|
|
104
120
|
return size() < limitCount;
|
105
121
|
}
|
106
122
|
|
123
|
+
// returns true if this file is used
|
107
124
|
public synchronized boolean add(String path, long size)
|
108
125
|
{
|
109
126
|
// TODO throw IllegalStateException if stream is already closed
|
@@ -112,8 +129,9 @@ public class FileList
|
|
112
129
|
return false;
|
113
130
|
}
|
114
131
|
|
115
|
-
|
116
|
-
|
132
|
+
if (!pathMatchPattern.matcher(path).matches()) {
|
133
|
+
return false;
|
134
|
+
}
|
117
135
|
|
118
136
|
int index = entries.size();
|
119
137
|
entries.add(new Entry(index, size));
|
@@ -0,0 +1,173 @@
|
|
1
|
+
package org.embulk.input.s3;
|
2
|
+
|
3
|
+
import com.amazonaws.auth.BasicAWSCredentials;
|
4
|
+
import com.amazonaws.auth.BasicSessionCredentials;
|
5
|
+
import com.amazonaws.auth.policy.Policy;
|
6
|
+
import com.amazonaws.auth.policy.Resource;
|
7
|
+
import com.amazonaws.auth.policy.Statement;
|
8
|
+
import com.amazonaws.auth.policy.actions.S3Actions;
|
9
|
+
import com.amazonaws.internal.StaticCredentialsProvider;
|
10
|
+
import com.amazonaws.services.securitytoken.AWSSecurityTokenServiceClient;
|
11
|
+
import com.amazonaws.services.securitytoken.model.Credentials;
|
12
|
+
import com.amazonaws.services.securitytoken.model.GetFederationTokenRequest;
|
13
|
+
import com.amazonaws.services.securitytoken.model.GetFederationTokenResult;
|
14
|
+
import org.embulk.EmbulkTestRuntime;
|
15
|
+
import org.embulk.config.ConfigDiff;
|
16
|
+
import org.embulk.config.ConfigSource;
|
17
|
+
import org.embulk.input.s3.TestS3FileInputPlugin.Control;
|
18
|
+
import org.embulk.spi.FileInputRunner;
|
19
|
+
import org.embulk.spi.TestPageBuilderReader;
|
20
|
+
import org.junit.Before;
|
21
|
+
import org.junit.BeforeClass;
|
22
|
+
import org.junit.Rule;
|
23
|
+
import org.junit.Test;
|
24
|
+
|
25
|
+
import static org.embulk.input.s3.TestS3FileInputPlugin.assertRecords;
|
26
|
+
import static org.embulk.input.s3.TestS3FileInputPlugin.parserConfig;
|
27
|
+
import static org.embulk.input.s3.TestS3FileInputPlugin.schemaConfig;
|
28
|
+
import static org.junit.Assert.assertEquals;
|
29
|
+
import static org.junit.Assume.assumeNotNull;
|
30
|
+
|
31
|
+
public class TestAwsCredentials
|
32
|
+
{
|
33
|
+
private static String EMBULK_S3_TEST_BUCKET;
|
34
|
+
private static String EMBULK_S3_TEST_ACCESS_KEY_ID;
|
35
|
+
private static String EMBULK_S3_TEST_SECRET_ACCESS_KEY;
|
36
|
+
private static final String EMBULK_S3_TEST_PATH_PREFIX = "embulk_input_s3_test";
|
37
|
+
|
38
|
+
/*
|
39
|
+
* This test case requires environment variables:
|
40
|
+
* EMBULK_S3_TEST_BUCKET
|
41
|
+
* EMBULK_S3_TEST_ACCESS_KEY_ID
|
42
|
+
* EMBULK_S3_TEST_SECRET_ACCESS_KEY
|
43
|
+
* If the variables are not set, the test case is skipped.
|
44
|
+
*/
|
45
|
+
@BeforeClass
|
46
|
+
public static void initializeConstantVariables()
|
47
|
+
{
|
48
|
+
EMBULK_S3_TEST_BUCKET = System.getenv("EMBULK_S3_TEST_BUCKET");
|
49
|
+
EMBULK_S3_TEST_ACCESS_KEY_ID = System.getenv("EMBULK_S3_TEST_ACCESS_KEY_ID");
|
50
|
+
EMBULK_S3_TEST_SECRET_ACCESS_KEY = System.getenv("EMBULK_S3_TEST_SECRET_ACCESS_KEY");
|
51
|
+
assumeNotNull(EMBULK_S3_TEST_BUCKET, EMBULK_S3_TEST_ACCESS_KEY_ID, EMBULK_S3_TEST_SECRET_ACCESS_KEY);
|
52
|
+
}
|
53
|
+
|
54
|
+
@Rule
|
55
|
+
public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
|
56
|
+
|
57
|
+
private ConfigSource config;
|
58
|
+
private FileInputRunner runner;
|
59
|
+
private TestPageBuilderReader.MockPageOutput output;
|
60
|
+
|
61
|
+
@Before
|
62
|
+
public void createResources()
|
63
|
+
{
|
64
|
+
config = runtime.getExec().newConfigSource()
|
65
|
+
.set("type", "s3")
|
66
|
+
.set("bucket", EMBULK_S3_TEST_BUCKET)
|
67
|
+
.set("path_prefix", EMBULK_S3_TEST_PATH_PREFIX)
|
68
|
+
.set("parser", parserConfig(schemaConfig()));
|
69
|
+
runner = new FileInputRunner(runtime.getInstance(S3FileInputPlugin.class));
|
70
|
+
output = new TestPageBuilderReader.MockPageOutput();
|
71
|
+
}
|
72
|
+
|
73
|
+
private void doTest(ConfigSource config)
|
74
|
+
{
|
75
|
+
ConfigDiff configDiff = runner.transaction(config, new Control(runner, output));
|
76
|
+
|
77
|
+
assertEquals(EMBULK_S3_TEST_PATH_PREFIX + "/sample_01.csv", configDiff.get(String.class, "last_path"));
|
78
|
+
assertRecords(config, output);
|
79
|
+
}
|
80
|
+
|
81
|
+
@Test
|
82
|
+
public void useBasic()
|
83
|
+
{
|
84
|
+
ConfigSource config = this.config.deepCopy()
|
85
|
+
.set("auth_method", "basic")
|
86
|
+
.set("access_key_id", EMBULK_S3_TEST_ACCESS_KEY_ID)
|
87
|
+
.set("secret_access_key", EMBULK_S3_TEST_SECRET_ACCESS_KEY);
|
88
|
+
doTest(config);
|
89
|
+
}
|
90
|
+
|
91
|
+
@Test
|
92
|
+
public void useEnv()
|
93
|
+
{
|
94
|
+
// TODO
|
95
|
+
}
|
96
|
+
|
97
|
+
@Test
|
98
|
+
public void useInstance()
|
99
|
+
{
|
100
|
+
// TODO
|
101
|
+
}
|
102
|
+
|
103
|
+
@Test
|
104
|
+
public void useProfile()
|
105
|
+
{
|
106
|
+
// TODO
|
107
|
+
}
|
108
|
+
|
109
|
+
@Test
|
110
|
+
public void useProperties()
|
111
|
+
{
|
112
|
+
String origAccessKeyId = System.getProperty("aws.accessKeyId");
|
113
|
+
String origSecretKey = System.getProperty("aws.secretKey");
|
114
|
+
try {
|
115
|
+
|
116
|
+
ConfigSource config = this.config.deepCopy().set("auth_method", "properties");
|
117
|
+
System.setProperty("aws.accessKeyId", EMBULK_S3_TEST_ACCESS_KEY_ID);
|
118
|
+
System.setProperty("aws.secretKey", EMBULK_S3_TEST_SECRET_ACCESS_KEY);
|
119
|
+
doTest(config);
|
120
|
+
}
|
121
|
+
finally {
|
122
|
+
if (origAccessKeyId != null) {
|
123
|
+
System.setProperty("aws.accessKeyId", origAccessKeyId);
|
124
|
+
}
|
125
|
+
if (origSecretKey != null) {
|
126
|
+
System.setProperty("aws.secretKey", origSecretKey); // restore the saved secret key, not the access key id
|
127
|
+
}
|
128
|
+
}
|
129
|
+
}
|
130
|
+
|
131
|
+
@Test
|
132
|
+
public void useAnonymous()
|
133
|
+
{
|
134
|
+
// TODO
|
135
|
+
}
|
136
|
+
|
137
|
+
@Test
|
138
|
+
public void useSession()
|
139
|
+
{
|
140
|
+
BasicSessionCredentials sessionCredentials = getSessionCredentials();
|
141
|
+
ConfigSource config = this.config.deepCopy()
|
142
|
+
.set("auth_method", "session")
|
143
|
+
.set("access_key_id", sessionCredentials.getAWSAccessKeyId())
|
144
|
+
.set("secret_access_key", sessionCredentials.getAWSSecretKey())
|
145
|
+
.set("session_token", sessionCredentials.getSessionToken());
|
146
|
+
doTest(config);
|
147
|
+
}
|
148
|
+
|
149
|
+
private static BasicSessionCredentials getSessionCredentials()
|
150
|
+
{
|
151
|
+
AWSSecurityTokenServiceClient stsClient = new AWSSecurityTokenServiceClient(
|
152
|
+
new StaticCredentialsProvider(new BasicAWSCredentials(EMBULK_S3_TEST_ACCESS_KEY_ID, EMBULK_S3_TEST_SECRET_ACCESS_KEY)));
|
153
|
+
|
154
|
+
GetFederationTokenRequest getFederationTokenRequest = new GetFederationTokenRequest();
|
155
|
+
getFederationTokenRequest.setDurationSeconds(7200);
|
156
|
+
getFederationTokenRequest.setName("dummy");
|
157
|
+
|
158
|
+
Policy policy = new Policy().withStatements(new Statement(Statement.Effect.Allow)
|
159
|
+
.withActions(S3Actions.ListObjects, S3Actions.GetObject)
|
160
|
+
.withResources(
|
161
|
+
new Resource("arn:aws:s3:::" + EMBULK_S3_TEST_BUCKET + "/" + EMBULK_S3_TEST_PATH_PREFIX + "/*"),
|
162
|
+
new Resource("arn:aws:s3:::" + EMBULK_S3_TEST_BUCKET)));
|
163
|
+
getFederationTokenRequest.setPolicy(policy.toJson());
|
164
|
+
|
165
|
+
GetFederationTokenResult federationTokenResult = stsClient.getFederationToken(getFederationTokenRequest);
|
166
|
+
Credentials sessionCredentials = federationTokenResult.getCredentials();
|
167
|
+
|
168
|
+
return new BasicSessionCredentials(
|
169
|
+
sessionCredentials.getAccessKeyId(),
|
170
|
+
sessionCredentials.getSecretAccessKey(),
|
171
|
+
sessionCredentials.getSessionToken());
|
172
|
+
}
|
173
|
+
}
|
@@ -1,236 +1,220 @@
|
|
1
1
|
package org.embulk.input.s3;
|
2
2
|
|
3
|
-
import com.amazonaws.services.s3.AmazonS3Client;
|
4
|
-
import com.amazonaws.services.s3.model.GetObjectRequest;
|
5
|
-
import com.amazonaws.services.s3.model.ListObjectsRequest;
|
6
|
-
import com.amazonaws.services.s3.model.ObjectListing;
|
7
|
-
import com.amazonaws.services.s3.model.ObjectMetadata;
|
8
|
-
import com.amazonaws.services.s3.model.S3Object;
|
9
|
-
import com.amazonaws.services.s3.model.S3ObjectInputStream;
|
10
|
-
import com.amazonaws.services.s3.model.S3ObjectSummary;
|
11
|
-
import com.google.common.base.Optional;
|
12
3
|
import com.google.common.collect.ImmutableList;
|
4
|
+
import com.google.common.collect.ImmutableMap;
|
13
5
|
import org.embulk.EmbulkTestRuntime;
|
14
6
|
import org.embulk.config.ConfigDiff;
|
15
7
|
import org.embulk.config.ConfigSource;
|
16
8
|
import org.embulk.config.TaskReport;
|
17
9
|
import org.embulk.config.TaskSource;
|
18
|
-
import org.embulk.
|
19
|
-
import org.embulk.
|
20
|
-
import org.embulk.
|
21
|
-
import org.embulk.spi.
|
22
|
-
import org.embulk.spi.
|
23
|
-
import org.embulk.spi.util.
|
10
|
+
import org.embulk.spi.FileInputRunner;
|
11
|
+
import org.embulk.spi.InputPlugin;
|
12
|
+
import org.embulk.spi.PageOutput;
|
13
|
+
import org.embulk.spi.Schema;
|
14
|
+
import org.embulk.spi.TestPageBuilderReader.MockPageOutput;
|
15
|
+
import org.embulk.spi.util.Pages;
|
16
|
+
import org.embulk.standards.CsvParserPlugin;
|
24
17
|
import org.junit.Before;
|
18
|
+
import org.junit.BeforeClass;
|
25
19
|
import org.junit.Rule;
|
26
20
|
import org.junit.Test;
|
27
21
|
|
28
|
-
import java.
|
29
|
-
import java.util.Arrays;
|
22
|
+
import java.util.ArrayList;
|
30
23
|
import java.util.List;
|
31
24
|
|
32
|
-
import static org.junit.Assert
|
33
|
-
import static org.
|
34
|
-
import static org.
|
35
|
-
import static org.mockito.Mockito.mock;
|
36
|
-
import static org.mockito.Mockito.spy;
|
25
|
+
import static org.junit.Assert.assertEquals;
|
26
|
+
import static org.junit.Assert.assertNull;
|
27
|
+
import static org.junit.Assume.assumeNotNull;
|
37
28
|
|
38
29
|
public class TestS3FileInputPlugin
|
39
30
|
{
|
31
|
+
private static String EMBULK_S3_TEST_BUCKET;
|
32
|
+
private static String EMBULK_S3_TEST_ACCESS_KEY_ID;
|
33
|
+
private static String EMBULK_S3_TEST_SECRET_ACCESS_KEY;
|
34
|
+
private static final String EMBULK_S3_TEST_PATH_PREFIX = "embulk_input_s3_test";
|
35
|
+
|
36
|
+
/*
|
37
|
+
* This test case requires environment variables:
|
38
|
+
* EMBULK_S3_TEST_BUCKET
|
39
|
+
* EMBULK_S3_TEST_ACCESS_KEY_ID
|
40
|
+
* EMBULK_S3_TEST_SECRET_ACCESS_KEY
|
41
|
+
* If the variables are not set, the test case is skipped.
|
42
|
+
*/
|
43
|
+
@BeforeClass
|
44
|
+
public static void initializeConstantVariables()
|
45
|
+
{
|
46
|
+
EMBULK_S3_TEST_BUCKET = System.getenv("EMBULK_S3_TEST_BUCKET");
|
47
|
+
EMBULK_S3_TEST_ACCESS_KEY_ID = System.getenv("EMBULK_S3_TEST_ACCESS_KEY_ID");
|
48
|
+
EMBULK_S3_TEST_SECRET_ACCESS_KEY = System.getenv("EMBULK_S3_TEST_SECRET_ACCESS_KEY");
|
49
|
+
assumeNotNull(EMBULK_S3_TEST_BUCKET, EMBULK_S3_TEST_ACCESS_KEY_ID, EMBULK_S3_TEST_SECRET_ACCESS_KEY);
|
50
|
+
}
|
51
|
+
|
40
52
|
@Rule
|
41
53
|
public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
|
42
54
|
|
43
55
|
private ConfigSource config;
|
44
|
-
private
|
45
|
-
private
|
56
|
+
private FileInputRunner runner;
|
57
|
+
private MockPageOutput output;
|
46
58
|
|
47
59
|
@Before
|
48
60
|
public void createResources()
|
49
61
|
{
|
50
|
-
config =
|
51
|
-
|
52
|
-
|
62
|
+
config = runtime.getExec().newConfigSource()
|
63
|
+
.set("type", "s3")
|
64
|
+
.set("bucket", EMBULK_S3_TEST_BUCKET)
|
65
|
+
.set("access_key_id", EMBULK_S3_TEST_ACCESS_KEY_ID)
|
66
|
+
.set("secret_access_key", EMBULK_S3_TEST_SECRET_ACCESS_KEY)
|
67
|
+
.set("path_prefix", EMBULK_S3_TEST_PATH_PREFIX)
|
68
|
+
.set("parser", parserConfig(schemaConfig()));
|
69
|
+
runner = new FileInputRunner(runtime.getInstance(S3FileInputPlugin.class));
|
70
|
+
output = new MockPageOutput();
|
53
71
|
}
|
54
72
|
|
55
73
|
@Test
|
56
|
-
public void
|
74
|
+
public void simpleTest()
|
57
75
|
{
|
58
|
-
|
59
|
-
|
76
|
+
ConfigSource config = this.config.deepCopy();
|
77
|
+
ConfigDiff configDiff = runner.transaction(config, new Control(runner, output));
|
78
|
+
|
79
|
+
assertEquals(EMBULK_S3_TEST_PATH_PREFIX + "/sample_01.csv", configDiff.get(String.class, "last_path"));
|
80
|
+
assertRecords(config, output);
|
60
81
|
}
|
61
82
|
|
62
83
|
@Test
|
63
|
-
public void
|
84
|
+
public void useLastPath()
|
85
|
+
throws Exception
|
64
86
|
{
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
doReturn(list1).doReturn(list2).when(ol).getObjectSummaries();
|
71
|
-
doReturn(ol).when(client).listObjects(any(ListObjectsRequest.class));
|
72
|
-
doReturn("in/file/").doReturn(null).when(ol).getNextMarker();
|
73
|
-
|
74
|
-
// It counts only size != 0 files.
|
75
|
-
FileList.Builder builder = new FileList.Builder();
|
76
|
-
S3FileInputPlugin.listS3FilesByPrefix(builder, client, "bucketName", "prefix", Optional.<String>absent());
|
77
|
-
assertEquals(1, builder.size());
|
87
|
+
ConfigSource config = this.config.deepCopy().set("last_path", EMBULK_S3_TEST_PATH_PREFIX + "/sample_01.csv");
|
88
|
+
ConfigDiff configDiff = runner.transaction(config, new Control(runner, output));
|
89
|
+
|
90
|
+
assertEquals(EMBULK_S3_TEST_PATH_PREFIX + "/sample_01.csv", configDiff.get(String.class, "last_path"));
|
91
|
+
assertEquals(0, getRecords(config, output).size());
|
78
92
|
}
|
79
93
|
|
80
94
|
@Test
|
81
|
-
public void
|
95
|
+
public void emptyFilesWithLastPath()
|
96
|
+
throws Exception
|
82
97
|
{
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
ObjectListing listing = listing("in/aa", 0L, "in/aa/a", 3L, "in/aa/b", 2L, "in/aa/c", 1L);
|
88
|
-
doReturn(listing).when(client).listObjects(any(ListObjectsRequest.class));
|
89
|
-
|
90
|
-
ConfigDiff configDiff = plugin.transaction(config, new FileInputPlugin.Control() {
|
91
|
-
@Override
|
92
|
-
public List<TaskReport> run(TaskSource taskSource, int taskCount)
|
93
|
-
{
|
94
|
-
assertEquals(3, taskCount);
|
95
|
-
List<String> files = fileListToList(taskSource.loadTask(S3PluginTask.class).getFiles());
|
96
|
-
assertArrayEquals(new String[]{"in/aa/a", "in/aa/b", "in/aa/c"}, files.toArray(new String[files.size()]));
|
97
|
-
return emptyTaskReports(taskCount);
|
98
|
-
}
|
99
|
-
});
|
100
|
-
|
101
|
-
assertEquals("in/aa/c", configDiff.get(String.class, "last_path"));
|
102
|
-
}
|
98
|
+
ConfigSource config = this.config.deepCopy()
|
99
|
+
.set("path_prefix", "empty_files_prefix")
|
100
|
+
.set("last_path", EMBULK_S3_TEST_PATH_PREFIX + "/sample_01.csv");
|
101
|
+
ConfigDiff configDiff = runner.transaction(config, new Control(runner, output));
|
103
102
|
|
104
|
-
|
105
|
-
|
106
|
-
doReturn(listing).when(client).listObjects(any(ListObjectsRequest.class));
|
107
|
-
|
108
|
-
ConfigDiff configDiff = plugin.transaction(config, new FileInputPlugin.Control() {
|
109
|
-
@Override
|
110
|
-
public List<TaskReport> run(TaskSource taskSource, int taskCount)
|
111
|
-
{
|
112
|
-
assertEquals(0, taskCount);
|
113
|
-
assertTrue(fileListToList(taskSource.loadTask(S3PluginTask.class).getFiles()).isEmpty());
|
114
|
-
return emptyTaskReports(taskCount);
|
115
|
-
}
|
116
|
-
});
|
117
|
-
|
118
|
-
assertEquals(null, configDiff.get(String.class, "last_path", null));
|
119
|
-
}
|
120
|
-
|
121
|
-
{ // if files are empty, keep the previous last_path.
|
122
|
-
config.set("last_path", "in/bb");
|
123
|
-
|
124
|
-
ObjectListing listing = listing("in/aa", 0L);
|
125
|
-
doReturn(listing).when(client).listObjects(any(ListObjectsRequest.class));
|
126
|
-
|
127
|
-
ConfigDiff configDiff = plugin.transaction(config, new FileInputPlugin.Control() {
|
128
|
-
@Override
|
129
|
-
public List<TaskReport> run(TaskSource taskSource, int taskCount) {
|
130
|
-
assertEquals(0, taskCount);
|
131
|
-
assertTrue(fileListToList(taskSource.loadTask(S3PluginTask.class).getFiles()).isEmpty());
|
132
|
-
return emptyTaskReports(taskCount);
|
133
|
-
}
|
134
|
-
});
|
135
|
-
|
136
|
-
assertEquals("in/bb", configDiff.get(String.class, "last_path"));
|
137
|
-
}
|
103
|
+
assertEquals(EMBULK_S3_TEST_PATH_PREFIX + "/sample_01.csv", configDiff.get(String.class, "last_path")); // keep the last_path
|
104
|
+
assertEquals(0, getRecords(config, output).size());
|
138
105
|
}
|
139
106
|
|
140
107
|
@Test
|
141
|
-
public void
|
108
|
+
public void useTotalFileCountLimit()
|
142
109
|
throws Exception
|
143
110
|
{
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
PluginTask task = config.loadConfig(plugin.getTaskClass());
|
148
|
-
FileList.Builder builder = new FileList.Builder();
|
149
|
-
builder.add("in/aa/a", 100);
|
150
|
-
task.setFiles(builder.build());
|
151
|
-
|
152
|
-
StringBuilder sbuf = new StringBuilder();
|
153
|
-
try (S3FileInput input = (S3FileInput) plugin.open(task.dump(), 0)) {
|
154
|
-
LineDecoder d = new LineDecoder(input, config.loadConfig(LineDecoder.DecoderTask.class));
|
155
|
-
while (d.nextFile()) {
|
156
|
-
sbuf.append(d.poll());
|
157
|
-
}
|
158
|
-
}
|
159
|
-
assertEquals("aa", sbuf.toString());
|
160
|
-
}
|
111
|
+
ConfigSource config = this.config.deepCopy().set("total_file_count_limit", 0);
|
112
|
+
ConfigDiff configDiff = runner.transaction(config, new Control(runner, output));
|
161
113
|
|
162
|
-
|
163
|
-
|
164
|
-
return Exec.newConfigSource()
|
165
|
-
.set("bucket", "my_bucket")
|
166
|
-
.set("path_prefix", "my_path_prefix")
|
167
|
-
.set("access_key_id", "my_access_key_id")
|
168
|
-
.set("secret_access_key", "my_secret_access_key");
|
114
|
+
assertNull(configDiff.get(String.class, "last_path"));
|
115
|
+
assertEquals(0, getRecords(config, output).size());
|
169
116
|
}
|
170
117
|
|
171
|
-
|
118
|
+
@Test
|
119
|
+
public void usePathMatchPattern()
|
120
|
+
throws Exception
|
172
121
|
{
|
173
|
-
|
174
|
-
|
175
|
-
return listing;
|
176
|
-
}
|
122
|
+
ConfigSource config = this.config.deepCopy().set("path_match_pattern", "/match/");
|
123
|
+
ConfigDiff configDiff = runner.transaction(config, new Control(runner, output));
|
177
124
|
|
178
|
-
|
179
|
-
|
180
|
-
doReturn(null).when(listing).getNextMarker();
|
181
|
-
return listing;
|
125
|
+
assertNull(configDiff.get(String.class, "last_path"));
|
126
|
+
assertEquals(0, getRecords(config, output).size());
|
182
127
|
}
|
183
128
|
|
184
|
-
static
|
129
|
+
static class Control
|
130
|
+
implements InputPlugin.Control
|
185
131
|
{
|
186
|
-
|
187
|
-
|
188
|
-
|
132
|
+
private FileInputRunner runner;
|
133
|
+
private PageOutput output;
|
134
|
+
|
135
|
+
Control(FileInputRunner runner, PageOutput output)
|
136
|
+
{
|
137
|
+
this.runner = runner;
|
138
|
+
this.output = output;
|
189
139
|
}
|
190
140
|
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
141
|
+
@Override
|
142
|
+
public List<TaskReport> run(TaskSource taskSource, Schema schema, int taskCount)
|
143
|
+
{
|
144
|
+
List<TaskReport> reports = new ArrayList<>();
|
145
|
+
for (int i = 0; i < taskCount; i++) {
|
146
|
+
reports.add(runner.run(taskSource, schema, i, output));
|
147
|
+
}
|
148
|
+
return reports;
|
195
149
|
}
|
196
|
-
return builder.build();
|
197
150
|
}
|
198
151
|
|
199
|
-
static
|
152
|
+
static ImmutableMap<String, Object> parserConfig(ImmutableList<Object> schemaConfig)
|
200
153
|
{
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
154
|
+
ImmutableMap.Builder<String, Object> builder = new ImmutableMap.Builder<>();
|
155
|
+
builder.put("type", "csv");
|
156
|
+
builder.put("newline", "CRLF");
|
157
|
+
builder.put("delimiter", ",");
|
158
|
+
builder.put("quote", "\"");
|
159
|
+
builder.put("escape", "\"");
|
160
|
+
builder.put("trim_if_not_quoted", false);
|
161
|
+
builder.put("skip_header_lines", 0);
|
162
|
+
builder.put("allow_extra_columns", false);
|
163
|
+
builder.put("allow_optional_columns", false);
|
164
|
+
builder.put("columns", schemaConfig);
|
165
|
+
return builder.build();
|
205
166
|
}
|
206
167
|
|
207
|
-
static
|
168
|
+
static ImmutableList<Object> schemaConfig()
|
208
169
|
{
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
170
|
+
ImmutableList.Builder<Object> builder = new ImmutableList.Builder<>();
|
171
|
+
builder.add(ImmutableMap.of("name", "timestamp", "type", "timestamp", "format", "%Y-%m-%d %H:%M:%S"));
|
172
|
+
builder.add(ImmutableMap.of("name", "host", "type", "string"));
|
173
|
+
builder.add(ImmutableMap.of("name", "path", "type", "string"));
|
174
|
+
builder.add(ImmutableMap.of("name", "method", "type", "string"));
|
175
|
+
builder.add(ImmutableMap.of("name", "referer", "type", "string"));
|
176
|
+
builder.add(ImmutableMap.of("name", "code", "type", "long"));
|
177
|
+
builder.add(ImmutableMap.of("name", "agent", "type", "string"));
|
178
|
+
builder.add(ImmutableMap.of("name", "user", "type", "string"));
|
179
|
+
builder.add(ImmutableMap.of("name", "size", "type", "long"));
|
180
|
+
return builder.build();
|
215
181
|
}
|
216
182
|
|
217
|
-
static
|
183
|
+
static void assertRecords(ConfigSource config, MockPageOutput output)
|
218
184
|
{
|
219
|
-
|
220
|
-
|
221
|
-
|
185
|
+
List<Object[]> records = getRecords(config, output);
|
186
|
+
|
187
|
+
assertEquals(2, records.size());
|
188
|
+
{
|
189
|
+
Object[] record = records.get(0);
|
190
|
+
assertEquals("2014-10-02 22:15:39 UTC", record[0].toString());
|
191
|
+
assertEquals("84.186.29.187", record[1]);
|
192
|
+
assertEquals("/category/electronics", record[2]);
|
193
|
+
assertEquals("GET", record[3]);
|
194
|
+
assertEquals("/category/music", record[4]);
|
195
|
+
assertEquals(200L, record[5]);
|
196
|
+
assertEquals("Mozilla/5.0", record[6]);
|
197
|
+
assertEquals("-", record[7]);
|
198
|
+
assertEquals(136L, record[8]);
|
199
|
+
}
|
200
|
+
|
201
|
+
{
|
202
|
+
Object[] record = records.get(1);
|
203
|
+
assertEquals("2014-10-02 22:15:01 UTC", record[0].toString());
|
204
|
+
assertEquals("140.36.216.47", record[1]);
|
205
|
+
assertEquals("/category/music?from=10", record[2]);
|
206
|
+
assertEquals("GET", record[3]);
|
207
|
+
assertEquals("-", record[4]);
|
208
|
+
assertEquals(200L, record[5]);
|
209
|
+
assertEquals("Mozilla/5.0", record[6]);
|
210
|
+
assertEquals("-", record[7]);
|
211
|
+
assertEquals(70L, record[8]);
|
222
212
|
}
|
223
|
-
return reports.build();
|
224
213
|
}
|
225
214
|
|
226
|
-
|
215
|
+
static List<Object[]> getRecords(ConfigSource config, MockPageOutput output)
|
227
216
|
{
|
228
|
-
|
229
|
-
|
230
|
-
for (String path : list.get(i)) {
|
231
|
-
builder.add(path);
|
232
|
-
}
|
233
|
-
}
|
234
|
-
return builder.build();
|
217
|
+
Schema schema = config.getNested("parser").loadConfig(CsvParserPlugin.PluginTask.class).getSchemaConfig().toSchema();
|
218
|
+
return Pages.toObjects(schema, output.pages);
|
235
219
|
}
|
236
220
|
}
|
@@ -2,6 +2,9 @@ package org.embulk.input.s3;
|
|
2
2
|
|
3
3
|
import com.amazonaws.services.s3.AmazonS3Client;
|
4
4
|
import com.amazonaws.services.s3.model.GetObjectRequest;
|
5
|
+
import com.amazonaws.services.s3.model.ObjectMetadata;
|
6
|
+
import com.amazonaws.services.s3.model.S3Object;
|
7
|
+
import com.amazonaws.services.s3.model.S3ObjectInputStream;
|
5
8
|
import org.embulk.EmbulkTestRuntime;
|
6
9
|
import org.embulk.input.s3.AbstractS3FileInputPlugin.S3InputStreamReopener;
|
7
10
|
import org.junit.Before;
|
@@ -9,11 +12,11 @@ import org.junit.Rule;
|
|
9
12
|
import org.junit.Test;
|
10
13
|
|
11
14
|
import java.io.BufferedReader;
|
15
|
+
import java.io.ByteArrayInputStream;
|
12
16
|
import java.io.InputStream;
|
13
17
|
import java.io.InputStreamReader;
|
14
18
|
|
15
19
|
import static org.junit.Assert.assertEquals;
|
16
|
-
import static org.embulk.input.s3.TestS3FileInputPlugin.s3object;
|
17
20
|
import static org.mockito.Matchers.any;
|
18
21
|
import static org.mockito.Mockito.doReturn;
|
19
22
|
import static org.mockito.Mockito.doThrow;
|
@@ -60,4 +63,14 @@ public class TestS3InputStreamReopener
|
|
60
63
|
}
|
61
64
|
}
|
62
65
|
}
|
66
|
+
|
67
|
+
static S3Object s3object(String key, String value)
|
68
|
+
{
|
69
|
+
S3Object o = new S3Object();
|
70
|
+
o.setObjectContent(new S3ObjectInputStream(new ByteArrayInputStream(value.getBytes()), null));
|
71
|
+
ObjectMetadata om = new ObjectMetadata();
|
72
|
+
om.setContentLength(value.length());
|
73
|
+
o.setObjectMetadata(om);
|
74
|
+
return o;
|
75
|
+
}
|
63
76
|
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-s3
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-12-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -52,13 +52,15 @@ files:
|
|
52
52
|
- src/main/java/org/embulk/input/s3/AwsCredentialsTask.java
|
53
53
|
- src/main/java/org/embulk/input/s3/FileList.java
|
54
54
|
- src/main/java/org/embulk/input/s3/S3FileInputPlugin.java
|
55
|
+
- src/test/java/org/embulk/input/s3/TestAwsCredentials.java
|
55
56
|
- src/test/java/org/embulk/input/s3/TestS3FileInputPlugin.java
|
56
57
|
- src/test/java/org/embulk/input/s3/TestS3InputStreamReopener.java
|
58
|
+
- src/test/resources/sample_01.csv
|
57
59
|
- classpath/aws-java-sdk-core-1.10.33.jar
|
58
60
|
- classpath/aws-java-sdk-kms-1.10.33.jar
|
59
61
|
- classpath/aws-java-sdk-s3-1.10.33.jar
|
60
62
|
- classpath/commons-codec-1.6.jar
|
61
|
-
- classpath/embulk-input-s3-0.2.
|
63
|
+
- classpath/embulk-input-s3-0.2.5.jar
|
62
64
|
- classpath/httpclient-4.3.6.jar
|
63
65
|
- classpath/httpcore-4.3.3.jar
|
64
66
|
- classpath/jcl-over-slf4j-1.7.12.jar
|
Binary file
|