embulk-input-s3 0.2.2 → 0.2.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/classpath/embulk-input-s3-0.2.3.jar +0 -0
- data/classpath/jcl-over-slf4j-1.7.12.jar +0 -0
- data/src/main/java/org/embulk/input/s3/AbstractS3FileInputPlugin.java +4 -1
- data/src/test/java/org/embulk/input/s3/TestS3FileInputPlugin.java +199 -20
- data/src/test/java/org/embulk/input/s3/TestS3InputStreamReopener.java +63 -0
- metadata +5 -4
- data/classpath/commons-logging-1.1.3.jar +0 -0
- data/classpath/embulk-input-s3-0.2.2.jar +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f03ab171561f7cca3ff6fe8e17c63b73f31e2db3
|
4
|
+
data.tar.gz: a636f66a873c449741f09c51a7b415f550517d26
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 24baf759d3956a23317ed73f5878d9ec8d66bc493ba344131a16b325fb3558dd5fbab332dacfcac2f7abc89ecd3518cec89f4c232c7d17135159fa95fb444fac
|
7
|
+
data.tar.gz: c83e6f98d5d03e45f83641d879a80c41be41d292d999a1b90ab783ab02f296faa77a3a2276b447804e78dd2908a8b1f7cc026105688eff34fea0fdbbde92130b
|
Binary file
|
Binary file
|
@@ -6,6 +6,8 @@ import java.util.Collections;
|
|
6
6
|
import java.io.IOException;
|
7
7
|
import java.io.InterruptedIOException;
|
8
8
|
import java.io.InputStream;
|
9
|
+
|
10
|
+
import com.google.common.annotations.VisibleForTesting;
|
9
11
|
import com.google.common.collect.ImmutableList;
|
10
12
|
import com.google.common.base.Optional;
|
11
13
|
import com.google.common.base.Throwables;
|
@@ -212,7 +214,8 @@ public abstract class AbstractS3FileInputPlugin
|
|
212
214
|
return new S3FileInput(task, taskIndex);
|
213
215
|
}
|
214
216
|
|
215
|
-
|
217
|
+
@VisibleForTesting
|
218
|
+
static class S3InputStreamReopener
|
216
219
|
implements ResumableInputStream.Reopener
|
217
220
|
{
|
218
221
|
private final Logger log = Exec.getLogger(S3InputStreamReopener.class);
|
@@ -1,42 +1,221 @@
|
|
1
1
|
package org.embulk.input.s3;
|
2
2
|
|
3
|
-
import static org.junit.Assert.*;
|
4
|
-
import java.util.List;
|
5
|
-
import org.junit.Test;
|
6
|
-
import org.mockito.Mockito;
|
7
|
-
|
8
|
-
import com.google.common.base.Optional;
|
9
|
-
import com.google.common.collect.ImmutableList;
|
10
3
|
import com.amazonaws.services.s3.AmazonS3Client;
|
4
|
+
import com.amazonaws.services.s3.model.GetObjectRequest;
|
11
5
|
import com.amazonaws.services.s3.model.ListObjectsRequest;
|
12
6
|
import com.amazonaws.services.s3.model.ObjectListing;
|
7
|
+
import com.amazonaws.services.s3.model.ObjectMetadata;
|
8
|
+
import com.amazonaws.services.s3.model.S3Object;
|
9
|
+
import com.amazonaws.services.s3.model.S3ObjectInputStream;
|
13
10
|
import com.amazonaws.services.s3.model.S3ObjectSummary;
|
11
|
+
import com.google.common.base.Optional;
|
12
|
+
import com.google.common.collect.ImmutableList;
|
13
|
+
import org.embulk.EmbulkTestRuntime;
|
14
|
+
import org.embulk.config.ConfigDiff;
|
15
|
+
import org.embulk.config.ConfigSource;
|
16
|
+
import org.embulk.config.TaskReport;
|
17
|
+
import org.embulk.config.TaskSource;
|
18
|
+
import org.embulk.input.s3.AbstractS3FileInputPlugin.PluginTask;
|
19
|
+
import org.embulk.input.s3.AbstractS3FileInputPlugin.S3FileInput;
|
20
|
+
import org.embulk.input.s3.S3FileInputPlugin.S3PluginTask;
|
21
|
+
import org.embulk.spi.Exec;
|
22
|
+
import org.embulk.spi.FileInputPlugin;
|
23
|
+
import org.embulk.spi.util.LineDecoder;
|
24
|
+
import org.junit.Before;
|
25
|
+
import org.junit.Rule;
|
26
|
+
import org.junit.Test;
|
27
|
+
|
28
|
+
import java.io.ByteArrayInputStream;
|
29
|
+
import java.util.Arrays;
|
30
|
+
import java.util.List;
|
31
|
+
|
32
|
+
import static org.junit.Assert.*;
|
33
|
+
import static org.mockito.Matchers.any;
|
34
|
+
import static org.mockito.Mockito.doReturn;
|
35
|
+
import static org.mockito.Mockito.mock;
|
36
|
+
import static org.mockito.Mockito.spy;
|
14
37
|
|
15
38
|
public class TestS3FileInputPlugin
|
16
39
|
{
|
40
|
+
@Rule
|
41
|
+
public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
|
42
|
+
|
43
|
+
private ConfigSource config;
|
44
|
+
private S3FileInputPlugin plugin;
|
45
|
+
private AmazonS3Client client;
|
46
|
+
|
47
|
+
@Before
|
48
|
+
public void createResources()
|
49
|
+
{
|
50
|
+
config = config();
|
51
|
+
plugin = spy(runtime.getInstance(S3FileInputPlugin.class));
|
52
|
+
client = mock(AmazonS3Client.class);
|
53
|
+
}
|
54
|
+
|
55
|
+
@Test
|
56
|
+
public void checkS3ClientCreatedSuccessfully()
|
57
|
+
{
|
58
|
+
PluginTask task = config().loadConfig(plugin.getTaskClass());
|
59
|
+
plugin.newS3Client(task);
|
60
|
+
}
|
61
|
+
|
17
62
|
@Test
|
18
63
|
public void listS3FilesByPrefix()
|
19
64
|
{
|
20
65
|
// AWSS3Client returns list1 for the first iteration and list2 next.
|
21
|
-
List<S3ObjectSummary> list1 =
|
22
|
-
|
23
|
-
|
24
|
-
ObjectListing ol = Mockito.mock(ObjectListing.class);
|
66
|
+
List<S3ObjectSummary> list1 = s3objects("in/", 0L, "in/file/", 0L, "in/file/sample.csv.gz", 12345L);
|
67
|
+
List<S3ObjectSummary> list2 = s3objects("sample2.csv.gz", 0L);
|
68
|
+
ObjectListing ol = mock(ObjectListing.class);
|
25
69
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
Mockito.doReturn("in/file/").doReturn(null).when(ol).getNextMarker();
|
70
|
+
doReturn(list1).doReturn(list2).when(ol).getObjectSummaries();
|
71
|
+
doReturn(ol).when(client).listObjects(any(ListObjectsRequest.class));
|
72
|
+
doReturn("in/file/").doReturn(null).when(ol).getNextMarker();
|
30
73
|
|
31
74
|
// It counts only size != 0 files.
|
32
75
|
assertEquals(1, S3FileInputPlugin.listS3FilesByPrefix(client, "bucketName", "prefix", Optional.<String>absent()).size());
|
33
76
|
}
|
34
77
|
|
35
|
-
|
78
|
+
@Test
|
79
|
+
public void checkLastPath()
|
80
|
+
{
|
81
|
+
doReturn(null).when(client).listObjects(any(ListObjectsRequest.class));
|
82
|
+
doReturn(client).when(plugin).newS3Client(any(PluginTask.class));
|
83
|
+
|
84
|
+
{ // set a last file to last_path
|
85
|
+
ObjectListing listing = listing("in/aa", 0L, "in/aa/a", 3L, "in/aa/b", 2L, "in/aa/c", 1L);
|
86
|
+
doReturn(listing).when(client).listObjects(any(ListObjectsRequest.class));
|
87
|
+
|
88
|
+
ConfigDiff configDiff = plugin.transaction(config, new FileInputPlugin.Control() {
|
89
|
+
@Override
|
90
|
+
public List<TaskReport> run(TaskSource taskSource, int taskCount)
|
91
|
+
{
|
92
|
+
assertEquals(3, taskCount);
|
93
|
+
List<String> files = taskSource.loadTask(S3PluginTask.class).getFiles();
|
94
|
+
assertArrayEquals(new String[]{"in/aa/a", "in/aa/b", "in/aa/c"}, files.toArray(new String[files.size()]));
|
95
|
+
return emptyTaskReports(taskCount);
|
96
|
+
}
|
97
|
+
});
|
98
|
+
|
99
|
+
assertEquals("in/aa/c", configDiff.get(String.class, "last_path"));
|
100
|
+
}
|
101
|
+
|
102
|
+
{ // if files are empty and last_path is not specified, last_path is not set.
|
103
|
+
ObjectListing listing = listing("in/aa", 0L);
|
104
|
+
doReturn(listing).when(client).listObjects(any(ListObjectsRequest.class));
|
105
|
+
|
106
|
+
ConfigDiff configDiff = plugin.transaction(config, new FileInputPlugin.Control() {
|
107
|
+
@Override
|
108
|
+
public List<TaskReport> run(TaskSource taskSource, int taskCount)
|
109
|
+
{
|
110
|
+
assertEquals(0, taskCount);
|
111
|
+
assertTrue(taskSource.loadTask(S3PluginTask.class).getFiles().isEmpty());
|
112
|
+
return emptyTaskReports(taskCount);
|
113
|
+
}
|
114
|
+
});
|
115
|
+
|
116
|
+
assertFalse(configDiff.has("last_path"));
|
117
|
+
}
|
118
|
+
|
119
|
+
{ // if files are empty, keep the previous last_path.
|
120
|
+
config.set("last_path", "in/bb");
|
121
|
+
|
122
|
+
ObjectListing listing = listing("in/aa", 0L);
|
123
|
+
doReturn(listing).when(client).listObjects(any(ListObjectsRequest.class));
|
124
|
+
|
125
|
+
ConfigDiff configDiff = plugin.transaction(config, new FileInputPlugin.Control() {
|
126
|
+
@Override
|
127
|
+
public List<TaskReport> run(TaskSource taskSource, int taskCount) {
|
128
|
+
assertEquals(0, taskCount);
|
129
|
+
assertTrue(taskSource.loadTask(S3PluginTask.class).getFiles().isEmpty());
|
130
|
+
return emptyTaskReports(taskCount);
|
131
|
+
}
|
132
|
+
});
|
133
|
+
|
134
|
+
assertEquals("in/bb", configDiff.get(String.class, "last_path"));
|
135
|
+
}
|
136
|
+
}
|
137
|
+
|
138
|
+
@Test
|
139
|
+
public void checkS3FileInputByOpen()
|
140
|
+
throws Exception
|
141
|
+
{
|
142
|
+
doReturn(s3object("in/aa/a", "aa")).when(client).getObject(any(GetObjectRequest.class));
|
143
|
+
doReturn(client).when(plugin).newS3Client(any(PluginTask.class));
|
144
|
+
|
145
|
+
PluginTask task = config.loadConfig(plugin.getTaskClass());
|
146
|
+
task.setFiles(Arrays.asList(new String[]{"in/aa/a"}));
|
147
|
+
|
148
|
+
StringBuilder sbuf = new StringBuilder();
|
149
|
+
try (S3FileInput input = (S3FileInput) plugin.open(task.dump(), 0)) {
|
150
|
+
LineDecoder d = new LineDecoder(input, config.loadConfig(LineDecoder.DecoderTask.class));
|
151
|
+
while (d.nextFile()) {
|
152
|
+
sbuf.append(d.poll());
|
153
|
+
}
|
154
|
+
}
|
155
|
+
assertEquals("aa", sbuf.toString());
|
156
|
+
}
|
157
|
+
|
158
|
+
public static ConfigSource config()
|
159
|
+
{
|
160
|
+
return Exec.newConfigSource()
|
161
|
+
.set("bucket", "my_bucket")
|
162
|
+
.set("path_prefix", "my_path_prefix")
|
163
|
+
.set("access_key_id", "my_access_key_id")
|
164
|
+
.set("secret_access_key", "my_secret_access_key");
|
165
|
+
}
|
166
|
+
|
167
|
+
static ObjectListing listing(Object... keySizes)
|
168
|
+
{
|
169
|
+
ObjectListing listing = mock(ObjectListing.class);
|
170
|
+
if (keySizes == null) {
|
171
|
+
return listing;
|
172
|
+
}
|
173
|
+
|
174
|
+
List<S3ObjectSummary> s3objects = s3objects(keySizes);
|
175
|
+
doReturn(s3objects).when(listing).getObjectSummaries();
|
176
|
+
doReturn(null).when(listing).getNextMarker();
|
177
|
+
return listing;
|
178
|
+
}
|
179
|
+
|
180
|
+
static List<S3ObjectSummary> s3objects(Object... keySizes)
|
181
|
+
{
|
182
|
+
ImmutableList.Builder<S3ObjectSummary> builder = new ImmutableList.Builder<>();
|
183
|
+
if (keySizes == null) {
|
184
|
+
return builder.build();
|
185
|
+
}
|
186
|
+
|
187
|
+
for (int i = 0; i < keySizes.length; i += 2) {
|
188
|
+
String key = (String) keySizes[i];
|
189
|
+
long size = (Long) keySizes[i + 1];
|
190
|
+
builder.add(s3object(key, size));
|
191
|
+
}
|
192
|
+
return builder.build();
|
193
|
+
}
|
194
|
+
|
195
|
+
static S3ObjectSummary s3object(String key, long size)
|
196
|
+
{
|
197
|
+
S3ObjectSummary o = new S3ObjectSummary();
|
198
|
+
o.setKey(key);
|
199
|
+
o.setSize(size);
|
200
|
+
return o;
|
201
|
+
}
|
202
|
+
|
203
|
+
static S3Object s3object(String key, String value)
|
204
|
+
{
|
205
|
+
S3Object o = new S3Object();
|
206
|
+
o.setObjectContent(new S3ObjectInputStream(new ByteArrayInputStream(value.getBytes()), null));
|
207
|
+
ObjectMetadata om = new ObjectMetadata();
|
208
|
+
om.setContentLength(value.length());
|
209
|
+
o.setObjectMetadata(om);
|
210
|
+
return o;
|
211
|
+
}
|
212
|
+
|
213
|
+
static List<TaskReport> emptyTaskReports(int taskCount)
|
36
214
|
{
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
215
|
+
ImmutableList.Builder<TaskReport> reports = new ImmutableList.Builder<>();
|
216
|
+
for (int i = 0; i < taskCount; i++) {
|
217
|
+
reports.add(Exec.newTaskReport());
|
218
|
+
}
|
219
|
+
return reports.build();
|
41
220
|
}
|
42
221
|
}
|
@@ -0,0 +1,63 @@
|
|
1
|
+
package org.embulk.input.s3;
|
2
|
+
|
3
|
+
import com.amazonaws.services.s3.AmazonS3Client;
|
4
|
+
import com.amazonaws.services.s3.model.GetObjectRequest;
|
5
|
+
import org.embulk.EmbulkTestRuntime;
|
6
|
+
import org.embulk.input.s3.AbstractS3FileInputPlugin.S3InputStreamReopener;
|
7
|
+
import org.junit.Before;
|
8
|
+
import org.junit.Rule;
|
9
|
+
import org.junit.Test;
|
10
|
+
|
11
|
+
import java.io.BufferedReader;
|
12
|
+
import java.io.InputStream;
|
13
|
+
import java.io.InputStreamReader;
|
14
|
+
|
15
|
+
import static org.junit.Assert.assertEquals;
|
16
|
+
import static org.embulk.input.s3.TestS3FileInputPlugin.s3object;
|
17
|
+
import static org.mockito.Matchers.any;
|
18
|
+
import static org.mockito.Mockito.doReturn;
|
19
|
+
import static org.mockito.Mockito.doThrow;
|
20
|
+
import static org.mockito.Mockito.mock;
|
21
|
+
|
22
|
+
public class TestS3InputStreamReopener
|
23
|
+
{
|
24
|
+
@Rule
|
25
|
+
public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
|
26
|
+
|
27
|
+
private AmazonS3Client client;
|
28
|
+
|
29
|
+
@Before
|
30
|
+
public void createResources()
|
31
|
+
{
|
32
|
+
client = mock(AmazonS3Client.class);
|
33
|
+
}
|
34
|
+
|
35
|
+
@Test
|
36
|
+
public void reopenS3FileByReopener()
|
37
|
+
throws Exception
|
38
|
+
{
|
39
|
+
String content = "value";
|
40
|
+
|
41
|
+
{ // not retry
|
42
|
+
doReturn(s3object("in/aa/a", content)).when(client).getObject(any(GetObjectRequest.class));
|
43
|
+
|
44
|
+
S3InputStreamReopener opener = new S3InputStreamReopener(client, new GetObjectRequest("my_bucket", "in/aa/a"), content.length());
|
45
|
+
|
46
|
+
try (InputStream in = opener.reopen(0, new RuntimeException())) {
|
47
|
+
BufferedReader r = new BufferedReader(new InputStreamReader(in));
|
48
|
+
assertEquals("value", r.readLine());
|
49
|
+
}
|
50
|
+
}
|
51
|
+
|
52
|
+
{ // retry once
|
53
|
+
doThrow(new RuntimeException()).doReturn(s3object("in/aa/a", content)).when(client).getObject(any(GetObjectRequest.class));
|
54
|
+
|
55
|
+
S3InputStreamReopener opener = new S3InputStreamReopener(client, new GetObjectRequest("my_bucket", "in/aa/a"), content.length());
|
56
|
+
|
57
|
+
try (InputStream in = opener.reopen(0, new RuntimeException())) {
|
58
|
+
BufferedReader r = new BufferedReader(new InputStreamReader(in));
|
59
|
+
assertEquals("value", r.readLine());
|
60
|
+
}
|
61
|
+
}
|
62
|
+
}
|
63
|
+
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-s3
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.2
|
4
|
+
version: 0.2.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-10-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -50,14 +50,15 @@ files:
|
|
50
50
|
- src/main/java/org/embulk/input/s3/AbstractS3FileInputPlugin.java
|
51
51
|
- src/main/java/org/embulk/input/s3/S3FileInputPlugin.java
|
52
52
|
- src/test/java/org/embulk/input/s3/TestS3FileInputPlugin.java
|
53
|
+
- src/test/java/org/embulk/input/s3/TestS3InputStreamReopener.java
|
53
54
|
- classpath/aws-java-sdk-core-1.9.22.jar
|
54
55
|
- classpath/aws-java-sdk-kms-1.9.22.jar
|
55
56
|
- classpath/aws-java-sdk-s3-1.9.22.jar
|
56
57
|
- classpath/commons-codec-1.6.jar
|
57
|
-
- classpath/commons-logging-1.1.3.jar
|
58
|
-
- classpath/embulk-input-s3-0.2.2.jar
|
58
|
+
- classpath/embulk-input-s3-0.2.3.jar
|
59
59
|
- classpath/httpclient-4.3.4.jar
|
60
60
|
- classpath/httpcore-4.3.2.jar
|
61
|
+
- classpath/jcl-over-slf4j-1.7.12.jar
|
61
62
|
- classpath/joda-time-2.8.2.jar
|
62
63
|
homepage: https://github.com/embulk/embulk-input-s3
|
63
64
|
licenses:
|
Binary file
|
Binary file
|