embulk-input-s3 0.2.2 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/classpath/embulk-input-s3-0.2.3.jar +0 -0
- data/classpath/jcl-over-slf4j-1.7.12.jar +0 -0
- data/src/main/java/org/embulk/input/s3/AbstractS3FileInputPlugin.java +4 -1
- data/src/test/java/org/embulk/input/s3/TestS3FileInputPlugin.java +199 -20
- data/src/test/java/org/embulk/input/s3/TestS3InputStreamReopener.java +63 -0
- metadata +5 -4
- data/classpath/commons-logging-1.1.3.jar +0 -0
- data/classpath/embulk-input-s3-0.2.2.jar +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f03ab171561f7cca3ff6fe8e17c63b73f31e2db3
|
4
|
+
data.tar.gz: a636f66a873c449741f09c51a7b415f550517d26
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 24baf759d3956a23317ed73f5878d9ec8d66bc493ba344131a16b325fb3558dd5fbab332dacfcac2f7abc89ecd3518cec89f4c232c7d17135159fa95fb444fac
|
7
|
+
data.tar.gz: c83e6f98d5d03e45f83641d879a80c41be41d292d999a1b90ab783ab02f296faa77a3a2276b447804e78dd2908a8b1f7cc026105688eff34fea0fdbbde92130b
|
Binary file
|
Binary file
|
@@ -6,6 +6,8 @@ import java.util.Collections;
|
|
6
6
|
import java.io.IOException;
|
7
7
|
import java.io.InterruptedIOException;
|
8
8
|
import java.io.InputStream;
|
9
|
+
|
10
|
+
import com.google.common.annotations.VisibleForTesting;
|
9
11
|
import com.google.common.collect.ImmutableList;
|
10
12
|
import com.google.common.base.Optional;
|
11
13
|
import com.google.common.base.Throwables;
|
@@ -212,7 +214,8 @@ public abstract class AbstractS3FileInputPlugin
|
|
212
214
|
return new S3FileInput(task, taskIndex);
|
213
215
|
}
|
214
216
|
|
215
|
-
|
217
|
+
@VisibleForTesting
|
218
|
+
static class S3InputStreamReopener
|
216
219
|
implements ResumableInputStream.Reopener
|
217
220
|
{
|
218
221
|
private final Logger log = Exec.getLogger(S3InputStreamReopener.class);
|
@@ -1,42 +1,221 @@
|
|
1
1
|
package org.embulk.input.s3;
|
2
2
|
|
3
|
-
import static org.junit.Assert.*;
|
4
|
-
import java.util.List;
|
5
|
-
import org.junit.Test;
|
6
|
-
import org.mockito.Mockito;
|
7
|
-
|
8
|
-
import com.google.common.base.Optional;
|
9
|
-
import com.google.common.collect.ImmutableList;
|
10
3
|
import com.amazonaws.services.s3.AmazonS3Client;
|
4
|
+
import com.amazonaws.services.s3.model.GetObjectRequest;
|
11
5
|
import com.amazonaws.services.s3.model.ListObjectsRequest;
|
12
6
|
import com.amazonaws.services.s3.model.ObjectListing;
|
7
|
+
import com.amazonaws.services.s3.model.ObjectMetadata;
|
8
|
+
import com.amazonaws.services.s3.model.S3Object;
|
9
|
+
import com.amazonaws.services.s3.model.S3ObjectInputStream;
|
13
10
|
import com.amazonaws.services.s3.model.S3ObjectSummary;
|
11
|
+
import com.google.common.base.Optional;
|
12
|
+
import com.google.common.collect.ImmutableList;
|
13
|
+
import org.embulk.EmbulkTestRuntime;
|
14
|
+
import org.embulk.config.ConfigDiff;
|
15
|
+
import org.embulk.config.ConfigSource;
|
16
|
+
import org.embulk.config.TaskReport;
|
17
|
+
import org.embulk.config.TaskSource;
|
18
|
+
import org.embulk.input.s3.AbstractS3FileInputPlugin.PluginTask;
|
19
|
+
import org.embulk.input.s3.AbstractS3FileInputPlugin.S3FileInput;
|
20
|
+
import org.embulk.input.s3.S3FileInputPlugin.S3PluginTask;
|
21
|
+
import org.embulk.spi.Exec;
|
22
|
+
import org.embulk.spi.FileInputPlugin;
|
23
|
+
import org.embulk.spi.util.LineDecoder;
|
24
|
+
import org.junit.Before;
|
25
|
+
import org.junit.Rule;
|
26
|
+
import org.junit.Test;
|
27
|
+
|
28
|
+
import java.io.ByteArrayInputStream;
|
29
|
+
import java.util.Arrays;
|
30
|
+
import java.util.List;
|
31
|
+
|
32
|
+
import static org.junit.Assert.*;
|
33
|
+
import static org.mockito.Matchers.any;
|
34
|
+
import static org.mockito.Mockito.doReturn;
|
35
|
+
import static org.mockito.Mockito.mock;
|
36
|
+
import static org.mockito.Mockito.spy;
|
14
37
|
|
15
38
|
public class TestS3FileInputPlugin
|
16
39
|
{
|
40
|
+
@Rule
|
41
|
+
public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
|
42
|
+
|
43
|
+
private ConfigSource config;
|
44
|
+
private S3FileInputPlugin plugin;
|
45
|
+
private AmazonS3Client client;
|
46
|
+
|
47
|
+
@Before
|
48
|
+
public void createResources()
|
49
|
+
{
|
50
|
+
config = config();
|
51
|
+
plugin = spy(runtime.getInstance(S3FileInputPlugin.class));
|
52
|
+
client = mock(AmazonS3Client.class);
|
53
|
+
}
|
54
|
+
|
55
|
+
@Test
|
56
|
+
public void checkS3ClientCreatedSuccessfully()
|
57
|
+
{
|
58
|
+
PluginTask task = config().loadConfig(plugin.getTaskClass());
|
59
|
+
plugin.newS3Client(task);
|
60
|
+
}
|
61
|
+
|
17
62
|
/**
 * Feeds listS3FilesByPrefix a mocked client and checks how many of the listed
 * objects end up in the result.
 */
@Test
|
18
63
|
public void listS3FilesByPrefix()
|
19
64
|
{
|
20
65
|
// The mocked AmazonS3Client always returns ol; ol.getObjectSummaries() yields list1 on the first call and list2 on the next.
|
21
|
-
List<S3ObjectSummary> list1 =
|
22
|
-
|
23
|
-
|
24
|
-
ObjectListing ol = Mockito.mock(ObjectListing.class);
|
66
|
+
List<S3ObjectSummary> list1 = s3objects("in/", 0L, "in/file/", 0L, "in/file/sample.csv.gz", 12345L);
|
67
|
+
List<S3ObjectSummary> list2 = s3objects("sample2.csv.gz", 0L);
|
68
|
+
ObjectListing ol = mock(ObjectListing.class);
|
25
69
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
Mockito.doReturn("in/file/").doReturn(null).when(ol).getNextMarker();
|
70
|
+
doReturn(list1).doReturn(list2).when(ol).getObjectSummaries();
|
71
|
+
doReturn(ol).when(client).listObjects(any(ListObjectsRequest.class));
|
72
|
+
doReturn("in/file/").doReturn(null).when(ol).getNextMarker();
|
30
73
|
|
31
74
|
|
// Only objects with size != 0 are counted; in/file/sample.csv.gz (12345 bytes) is the single non-empty entry.
|
32
75
|
assertEquals(1, S3FileInputPlugin.listS3FilesByPrefix(client, "bucketName", "prefix", Optional.<String>absent()).size());
|
33
76
|
}
|
34
77
|
|
35
|
-
|
78
|
+
@Test
|
79
|
+
public void checkLastPath()
|
80
|
+
{
|
81
|
+
doReturn(null).when(client).listObjects(any(ListObjectsRequest.class));
|
82
|
+
doReturn(client).when(plugin).newS3Client(any(PluginTask.class));
|
83
|
+
|
84
|
+
{ // set a last file to last_path
|
85
|
+
ObjectListing listing = listing("in/aa", 0L, "in/aa/a", 3L, "in/aa/b", 2L, "in/aa/c", 1L);
|
86
|
+
doReturn(listing).when(client).listObjects(any(ListObjectsRequest.class));
|
87
|
+
|
88
|
+
ConfigDiff configDiff = plugin.transaction(config, new FileInputPlugin.Control() {
|
89
|
+
@Override
|
90
|
+
public List<TaskReport> run(TaskSource taskSource, int taskCount)
|
91
|
+
{
|
92
|
+
assertEquals(3, taskCount);
|
93
|
+
List<String> files = taskSource.loadTask(S3PluginTask.class).getFiles();
|
94
|
+
assertArrayEquals(new String[]{"in/aa/a", "in/aa/b", "in/aa/c"}, files.toArray(new String[files.size()]));
|
95
|
+
return emptyTaskReports(taskCount);
|
96
|
+
}
|
97
|
+
});
|
98
|
+
|
99
|
+
assertEquals("in/aa/c", configDiff.get(String.class, "last_path"));
|
100
|
+
}
|
101
|
+
|
102
|
+
{ // if files are empty and last_path is not specified, last_path is not set.
|
103
|
+
ObjectListing listing = listing("in/aa", 0L);
|
104
|
+
doReturn(listing).when(client).listObjects(any(ListObjectsRequest.class));
|
105
|
+
|
106
|
+
ConfigDiff configDiff = plugin.transaction(config, new FileInputPlugin.Control() {
|
107
|
+
@Override
|
108
|
+
public List<TaskReport> run(TaskSource taskSource, int taskCount)
|
109
|
+
{
|
110
|
+
assertEquals(0, taskCount);
|
111
|
+
assertTrue(taskSource.loadTask(S3PluginTask.class).getFiles().isEmpty());
|
112
|
+
return emptyTaskReports(taskCount);
|
113
|
+
}
|
114
|
+
});
|
115
|
+
|
116
|
+
assertFalse(configDiff.has("last_path"));
|
117
|
+
}
|
118
|
+
|
119
|
+
{ // if files are empty, keep the previous last_path.
|
120
|
+
config.set("last_path", "in/bb");
|
121
|
+
|
122
|
+
ObjectListing listing = listing("in/aa", 0L);
|
123
|
+
doReturn(listing).when(client).listObjects(any(ListObjectsRequest.class));
|
124
|
+
|
125
|
+
ConfigDiff configDiff = plugin.transaction(config, new FileInputPlugin.Control() {
|
126
|
+
@Override
|
127
|
+
public List<TaskReport> run(TaskSource taskSource, int taskCount) {
|
128
|
+
assertEquals(0, taskCount);
|
129
|
+
assertTrue(taskSource.loadTask(S3PluginTask.class).getFiles().isEmpty());
|
130
|
+
return emptyTaskReports(taskCount);
|
131
|
+
}
|
132
|
+
});
|
133
|
+
|
134
|
+
assertEquals("in/bb", configDiff.get(String.class, "last_path"));
|
135
|
+
}
|
136
|
+
}
|
137
|
+
|
138
|
+
@Test
|
139
|
+
public void checkS3FileInputByOpen()
|
140
|
+
throws Exception
|
141
|
+
{
|
142
|
+
doReturn(s3object("in/aa/a", "aa")).when(client).getObject(any(GetObjectRequest.class));
|
143
|
+
doReturn(client).when(plugin).newS3Client(any(PluginTask.class));
|
144
|
+
|
145
|
+
PluginTask task = config.loadConfig(plugin.getTaskClass());
|
146
|
+
task.setFiles(Arrays.asList(new String[]{"in/aa/a"}));
|
147
|
+
|
148
|
+
StringBuilder sbuf = new StringBuilder();
|
149
|
+
try (S3FileInput input = (S3FileInput) plugin.open(task.dump(), 0)) {
|
150
|
+
LineDecoder d = new LineDecoder(input, config.loadConfig(LineDecoder.DecoderTask.class));
|
151
|
+
while (d.nextFile()) {
|
152
|
+
sbuf.append(d.poll());
|
153
|
+
}
|
154
|
+
}
|
155
|
+
assertEquals("aa", sbuf.toString());
|
156
|
+
}
|
157
|
+
|
158
|
+
public static ConfigSource config()
|
159
|
+
{
|
160
|
+
return Exec.newConfigSource()
|
161
|
+
.set("bucket", "my_bucket")
|
162
|
+
.set("path_prefix", "my_path_prefix")
|
163
|
+
.set("access_key_id", "my_access_key_id")
|
164
|
+
.set("secret_access_key", "my_secret_access_key");
|
165
|
+
}
|
166
|
+
|
167
|
+
static ObjectListing listing(Object... keySizes)
|
168
|
+
{
|
169
|
+
ObjectListing listing = mock(ObjectListing.class);
|
170
|
+
if (keySizes == null) {
|
171
|
+
return listing;
|
172
|
+
}
|
173
|
+
|
174
|
+
List<S3ObjectSummary> s3objects = s3objects(keySizes);
|
175
|
+
doReturn(s3objects).when(listing).getObjectSummaries();
|
176
|
+
doReturn(null).when(listing).getNextMarker();
|
177
|
+
return listing;
|
178
|
+
}
|
179
|
+
|
180
|
+
static List<S3ObjectSummary> s3objects(Object... keySizes)
|
181
|
+
{
|
182
|
+
ImmutableList.Builder<S3ObjectSummary> builder = new ImmutableList.Builder<>();
|
183
|
+
if (keySizes == null) {
|
184
|
+
return builder.build();
|
185
|
+
}
|
186
|
+
|
187
|
+
for (int i = 0; i < keySizes.length; i += 2) {
|
188
|
+
String key = (String) keySizes[i];
|
189
|
+
long size = (Long) keySizes[i + 1];
|
190
|
+
builder.add(s3object(key, size));
|
191
|
+
}
|
192
|
+
return builder.build();
|
193
|
+
}
|
194
|
+
|
195
|
+
static S3ObjectSummary s3object(String key, long size)
|
196
|
+
{
|
197
|
+
S3ObjectSummary o = new S3ObjectSummary();
|
198
|
+
o.setKey(key);
|
199
|
+
o.setSize(size);
|
200
|
+
return o;
|
201
|
+
}
|
202
|
+
|
203
|
+
static S3Object s3object(String key, String value)
|
204
|
+
{
|
205
|
+
S3Object o = new S3Object();
|
206
|
+
o.setObjectContent(new S3ObjectInputStream(new ByteArrayInputStream(value.getBytes()), null));
|
207
|
+
ObjectMetadata om = new ObjectMetadata();
|
208
|
+
om.setContentLength(value.length());
|
209
|
+
o.setObjectMetadata(om);
|
210
|
+
return o;
|
211
|
+
}
|
212
|
+
|
213
|
+
static List<TaskReport> emptyTaskReports(int taskCount)
|
36
214
|
{
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
215
|
+
ImmutableList.Builder<TaskReport> reports = new ImmutableList.Builder<>();
|
216
|
+
for (int i = 0; i < taskCount; i++) {
|
217
|
+
reports.add(Exec.newTaskReport());
|
218
|
+
}
|
219
|
+
return reports.build();
|
41
220
|
}
|
42
221
|
}
|
@@ -0,0 +1,63 @@
|
|
1
|
+
package org.embulk.input.s3;
|
2
|
+
|
3
|
+
import com.amazonaws.services.s3.AmazonS3Client;
|
4
|
+
import com.amazonaws.services.s3.model.GetObjectRequest;
|
5
|
+
import org.embulk.EmbulkTestRuntime;
|
6
|
+
import org.embulk.input.s3.AbstractS3FileInputPlugin.S3InputStreamReopener;
|
7
|
+
import org.junit.Before;
|
8
|
+
import org.junit.Rule;
|
9
|
+
import org.junit.Test;
|
10
|
+
|
11
|
+
import java.io.BufferedReader;
|
12
|
+
import java.io.InputStream;
|
13
|
+
import java.io.InputStreamReader;
|
14
|
+
|
15
|
+
import static org.junit.Assert.assertEquals;
|
16
|
+
import static org.embulk.input.s3.TestS3FileInputPlugin.s3object;
|
17
|
+
import static org.mockito.Matchers.any;
|
18
|
+
import static org.mockito.Mockito.doReturn;
|
19
|
+
import static org.mockito.Mockito.doThrow;
|
20
|
+
import static org.mockito.Mockito.mock;
|
21
|
+
|
22
|
+
public class TestS3InputStreamReopener
|
23
|
+
{
|
24
|
+
@Rule
|
25
|
+
public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
|
26
|
+
|
27
|
+
private AmazonS3Client client;
|
28
|
+
|
29
|
+
@Before
|
30
|
+
public void createResources()
|
31
|
+
{
|
32
|
+
client = mock(AmazonS3Client.class);
|
33
|
+
}
|
34
|
+
|
35
|
+
@Test
|
36
|
+
public void reopenS3FileByReopener()
|
37
|
+
throws Exception
|
38
|
+
{
|
39
|
+
String content = "value";
|
40
|
+
|
41
|
+
{ // not retry
|
42
|
+
doReturn(s3object("in/aa/a", content)).when(client).getObject(any(GetObjectRequest.class));
|
43
|
+
|
44
|
+
S3InputStreamReopener opener = new S3InputStreamReopener(client, new GetObjectRequest("my_bucket", "in/aa/a"), content.length());
|
45
|
+
|
46
|
+
try (InputStream in = opener.reopen(0, new RuntimeException())) {
|
47
|
+
BufferedReader r = new BufferedReader(new InputStreamReader(in));
|
48
|
+
assertEquals("value", r.readLine());
|
49
|
+
}
|
50
|
+
}
|
51
|
+
|
52
|
+
{ // retry once
|
53
|
+
doThrow(new RuntimeException()).doReturn(s3object("in/aa/a", content)).when(client).getObject(any(GetObjectRequest.class));
|
54
|
+
|
55
|
+
S3InputStreamReopener opener = new S3InputStreamReopener(client, new GetObjectRequest("my_bucket", "in/aa/a"), content.length());
|
56
|
+
|
57
|
+
try (InputStream in = opener.reopen(0, new RuntimeException())) {
|
58
|
+
BufferedReader r = new BufferedReader(new InputStreamReader(in));
|
59
|
+
assertEquals("value", r.readLine());
|
60
|
+
}
|
61
|
+
}
|
62
|
+
}
|
63
|
+
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-s3
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.2
|
4
|
+
version: 0.2.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-10-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -50,14 +50,15 @@ files:
|
|
50
50
|
- src/main/java/org/embulk/input/s3/AbstractS3FileInputPlugin.java
|
51
51
|
- src/main/java/org/embulk/input/s3/S3FileInputPlugin.java
|
52
52
|
- src/test/java/org/embulk/input/s3/TestS3FileInputPlugin.java
|
53
|
+
- src/test/java/org/embulk/input/s3/TestS3InputStreamReopener.java
|
53
54
|
- classpath/aws-java-sdk-core-1.9.22.jar
|
54
55
|
- classpath/aws-java-sdk-kms-1.9.22.jar
|
55
56
|
- classpath/aws-java-sdk-s3-1.9.22.jar
|
56
57
|
- classpath/commons-codec-1.6.jar
|
57
|
-
- classpath/commons-logging-1.1.3.jar
|
58
|
-
- classpath/embulk-input-s3-0.2.2.jar
|
58
|
+
- classpath/embulk-input-s3-0.2.3.jar
|
59
59
|
- classpath/httpclient-4.3.4.jar
|
60
60
|
- classpath/httpcore-4.3.2.jar
|
61
|
+
- classpath/jcl-over-slf4j-1.7.12.jar
|
61
62
|
- classpath/joda-time-2.8.2.jar
|
62
63
|
homepage: https://github.com/embulk/embulk-input-s3
|
63
64
|
licenses:
|
Binary file
|
Binary file
|