embulk-input-s3 0.2.2 → 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c12721e3646d935cb83dd17b9200134cab108b1e
4
- data.tar.gz: 0237d9c3e8e7952f52143893a101e3a3e08cc80e
3
+ metadata.gz: f03ab171561f7cca3ff6fe8e17c63b73f31e2db3
4
+ data.tar.gz: a636f66a873c449741f09c51a7b415f550517d26
5
5
  SHA512:
6
- metadata.gz: edb9e5d400d0de2c741e7aecd4654695976a856ecaedf87592b4cd65b25c5682b3b677bfd40aebc1e5e0f771e61d6f863768120edc971a9e1f2255ad2f947c3f
7
- data.tar.gz: 223d01c003770e0c78ba646f7cd9d5c24d3ba26ffd1ba710ff62b88efbdffaa892c43857b2d95c0d1b3400d2f68e77678e75c8738bdd2c40b2880ff1bdd9a29d
6
+ metadata.gz: 24baf759d3956a23317ed73f5878d9ec8d66bc493ba344131a16b325fb3558dd5fbab332dacfcac2f7abc89ecd3518cec89f4c232c7d17135159fa95fb444fac
7
+ data.tar.gz: c83e6f98d5d03e45f83641d879a80c41be41d292d999a1b90ab783ab02f296faa77a3a2276b447804e78dd2908a8b1f7cc026105688eff34fea0fdbbde92130b
Binary file
Binary file
@@ -6,6 +6,8 @@ import java.util.Collections;
6
6
  import java.io.IOException;
7
7
  import java.io.InterruptedIOException;
8
8
  import java.io.InputStream;
9
+
10
+ import com.google.common.annotations.VisibleForTesting;
9
11
  import com.google.common.collect.ImmutableList;
10
12
  import com.google.common.base.Optional;
11
13
  import com.google.common.base.Throwables;
@@ -212,7 +214,8 @@ public abstract class AbstractS3FileInputPlugin
212
214
  return new S3FileInput(task, taskIndex);
213
215
  }
214
216
 
215
- private static class S3InputStreamReopener
217
+ @VisibleForTesting
218
+ static class S3InputStreamReopener
216
219
  implements ResumableInputStream.Reopener
217
220
  {
218
221
  private final Logger log = Exec.getLogger(S3InputStreamReopener.class);
@@ -1,42 +1,221 @@
1
1
  package org.embulk.input.s3;
2
2
 
3
- import static org.junit.Assert.*;
4
- import java.util.List;
5
- import org.junit.Test;
6
- import org.mockito.Mockito;
7
-
8
- import com.google.common.base.Optional;
9
- import com.google.common.collect.ImmutableList;
10
3
  import com.amazonaws.services.s3.AmazonS3Client;
4
+ import com.amazonaws.services.s3.model.GetObjectRequest;
11
5
  import com.amazonaws.services.s3.model.ListObjectsRequest;
12
6
  import com.amazonaws.services.s3.model.ObjectListing;
7
+ import com.amazonaws.services.s3.model.ObjectMetadata;
8
+ import com.amazonaws.services.s3.model.S3Object;
9
+ import com.amazonaws.services.s3.model.S3ObjectInputStream;
13
10
  import com.amazonaws.services.s3.model.S3ObjectSummary;
11
+ import com.google.common.base.Optional;
12
+ import com.google.common.collect.ImmutableList;
13
+ import org.embulk.EmbulkTestRuntime;
14
+ import org.embulk.config.ConfigDiff;
15
+ import org.embulk.config.ConfigSource;
16
+ import org.embulk.config.TaskReport;
17
+ import org.embulk.config.TaskSource;
18
+ import org.embulk.input.s3.AbstractS3FileInputPlugin.PluginTask;
19
+ import org.embulk.input.s3.AbstractS3FileInputPlugin.S3FileInput;
20
+ import org.embulk.input.s3.S3FileInputPlugin.S3PluginTask;
21
+ import org.embulk.spi.Exec;
22
+ import org.embulk.spi.FileInputPlugin;
23
+ import org.embulk.spi.util.LineDecoder;
24
+ import org.junit.Before;
25
+ import org.junit.Rule;
26
+ import org.junit.Test;
27
+
28
+ import java.io.ByteArrayInputStream;
29
+ import java.util.Arrays;
30
+ import java.util.List;
31
+
32
+ import static org.junit.Assert.*;
33
+ import static org.mockito.Matchers.any;
34
+ import static org.mockito.Mockito.doReturn;
35
+ import static org.mockito.Mockito.mock;
36
+ import static org.mockito.Mockito.spy;
14
37
 
15
38
  public class TestS3FileInputPlugin
16
39
  {
40
+ @Rule
41
+ public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
42
+
43
+ private ConfigSource config;
44
+ private S3FileInputPlugin plugin;
45
+ private AmazonS3Client client;
46
+
47
+ @Before
48
+ public void createResources()
49
+ {
50
+ config = config();
51
+ plugin = spy(runtime.getInstance(S3FileInputPlugin.class));
52
+ client = mock(AmazonS3Client.class);
53
+ }
54
+
55
+ @Test
56
+ public void checkS3ClientCreatedSuccessfully()
57
+ {
58
+ PluginTask task = config().loadConfig(plugin.getTaskClass());
59
+ plugin.newS3Client(task);
60
+ }
61
+
17
62
  @Test
18
63
  public void listS3FilesByPrefix()
19
64
  {
20
65
  // AWSS3Client returns list1 for the first iteration and list2 next.
21
- List<S3ObjectSummary> list1 = ImmutableList.<S3ObjectSummary> of(bucket("in/", 0), bucket("in/file/", 0),
22
- bucket("in/file/sample.csv.gz", 12345));
23
- List<S3ObjectSummary> list2 = ImmutableList.<S3ObjectSummary> of(bucket("sample2.csv.gz", 0));
24
- ObjectListing ol = Mockito.mock(ObjectListing.class);
66
+ List<S3ObjectSummary> list1 = s3objects("in/", 0L, "in/file/", 0L, "in/file/sample.csv.gz", 12345L);
67
+ List<S3ObjectSummary> list2 = s3objects("sample2.csv.gz", 0L);
68
+ ObjectListing ol = mock(ObjectListing.class);
25
69
 
26
- Mockito.doReturn(list1).doReturn(list2).when(ol).getObjectSummaries();
27
- AmazonS3Client client = Mockito.mock(AmazonS3Client.class);
28
- Mockito.doReturn(ol).when(client).listObjects(Mockito.any(ListObjectsRequest.class));
29
- Mockito.doReturn("in/file/").doReturn(null).when(ol).getNextMarker();
70
+ doReturn(list1).doReturn(list2).when(ol).getObjectSummaries();
71
+ doReturn(ol).when(client).listObjects(any(ListObjectsRequest.class));
72
+ doReturn("in/file/").doReturn(null).when(ol).getNextMarker();
30
73
 
31
74
  // It counts only size != 0 files.
32
75
  assertEquals(1, S3FileInputPlugin.listS3FilesByPrefix(client, "bucketName", "prefix", Optional.<String>absent()).size());
33
76
  }
34
77
 
35
- private S3ObjectSummary bucket(String key, long size)
78
+ @Test
79
+ public void checkLastPath()
80
+ {
81
+ doReturn(null).when(client).listObjects(any(ListObjectsRequest.class));
82
+ doReturn(client).when(plugin).newS3Client(any(PluginTask.class));
83
+
84
+ { // set a last file to last_path
85
+ ObjectListing listing = listing("in/aa", 0L, "in/aa/a", 3L, "in/aa/b", 2L, "in/aa/c", 1L);
86
+ doReturn(listing).when(client).listObjects(any(ListObjectsRequest.class));
87
+
88
+ ConfigDiff configDiff = plugin.transaction(config, new FileInputPlugin.Control() {
89
+ @Override
90
+ public List<TaskReport> run(TaskSource taskSource, int taskCount)
91
+ {
92
+ assertEquals(3, taskCount);
93
+ List<String> files = taskSource.loadTask(S3PluginTask.class).getFiles();
94
+ assertArrayEquals(new String[]{"in/aa/a", "in/aa/b", "in/aa/c"}, files.toArray(new String[files.size()]));
95
+ return emptyTaskReports(taskCount);
96
+ }
97
+ });
98
+
99
+ assertEquals("in/aa/c", configDiff.get(String.class, "last_path"));
100
+ }
101
+
102
+ { // if files are empty and last_path is not specified, last_path is not set.
103
+ ObjectListing listing = listing("in/aa", 0L);
104
+ doReturn(listing).when(client).listObjects(any(ListObjectsRequest.class));
105
+
106
+ ConfigDiff configDiff = plugin.transaction(config, new FileInputPlugin.Control() {
107
+ @Override
108
+ public List<TaskReport> run(TaskSource taskSource, int taskCount)
109
+ {
110
+ assertEquals(0, taskCount);
111
+ assertTrue(taskSource.loadTask(S3PluginTask.class).getFiles().isEmpty());
112
+ return emptyTaskReports(taskCount);
113
+ }
114
+ });
115
+
116
+ assertFalse(configDiff.has("last_path"));
117
+ }
118
+
119
+ { // if files are empty, keep the previous last_path.
120
+ config.set("last_path", "in/bb");
121
+
122
+ ObjectListing listing = listing("in/aa", 0L);
123
+ doReturn(listing).when(client).listObjects(any(ListObjectsRequest.class));
124
+
125
+ ConfigDiff configDiff = plugin.transaction(config, new FileInputPlugin.Control() {
126
+ @Override
127
+ public List<TaskReport> run(TaskSource taskSource, int taskCount) {
128
+ assertEquals(0, taskCount);
129
+ assertTrue(taskSource.loadTask(S3PluginTask.class).getFiles().isEmpty());
130
+ return emptyTaskReports(taskCount);
131
+ }
132
+ });
133
+
134
+ assertEquals("in/bb", configDiff.get(String.class, "last_path"));
135
+ }
136
+ }
137
+
138
+ @Test
139
+ public void checkS3FileInputByOpen()
140
+ throws Exception
141
+ {
142
+ doReturn(s3object("in/aa/a", "aa")).when(client).getObject(any(GetObjectRequest.class));
143
+ doReturn(client).when(plugin).newS3Client(any(PluginTask.class));
144
+
145
+ PluginTask task = config.loadConfig(plugin.getTaskClass());
146
+ task.setFiles(Arrays.asList(new String[]{"in/aa/a"}));
147
+
148
+ StringBuilder sbuf = new StringBuilder();
149
+ try (S3FileInput input = (S3FileInput) plugin.open(task.dump(), 0)) {
150
+ LineDecoder d = new LineDecoder(input, config.loadConfig(LineDecoder.DecoderTask.class));
151
+ while (d.nextFile()) {
152
+ sbuf.append(d.poll());
153
+ }
154
+ }
155
+ assertEquals("aa", sbuf.toString());
156
+ }
157
+
158
+ public static ConfigSource config()
159
+ {
160
+ return Exec.newConfigSource()
161
+ .set("bucket", "my_bucket")
162
+ .set("path_prefix", "my_path_prefix")
163
+ .set("access_key_id", "my_access_key_id")
164
+ .set("secret_access_key", "my_secret_access_key");
165
+ }
166
+
167
+ static ObjectListing listing(Object... keySizes)
168
+ {
169
+ ObjectListing listing = mock(ObjectListing.class);
170
+ if (keySizes == null) {
171
+ return listing;
172
+ }
173
+
174
+ List<S3ObjectSummary> s3objects = s3objects(keySizes);
175
+ doReturn(s3objects).when(listing).getObjectSummaries();
176
+ doReturn(null).when(listing).getNextMarker();
177
+ return listing;
178
+ }
179
+
180
+ static List<S3ObjectSummary> s3objects(Object... keySizes)
181
+ {
182
+ ImmutableList.Builder<S3ObjectSummary> builder = new ImmutableList.Builder<>();
183
+ if (keySizes == null) {
184
+ return builder.build();
185
+ }
186
+
187
+ for (int i = 0; i < keySizes.length; i += 2) {
188
+ String key = (String) keySizes[i];
189
+ long size = (Long) keySizes[i + 1];
190
+ builder.add(s3object(key, size));
191
+ }
192
+ return builder.build();
193
+ }
194
+
195
+ static S3ObjectSummary s3object(String key, long size)
196
+ {
197
+ S3ObjectSummary o = new S3ObjectSummary();
198
+ o.setKey(key);
199
+ o.setSize(size);
200
+ return o;
201
+ }
202
+
203
+ static S3Object s3object(String key, String value)
204
+ {
205
+ S3Object o = new S3Object();
206
+ o.setObjectContent(new S3ObjectInputStream(new ByteArrayInputStream(value.getBytes()), null));
207
+ ObjectMetadata om = new ObjectMetadata();
208
+ om.setContentLength(value.length());
209
+ o.setObjectMetadata(om);
210
+ return o;
211
+ }
212
+
213
+ static List<TaskReport> emptyTaskReports(int taskCount)
36
214
  {
37
- S3ObjectSummary bucket = new S3ObjectSummary();
38
- bucket.setKey(key);
39
- bucket.setSize(size);
40
- return bucket;
215
+ ImmutableList.Builder<TaskReport> reports = new ImmutableList.Builder<>();
216
+ for (int i = 0; i < taskCount; i++) {
217
+ reports.add(Exec.newTaskReport());
218
+ }
219
+ return reports.build();
41
220
  }
42
221
  }
@@ -0,0 +1,63 @@
1
+ package org.embulk.input.s3;
2
+
3
+ import com.amazonaws.services.s3.AmazonS3Client;
4
+ import com.amazonaws.services.s3.model.GetObjectRequest;
5
+ import org.embulk.EmbulkTestRuntime;
6
+ import org.embulk.input.s3.AbstractS3FileInputPlugin.S3InputStreamReopener;
7
+ import org.junit.Before;
8
+ import org.junit.Rule;
9
+ import org.junit.Test;
10
+
11
+ import java.io.BufferedReader;
12
+ import java.io.InputStream;
13
+ import java.io.InputStreamReader;
14
+
15
+ import static org.junit.Assert.assertEquals;
16
+ import static org.embulk.input.s3.TestS3FileInputPlugin.s3object;
17
+ import static org.mockito.Matchers.any;
18
+ import static org.mockito.Mockito.doReturn;
19
+ import static org.mockito.Mockito.doThrow;
20
+ import static org.mockito.Mockito.mock;
21
+
22
+ public class TestS3InputStreamReopener
23
+ {
24
+ @Rule
25
+ public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
26
+
27
+ private AmazonS3Client client;
28
+
29
+ @Before
30
+ public void createResources()
31
+ {
32
+ client = mock(AmazonS3Client.class);
33
+ }
34
+
35
+ @Test
36
+ public void reopenS3FileByReopener()
37
+ throws Exception
38
+ {
39
+ String content = "value";
40
+
41
+ { // not retry
42
+ doReturn(s3object("in/aa/a", content)).when(client).getObject(any(GetObjectRequest.class));
43
+
44
+ S3InputStreamReopener opener = new S3InputStreamReopener(client, new GetObjectRequest("my_bucket", "in/aa/a"), content.length());
45
+
46
+ try (InputStream in = opener.reopen(0, new RuntimeException())) {
47
+ BufferedReader r = new BufferedReader(new InputStreamReader(in));
48
+ assertEquals("value", r.readLine());
49
+ }
50
+ }
51
+
52
+ { // retry once
53
+ doThrow(new RuntimeException()).doReturn(s3object("in/aa/a", content)).when(client).getObject(any(GetObjectRequest.class));
54
+
55
+ S3InputStreamReopener opener = new S3InputStreamReopener(client, new GetObjectRequest("my_bucket", "in/aa/a"), content.length());
56
+
57
+ try (InputStream in = opener.reopen(0, new RuntimeException())) {
58
+ BufferedReader r = new BufferedReader(new InputStreamReader(in));
59
+ assertEquals("value", r.readLine());
60
+ }
61
+ }
62
+ }
63
+ }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-s3
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-08-19 00:00:00.000000000 Z
11
+ date: 2015-10-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -50,14 +50,15 @@ files:
50
50
  - src/main/java/org/embulk/input/s3/AbstractS3FileInputPlugin.java
51
51
  - src/main/java/org/embulk/input/s3/S3FileInputPlugin.java
52
52
  - src/test/java/org/embulk/input/s3/TestS3FileInputPlugin.java
53
+ - src/test/java/org/embulk/input/s3/TestS3InputStreamReopener.java
53
54
  - classpath/aws-java-sdk-core-1.9.22.jar
54
55
  - classpath/aws-java-sdk-kms-1.9.22.jar
55
56
  - classpath/aws-java-sdk-s3-1.9.22.jar
56
57
  - classpath/commons-codec-1.6.jar
57
- - classpath/commons-logging-1.1.3.jar
58
- - classpath/embulk-input-s3-0.2.2.jar
58
+ - classpath/embulk-input-s3-0.2.3.jar
59
59
  - classpath/httpclient-4.3.4.jar
60
60
  - classpath/httpcore-4.3.2.jar
61
+ - classpath/jcl-over-slf4j-1.7.12.jar
61
62
  - classpath/joda-time-2.8.2.jar
62
63
  homepage: https://github.com/embulk/embulk-input-s3
63
64
  licenses:
Binary file
Binary file