embulk-input-s3 0.2.2 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c12721e3646d935cb83dd17b9200134cab108b1e
4
- data.tar.gz: 0237d9c3e8e7952f52143893a101e3a3e08cc80e
3
+ metadata.gz: f03ab171561f7cca3ff6fe8e17c63b73f31e2db3
4
+ data.tar.gz: a636f66a873c449741f09c51a7b415f550517d26
5
5
  SHA512:
6
- metadata.gz: edb9e5d400d0de2c741e7aecd4654695976a856ecaedf87592b4cd65b25c5682b3b677bfd40aebc1e5e0f771e61d6f863768120edc971a9e1f2255ad2f947c3f
7
- data.tar.gz: 223d01c003770e0c78ba646f7cd9d5c24d3ba26ffd1ba710ff62b88efbdffaa892c43857b2d95c0d1b3400d2f68e77678e75c8738bdd2c40b2880ff1bdd9a29d
6
+ metadata.gz: 24baf759d3956a23317ed73f5878d9ec8d66bc493ba344131a16b325fb3558dd5fbab332dacfcac2f7abc89ecd3518cec89f4c232c7d17135159fa95fb444fac
7
+ data.tar.gz: c83e6f98d5d03e45f83641d879a80c41be41d292d999a1b90ab783ab02f296faa77a3a2276b447804e78dd2908a8b1f7cc026105688eff34fea0fdbbde92130b
Binary file
Binary file
@@ -6,6 +6,8 @@ import java.util.Collections;
6
6
  import java.io.IOException;
7
7
  import java.io.InterruptedIOException;
8
8
  import java.io.InputStream;
9
+
10
+ import com.google.common.annotations.VisibleForTesting;
9
11
  import com.google.common.collect.ImmutableList;
10
12
  import com.google.common.base.Optional;
11
13
  import com.google.common.base.Throwables;
@@ -212,7 +214,8 @@ public abstract class AbstractS3FileInputPlugin
212
214
  return new S3FileInput(task, taskIndex);
213
215
  }
214
216
 
215
- private static class S3InputStreamReopener
217
+ @VisibleForTesting
218
+ static class S3InputStreamReopener
216
219
  implements ResumableInputStream.Reopener
217
220
  {
218
221
  private final Logger log = Exec.getLogger(S3InputStreamReopener.class);
@@ -1,42 +1,221 @@
1
1
  package org.embulk.input.s3;
2
2
 
3
- import static org.junit.Assert.*;
4
- import java.util.List;
5
- import org.junit.Test;
6
- import org.mockito.Mockito;
7
-
8
- import com.google.common.base.Optional;
9
- import com.google.common.collect.ImmutableList;
10
3
  import com.amazonaws.services.s3.AmazonS3Client;
4
+ import com.amazonaws.services.s3.model.GetObjectRequest;
11
5
  import com.amazonaws.services.s3.model.ListObjectsRequest;
12
6
  import com.amazonaws.services.s3.model.ObjectListing;
7
+ import com.amazonaws.services.s3.model.ObjectMetadata;
8
+ import com.amazonaws.services.s3.model.S3Object;
9
+ import com.amazonaws.services.s3.model.S3ObjectInputStream;
13
10
  import com.amazonaws.services.s3.model.S3ObjectSummary;
11
+ import com.google.common.base.Optional;
12
+ import com.google.common.collect.ImmutableList;
13
+ import org.embulk.EmbulkTestRuntime;
14
+ import org.embulk.config.ConfigDiff;
15
+ import org.embulk.config.ConfigSource;
16
+ import org.embulk.config.TaskReport;
17
+ import org.embulk.config.TaskSource;
18
+ import org.embulk.input.s3.AbstractS3FileInputPlugin.PluginTask;
19
+ import org.embulk.input.s3.AbstractS3FileInputPlugin.S3FileInput;
20
+ import org.embulk.input.s3.S3FileInputPlugin.S3PluginTask;
21
+ import org.embulk.spi.Exec;
22
+ import org.embulk.spi.FileInputPlugin;
23
+ import org.embulk.spi.util.LineDecoder;
24
+ import org.junit.Before;
25
+ import org.junit.Rule;
26
+ import org.junit.Test;
27
+
28
+ import java.io.ByteArrayInputStream;
29
+ import java.util.Arrays;
30
+ import java.util.List;
31
+
32
+ import static org.junit.Assert.*;
33
+ import static org.mockito.Matchers.any;
34
+ import static org.mockito.Mockito.doReturn;
35
+ import static org.mockito.Mockito.mock;
36
+ import static org.mockito.Mockito.spy;
14
37
 
15
38
  public class TestS3FileInputPlugin
16
39
  {
40
+ @Rule
41
+ public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
42
+
43
+ private ConfigSource config;
44
+ private S3FileInputPlugin plugin;
45
+ private AmazonS3Client client;
46
+
47
+ @Before
48
+ public void createResources()
49
+ {
50
+ config = config();
51
+ plugin = spy(runtime.getInstance(S3FileInputPlugin.class));
52
+ client = mock(AmazonS3Client.class);
53
+ }
54
+
55
+ @Test
56
+ public void checkS3ClientCreatedSuccessfully()
57
+ {
58
+ PluginTask task = config().loadConfig(plugin.getTaskClass());
59
+ plugin.newS3Client(task);
60
+ }
61
+
17
62
  @Test
18
63
  public void listS3FilesByPrefix()
19
64
  {
20
65
  // AWSS3Client returns list1 for the first iteration and list2 next.
21
- List<S3ObjectSummary> list1 = ImmutableList.<S3ObjectSummary> of(bucket("in/", 0), bucket("in/file/", 0),
22
- bucket("in/file/sample.csv.gz", 12345));
23
- List<S3ObjectSummary> list2 = ImmutableList.<S3ObjectSummary> of(bucket("sample2.csv.gz", 0));
24
- ObjectListing ol = Mockito.mock(ObjectListing.class);
66
+ List<S3ObjectSummary> list1 = s3objects("in/", 0L, "in/file/", 0L, "in/file/sample.csv.gz", 12345L);
67
+ List<S3ObjectSummary> list2 = s3objects("sample2.csv.gz", 0L);
68
+ ObjectListing ol = mock(ObjectListing.class);
25
69
 
26
- Mockito.doReturn(list1).doReturn(list2).when(ol).getObjectSummaries();
27
- AmazonS3Client client = Mockito.mock(AmazonS3Client.class);
28
- Mockito.doReturn(ol).when(client).listObjects(Mockito.any(ListObjectsRequest.class));
29
- Mockito.doReturn("in/file/").doReturn(null).when(ol).getNextMarker();
70
+ doReturn(list1).doReturn(list2).when(ol).getObjectSummaries();
71
+ doReturn(ol).when(client).listObjects(any(ListObjectsRequest.class));
72
+ doReturn("in/file/").doReturn(null).when(ol).getNextMarker();
30
73
 
31
74
  // It counts only size != 0 files.
32
75
  assertEquals(1, S3FileInputPlugin.listS3FilesByPrefix(client, "bucketName", "prefix", Optional.<String>absent()).size());
33
76
  }
34
77
 
35
- private S3ObjectSummary bucket(String key, long size)
78
+ @Test
79
+ public void checkLastPath()
80
+ {
81
+ doReturn(null).when(client).listObjects(any(ListObjectsRequest.class));
82
+ doReturn(client).when(plugin).newS3Client(any(PluginTask.class));
83
+
84
+ { // set a last file to last_path
85
+ ObjectListing listing = listing("in/aa", 0L, "in/aa/a", 3L, "in/aa/b", 2L, "in/aa/c", 1L);
86
+ doReturn(listing).when(client).listObjects(any(ListObjectsRequest.class));
87
+
88
+ ConfigDiff configDiff = plugin.transaction(config, new FileInputPlugin.Control() {
89
+ @Override
90
+ public List<TaskReport> run(TaskSource taskSource, int taskCount)
91
+ {
92
+ assertEquals(3, taskCount);
93
+ List<String> files = taskSource.loadTask(S3PluginTask.class).getFiles();
94
+ assertArrayEquals(new String[]{"in/aa/a", "in/aa/b", "in/aa/c"}, files.toArray(new String[files.size()]));
95
+ return emptyTaskReports(taskCount);
96
+ }
97
+ });
98
+
99
+ assertEquals("in/aa/c", configDiff.get(String.class, "last_path"));
100
+ }
101
+
102
+ { // if files are empty and last_path is not specified, last_path is not set.
103
+ ObjectListing listing = listing("in/aa", 0L);
104
+ doReturn(listing).when(client).listObjects(any(ListObjectsRequest.class));
105
+
106
+ ConfigDiff configDiff = plugin.transaction(config, new FileInputPlugin.Control() {
107
+ @Override
108
+ public List<TaskReport> run(TaskSource taskSource, int taskCount)
109
+ {
110
+ assertEquals(0, taskCount);
111
+ assertTrue(taskSource.loadTask(S3PluginTask.class).getFiles().isEmpty());
112
+ return emptyTaskReports(taskCount);
113
+ }
114
+ });
115
+
116
+ assertFalse(configDiff.has("last_path"));
117
+ }
118
+
119
+ { // if files are empty, keep the previous last_path.
120
+ config.set("last_path", "in/bb");
121
+
122
+ ObjectListing listing = listing("in/aa", 0L);
123
+ doReturn(listing).when(client).listObjects(any(ListObjectsRequest.class));
124
+
125
+ ConfigDiff configDiff = plugin.transaction(config, new FileInputPlugin.Control() {
126
+ @Override
127
+ public List<TaskReport> run(TaskSource taskSource, int taskCount) {
128
+ assertEquals(0, taskCount);
129
+ assertTrue(taskSource.loadTask(S3PluginTask.class).getFiles().isEmpty());
130
+ return emptyTaskReports(taskCount);
131
+ }
132
+ });
133
+
134
+ assertEquals("in/bb", configDiff.get(String.class, "last_path"));
135
+ }
136
+ }
137
+
138
+ @Test
139
+ public void checkS3FileInputByOpen()
140
+ throws Exception
141
+ {
142
+ doReturn(s3object("in/aa/a", "aa")).when(client).getObject(any(GetObjectRequest.class));
143
+ doReturn(client).when(plugin).newS3Client(any(PluginTask.class));
144
+
145
+ PluginTask task = config.loadConfig(plugin.getTaskClass());
146
+ task.setFiles(Arrays.asList(new String[]{"in/aa/a"}));
147
+
148
+ StringBuilder sbuf = new StringBuilder();
149
+ try (S3FileInput input = (S3FileInput) plugin.open(task.dump(), 0)) {
150
+ LineDecoder d = new LineDecoder(input, config.loadConfig(LineDecoder.DecoderTask.class));
151
+ while (d.nextFile()) {
152
+ sbuf.append(d.poll());
153
+ }
154
+ }
155
+ assertEquals("aa", sbuf.toString());
156
+ }
157
+
158
+ public static ConfigSource config()
159
+ {
160
+ return Exec.newConfigSource()
161
+ .set("bucket", "my_bucket")
162
+ .set("path_prefix", "my_path_prefix")
163
+ .set("access_key_id", "my_access_key_id")
164
+ .set("secret_access_key", "my_secret_access_key");
165
+ }
166
+
167
+ static ObjectListing listing(Object... keySizes)
168
+ {
169
+ ObjectListing listing = mock(ObjectListing.class);
170
+ if (keySizes == null) {
171
+ return listing;
172
+ }
173
+
174
+ List<S3ObjectSummary> s3objects = s3objects(keySizes);
175
+ doReturn(s3objects).when(listing).getObjectSummaries();
176
+ doReturn(null).when(listing).getNextMarker();
177
+ return listing;
178
+ }
179
+
180
+ static List<S3ObjectSummary> s3objects(Object... keySizes)
181
+ {
182
+ ImmutableList.Builder<S3ObjectSummary> builder = new ImmutableList.Builder<>();
183
+ if (keySizes == null) {
184
+ return builder.build();
185
+ }
186
+
187
+ for (int i = 0; i < keySizes.length; i += 2) {
188
+ String key = (String) keySizes[i];
189
+ long size = (Long) keySizes[i + 1];
190
+ builder.add(s3object(key, size));
191
+ }
192
+ return builder.build();
193
+ }
194
+
195
+ static S3ObjectSummary s3object(String key, long size)
196
+ {
197
+ S3ObjectSummary o = new S3ObjectSummary();
198
+ o.setKey(key);
199
+ o.setSize(size);
200
+ return o;
201
+ }
202
+
203
+ static S3Object s3object(String key, String value)
204
+ {
205
+ S3Object o = new S3Object();
206
+ o.setObjectContent(new S3ObjectInputStream(new ByteArrayInputStream(value.getBytes()), null));
207
+ ObjectMetadata om = new ObjectMetadata();
208
+ om.setContentLength(value.length());
209
+ o.setObjectMetadata(om);
210
+ return o;
211
+ }
212
+
213
+ static List<TaskReport> emptyTaskReports(int taskCount)
36
214
  {
37
- S3ObjectSummary bucket = new S3ObjectSummary();
38
- bucket.setKey(key);
39
- bucket.setSize(size);
40
- return bucket;
215
+ ImmutableList.Builder<TaskReport> reports = new ImmutableList.Builder<>();
216
+ for (int i = 0; i < taskCount; i++) {
217
+ reports.add(Exec.newTaskReport());
218
+ }
219
+ return reports.build();
41
220
  }
42
221
  }
@@ -0,0 +1,63 @@
1
+ package org.embulk.input.s3;
2
+
3
+ import com.amazonaws.services.s3.AmazonS3Client;
4
+ import com.amazonaws.services.s3.model.GetObjectRequest;
5
+ import org.embulk.EmbulkTestRuntime;
6
+ import org.embulk.input.s3.AbstractS3FileInputPlugin.S3InputStreamReopener;
7
+ import org.junit.Before;
8
+ import org.junit.Rule;
9
+ import org.junit.Test;
10
+
11
+ import java.io.BufferedReader;
12
+ import java.io.InputStream;
13
+ import java.io.InputStreamReader;
14
+
15
+ import static org.junit.Assert.assertEquals;
16
+ import static org.embulk.input.s3.TestS3FileInputPlugin.s3object;
17
+ import static org.mockito.Matchers.any;
18
+ import static org.mockito.Mockito.doReturn;
19
+ import static org.mockito.Mockito.doThrow;
20
+ import static org.mockito.Mockito.mock;
21
+
22
+ public class TestS3InputStreamReopener
23
+ {
24
+ @Rule
25
+ public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
26
+
27
+ private AmazonS3Client client;
28
+
29
+ @Before
30
+ public void createResources()
31
+ {
32
+ client = mock(AmazonS3Client.class);
33
+ }
34
+
35
+ @Test
36
+ public void reopenS3FileByReopener()
37
+ throws Exception
38
+ {
39
+ String content = "value";
40
+
41
+ { // not retry
42
+ doReturn(s3object("in/aa/a", content)).when(client).getObject(any(GetObjectRequest.class));
43
+
44
+ S3InputStreamReopener opener = new S3InputStreamReopener(client, new GetObjectRequest("my_bucket", "in/aa/a"), content.length());
45
+
46
+ try (InputStream in = opener.reopen(0, new RuntimeException())) {
47
+ BufferedReader r = new BufferedReader(new InputStreamReader(in));
48
+ assertEquals("value", r.readLine());
49
+ }
50
+ }
51
+
52
+ { // retry once
53
+ doThrow(new RuntimeException()).doReturn(s3object("in/aa/a", content)).when(client).getObject(any(GetObjectRequest.class));
54
+
55
+ S3InputStreamReopener opener = new S3InputStreamReopener(client, new GetObjectRequest("my_bucket", "in/aa/a"), content.length());
56
+
57
+ try (InputStream in = opener.reopen(0, new RuntimeException())) {
58
+ BufferedReader r = new BufferedReader(new InputStreamReader(in));
59
+ assertEquals("value", r.readLine());
60
+ }
61
+ }
62
+ }
63
+ }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-s3
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-08-19 00:00:00.000000000 Z
11
+ date: 2015-10-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -50,14 +50,15 @@ files:
50
50
  - src/main/java/org/embulk/input/s3/AbstractS3FileInputPlugin.java
51
51
  - src/main/java/org/embulk/input/s3/S3FileInputPlugin.java
52
52
  - src/test/java/org/embulk/input/s3/TestS3FileInputPlugin.java
53
+ - src/test/java/org/embulk/input/s3/TestS3InputStreamReopener.java
53
54
  - classpath/aws-java-sdk-core-1.9.22.jar
54
55
  - classpath/aws-java-sdk-kms-1.9.22.jar
55
56
  - classpath/aws-java-sdk-s3-1.9.22.jar
56
57
  - classpath/commons-codec-1.6.jar
57
- - classpath/commons-logging-1.1.3.jar
58
- - classpath/embulk-input-s3-0.2.2.jar
58
+ - classpath/embulk-input-s3-0.2.3.jar
59
59
  - classpath/httpclient-4.3.4.jar
60
60
  - classpath/httpcore-4.3.2.jar
61
+ - classpath/jcl-over-slf4j-1.7.12.jar
61
62
  - classpath/joda-time-2.8.2.jar
62
63
  homepage: https://github.com/embulk/embulk-input-s3
63
64
  licenses:
Binary file
Binary file