embulk-input-s3 0.3.3 → 0.3.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/classpath/embulk-input-s3-0.3.4.jar +0 -0
- data/classpath/{embulk-util-aws-credentials-0.3.3.jar → embulk-util-aws-credentials-0.3.4.jar} +0 -0
- data/src/main/java/org/embulk/input/s3/AbstractS3FileInputPlugin.java +73 -114
- data/src/main/java/org/embulk/input/s3/DefaultRetryable.java +1 -1
- data/src/main/java/org/embulk/input/s3/explorer/S3FileExplorer.java +21 -0
- data/src/main/java/org/embulk/input/s3/explorer/S3NameOrderPrefixFileExplorer.java +45 -0
- data/src/main/java/org/embulk/input/s3/explorer/S3PrefixFileExplorer.java +57 -0
- data/src/main/java/org/embulk/input/s3/explorer/S3SingleFileExplorer.java +35 -0
- data/src/main/java/org/embulk/input/s3/explorer/S3TimeOrderPrefixFileExplorer.java +70 -0
- data/src/main/java/org/embulk/input/s3/utils/DateUtils.java +28 -0
- data/src/test/java/org/embulk/input/s3/TestS3FileInputPlugin.java +0 -53
- data/src/test/java/org/embulk/input/s3/explorer/TestS3NameOrderPrefixFileExplorer.java +67 -0
- data/src/test/java/org/embulk/input/s3/explorer/TestS3PrefixFileExplorer.java +128 -0
- data/src/test/java/org/embulk/input/s3/explorer/TestS3SingleFileExplorer.java +56 -0
- data/src/test/java/org/embulk/input/s3/explorer/TestS3TimeOrderPrefixFileExplorer.java +112 -0
- metadata +14 -5
- data/classpath/embulk-input-s3-0.3.3.jar +0 -0
- data/src/test/java/org/embulk/input/s3/TestAbstractS3FileInputPlugin.java +0 -164
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0497b4779ac08c091c1291583ef439ada4f48ea2
|
4
|
+
data.tar.gz: 0ef8f1d26751cf22d7975b570d9f7cbcfd7e270f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 82310a7bae6f789ad0962346438a945b4ed59a21fc34be6bbd8e705f979482be58994a1d5d7258f07020ad72cc8dee240313b569b04081f989299a18845dbce5
|
7
|
+
data.tar.gz: 5a41d741bb26cd0d619149c8c1d4b47495c167570f450460e198a129c1b91ebc9f55b45eb771e3b58de6a69bd58f3f52029f0d272200c46f001b3ff6e24ecd13
|
Binary file
|
data/classpath/{embulk-util-aws-credentials-0.3.3.jar → embulk-util-aws-credentials-0.3.4.jar}
RENAMED
Binary file
|
@@ -7,15 +7,9 @@ import com.amazonaws.auth.AWSCredentialsProvider;
|
|
7
7
|
import com.amazonaws.retry.PredefinedRetryPolicies;
|
8
8
|
import com.amazonaws.services.s3.AmazonS3;
|
9
9
|
import com.amazonaws.services.s3.AmazonS3ClientBuilder;
|
10
|
-
import com.amazonaws.services.s3.model.GetObjectMetadataRequest;
|
11
10
|
import com.amazonaws.services.s3.model.GetObjectRequest;
|
12
|
-
import com.amazonaws.services.s3.model.ListObjectsRequest;
|
13
|
-
import com.amazonaws.services.s3.model.ObjectListing;
|
14
|
-
import com.amazonaws.services.s3.model.ObjectMetadata;
|
15
11
|
import com.amazonaws.services.s3.model.S3Object;
|
16
12
|
import com.amazonaws.services.s3.model.S3ObjectInputStream;
|
17
|
-
import com.amazonaws.services.s3.model.S3ObjectSummary;
|
18
|
-
import com.amazonaws.services.s3.model.StorageClass;
|
19
13
|
import com.google.common.annotations.VisibleForTesting;
|
20
14
|
import org.embulk.config.Config;
|
21
15
|
import org.embulk.config.ConfigDefault;
|
@@ -26,6 +20,10 @@ import org.embulk.config.ConfigSource;
|
|
26
20
|
import org.embulk.config.Task;
|
27
21
|
import org.embulk.config.TaskReport;
|
28
22
|
import org.embulk.config.TaskSource;
|
23
|
+
import org.embulk.input.s3.explorer.S3NameOrderPrefixFileExplorer;
|
24
|
+
import org.embulk.input.s3.explorer.S3SingleFileExplorer;
|
25
|
+
import org.embulk.input.s3.explorer.S3TimeOrderPrefixFileExplorer;
|
26
|
+
import org.embulk.input.s3.utils.DateUtils;
|
29
27
|
import org.embulk.spi.BufferAllocator;
|
30
28
|
import org.embulk.spi.Exec;
|
31
29
|
import org.embulk.spi.FileInputPlugin;
|
@@ -40,6 +38,9 @@ import org.slf4j.Logger;
|
|
40
38
|
|
41
39
|
import java.io.IOException;
|
42
40
|
import java.io.InputStream;
|
41
|
+
import java.text.SimpleDateFormat;
|
42
|
+
import java.util.Collections;
|
43
|
+
import java.util.Date;
|
43
44
|
import java.util.Iterator;
|
44
45
|
import java.util.List;
|
45
46
|
import java.util.Optional;
|
@@ -51,6 +52,7 @@ public abstract class AbstractS3FileInputPlugin
|
|
51
52
|
implements FileInputPlugin
|
52
53
|
{
|
53
54
|
private static final Logger LOGGER = Exec.getLogger(S3FileInputPlugin.class);
|
55
|
+
private static final String FULL_DATE_FORMAT = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'";
|
54
56
|
|
55
57
|
public interface PluginTask
|
56
58
|
extends AwsCredentialsTask, FileList.Task, RetrySupportPluginTask, Task
|
@@ -88,12 +90,35 @@ public abstract class AbstractS3FileInputPlugin
|
|
88
90
|
@ConfigDefault("false")
|
89
91
|
boolean getSkipGlacierObjects();
|
90
92
|
|
93
|
+
@Config("use_modified_time")
|
94
|
+
@ConfigDefault("false")
|
95
|
+
boolean getUseModifiedTime();
|
96
|
+
|
97
|
+
@Config("last_modified_time")
|
98
|
+
@ConfigDefault("null")
|
99
|
+
Optional<String> getLastModifiedTime();
|
100
|
+
|
91
101
|
// TODO timeout, ssl, etc
|
92
102
|
|
103
|
+
////////////////////////////////////////
|
104
|
+
// Internal configurations
|
105
|
+
////////////////////////////////////////
|
106
|
+
|
93
107
|
FileList getFiles();
|
94
108
|
|
95
109
|
void setFiles(FileList files);
|
96
110
|
|
111
|
+
/**
|
112
|
+
* end_modified_time is conditionally set if modified_time mode is enabled.
|
113
|
+
*
|
114
|
+
* It is internal state and must not be set in config.yml
|
115
|
+
*/
|
116
|
+
@Config("__end_modified_time")
|
117
|
+
@ConfigDefault("null")
|
118
|
+
Optional<Date> getEndModifiedTime();
|
119
|
+
|
120
|
+
void setEndModifiedTime(Optional<Date> endModifiedTime);
|
121
|
+
|
97
122
|
@ConfigInject
|
98
123
|
BufferAllocator getBufferAllocator();
|
99
124
|
}
|
@@ -105,6 +130,7 @@ public abstract class AbstractS3FileInputPlugin
|
|
105
130
|
{
|
106
131
|
PluginTask task = config.loadConfig(getTaskClass());
|
107
132
|
|
133
|
+
errorIfInternalParamsAreSet(task);
|
108
134
|
validateInputTask(task);
|
109
135
|
// list files recursively
|
110
136
|
task.setFiles(listFiles(task));
|
@@ -130,9 +156,15 @@ public abstract class AbstractS3FileInputPlugin
|
|
130
156
|
|
131
157
|
// last_path
|
132
158
|
if (task.getIncremental()) {
|
133
|
-
|
134
|
-
|
135
|
-
|
159
|
+
if (task.getUseModifiedTime()) {
|
160
|
+
Date endModifiedTime = task.getEndModifiedTime().orElse(new Date());
|
161
|
+
configDiff.set("last_modified_time", new SimpleDateFormat(FULL_DATE_FORMAT).format(endModifiedTime));
|
162
|
+
}
|
163
|
+
else {
|
164
|
+
Optional<String> lastPath = task.getFiles().getLastPath(task.getLastPath());
|
165
|
+
LOGGER.info("Incremental job, setting last_path to [{}]", lastPath.orElse(""));
|
166
|
+
configDiff.set("last_path", lastPath);
|
167
|
+
}
|
136
168
|
}
|
137
169
|
return configDiff;
|
138
170
|
}
|
@@ -237,22 +269,35 @@ public abstract class AbstractS3FileInputPlugin
|
|
237
269
|
String bucketName = task.getBucket();
|
238
270
|
FileList.Builder builder = new FileList.Builder(task);
|
239
271
|
RetryExecutor retryExec = retryExecutorFrom(task);
|
272
|
+
|
240
273
|
if (task.getPath().isPresent()) {
|
241
274
|
LOGGER.info("Start getting object with path: [{}]", task.getPath().get());
|
242
|
-
|
275
|
+
new S3SingleFileExplorer(bucketName, client, retryExec, task.getPath().get()).addToBuilder(builder);
|
276
|
+
return builder.build();
|
243
277
|
}
|
244
|
-
else {
|
245
|
-
// does not need to verify existent path prefix here since there is the validation requires either path or path_prefix
|
246
|
-
LOGGER.info("Start listing file with prefix [{}]", task.getPathPrefix().get());
|
247
|
-
if (task.getPathPrefix().get().equals("/")) {
|
248
|
-
LOGGER.info("Listing files with prefix \"/\". This doesn't mean all files in a bucket. If you intend to read all files, use \"path_prefix: ''\" (empty string) instead.");
|
249
|
-
}
|
250
278
|
|
251
|
-
|
252
|
-
|
253
|
-
|
279
|
+
// does not need to verify existent path prefix here since there is the validation requires either path or path_prefix
|
280
|
+
LOGGER.info("Start listing file with prefix [{}]", task.getPathPrefix().get());
|
281
|
+
if (task.getPathPrefix().get().equals("/")) {
|
282
|
+
LOGGER.info("Listing files with prefix \"/\". This doesn't mean all files in a bucket. If you intend to read all files, use \"path_prefix: ''\" (empty string) instead.");
|
254
283
|
}
|
255
284
|
|
285
|
+
if (task.getUseModifiedTime()) {
|
286
|
+
Date now = new Date();
|
287
|
+
Optional<Date> from = task.getLastModifiedTime().isPresent()
|
288
|
+
? Optional.of(DateUtils.parse(task.getLastModifiedTime().get(), Collections.singletonList(FULL_DATE_FORMAT)))
|
289
|
+
: Optional.empty();
|
290
|
+
task.setEndModifiedTime(Optional.of(now));
|
291
|
+
|
292
|
+
new S3TimeOrderPrefixFileExplorer(bucketName, client, retryExec, task.getPathPrefix().get(),
|
293
|
+
task.getSkipGlacierObjects(), from, now).addToBuilder(builder);
|
294
|
+
}
|
295
|
+
else {
|
296
|
+
new S3NameOrderPrefixFileExplorer(bucketName, client, retryExec, task.getPathPrefix().get(),
|
297
|
+
task.getSkipGlacierObjects(), task.getLastPath().orElse(null)).addToBuilder(builder);
|
298
|
+
}
|
299
|
+
|
300
|
+
LOGGER.info("Found total [{}] files", builder.size());
|
256
301
|
return builder.build();
|
257
302
|
}
|
258
303
|
catch (AmazonServiceException ex) {
|
@@ -268,107 +313,13 @@ public abstract class AbstractS3FileInputPlugin
|
|
268
313
|
}
|
269
314
|
}
|
270
315
|
|
271
|
-
|
272
|
-
public void addS3DirectObject(FileList.Builder builder,
|
273
|
-
final AmazonS3 client,
|
274
|
-
String bucket,
|
275
|
-
String objectKey)
|
276
|
-
{
|
277
|
-
addS3DirectObject(builder, client, bucket, objectKey, null);
|
278
|
-
}
|
279
|
-
|
280
|
-
@VisibleForTesting
|
281
|
-
public void addS3DirectObject(FileList.Builder builder,
|
282
|
-
final AmazonS3 client,
|
283
|
-
String bucket,
|
284
|
-
String objectKey,
|
285
|
-
RetryExecutor retryExec)
|
286
|
-
{
|
287
|
-
final GetObjectMetadataRequest objectMetadataRequest = new GetObjectMetadataRequest(bucket, objectKey);
|
288
|
-
|
289
|
-
ObjectMetadata objectMetadata = new DefaultRetryable<ObjectMetadata>("Looking up for a single object") {
|
290
|
-
@Override
|
291
|
-
public ObjectMetadata call()
|
292
|
-
{
|
293
|
-
return client.getObjectMetadata(objectMetadataRequest);
|
294
|
-
}
|
295
|
-
}.executeWith(retryExec);
|
296
|
-
|
297
|
-
builder.add(objectKey, objectMetadata.getContentLength());
|
298
|
-
}
|
299
|
-
|
300
|
-
private void validateInputTask(PluginTask task)
|
316
|
+
private void validateInputTask(final PluginTask task)
|
301
317
|
{
|
302
318
|
if (!task.getPathPrefix().isPresent() && !task.getPath().isPresent()) {
|
303
319
|
throw new ConfigException("Either path or path_prefix is required");
|
304
320
|
}
|
305
321
|
}
|
306
322
|
|
307
|
-
@VisibleForTesting
|
308
|
-
public static void listS3FilesByPrefix(FileList.Builder builder,
|
309
|
-
final AmazonS3 client,
|
310
|
-
String bucketName,
|
311
|
-
String prefix,
|
312
|
-
Optional<String> lastPath,
|
313
|
-
boolean skipGlacierObjects)
|
314
|
-
{
|
315
|
-
listS3FilesByPrefix(builder, client, bucketName, prefix, lastPath, skipGlacierObjects, null);
|
316
|
-
}
|
317
|
-
|
318
|
-
/**
|
319
|
-
* Lists S3 filenames filtered by prefix.
|
320
|
-
* <p>
|
321
|
-
* The resulting list does not include the file that's size == 0.
|
322
|
-
* @param builder custom Filelist builder
|
323
|
-
* @param client Amazon S3
|
324
|
-
* @param bucketName Amazon S3 bucket name
|
325
|
-
* @param prefix Amazon S3 bucket name prefix
|
326
|
-
* @param lastPath last path
|
327
|
-
* @param skipGlacierObjects skip gracier objects
|
328
|
-
* @param retryExec a retry executor object to do the retrying
|
329
|
-
*/
|
330
|
-
@VisibleForTesting
|
331
|
-
public static void listS3FilesByPrefix(FileList.Builder builder,
|
332
|
-
final AmazonS3 client,
|
333
|
-
String bucketName,
|
334
|
-
String prefix,
|
335
|
-
Optional<String> lastPath,
|
336
|
-
boolean skipGlacierObjects,
|
337
|
-
RetryExecutor retryExec)
|
338
|
-
{
|
339
|
-
String lastKey = lastPath.orElse(null);
|
340
|
-
do {
|
341
|
-
final String finalLastKey = lastKey;
|
342
|
-
final ListObjectsRequest req = new ListObjectsRequest(bucketName, prefix, finalLastKey, null, 1024);
|
343
|
-
ObjectListing ol = new DefaultRetryable<ObjectListing>("Listing objects") {
|
344
|
-
@Override
|
345
|
-
public ObjectListing call()
|
346
|
-
{
|
347
|
-
return client.listObjects(req);
|
348
|
-
}
|
349
|
-
}.executeWith(retryExec);
|
350
|
-
for (S3ObjectSummary s : ol.getObjectSummaries()) {
|
351
|
-
if (s.getStorageClass().equals(StorageClass.Glacier.toString())) {
|
352
|
-
if (skipGlacierObjects) {
|
353
|
-
Exec.getLogger("AbstractS3FileInputPlugin.class").warn("Skipped \"s3://{}/{}\" that stored at Glacier.", bucketName, s.getKey());
|
354
|
-
continue;
|
355
|
-
}
|
356
|
-
else {
|
357
|
-
throw new ConfigException("Detected an object stored at Glacier. Set \"skip_glacier_objects\" option to \"true\" to skip this.");
|
358
|
-
}
|
359
|
-
}
|
360
|
-
if (s.getSize() > 0) {
|
361
|
-
builder.add(s.getKey(), s.getSize());
|
362
|
-
if (!builder.needsMore()) {
|
363
|
-
LOGGER.warn("Too many files matched, stop listing file");
|
364
|
-
return;
|
365
|
-
}
|
366
|
-
}
|
367
|
-
}
|
368
|
-
lastKey = ol.getNextMarker();
|
369
|
-
} while (lastKey != null);
|
370
|
-
}
|
371
|
-
|
372
323
|
@Override
|
373
324
|
public TransactionalFileInput open(TaskSource taskSource, int taskIndex)
|
374
325
|
{
|
@@ -440,6 +391,14 @@ public abstract class AbstractS3FileInputPlugin
|
|
440
391
|
}
|
441
392
|
}
|
442
393
|
|
394
|
+
@VisibleForTesting
|
395
|
+
static void errorIfInternalParamsAreSet(PluginTask task)
|
396
|
+
{
|
397
|
+
if (task.getEndModifiedTime().isPresent()) {
|
398
|
+
throw new ConfigException("'__end_modified_time' must not be set.");
|
399
|
+
}
|
400
|
+
}
|
401
|
+
|
443
402
|
// TODO create single-file InputStreamFileInput utility
|
444
403
|
private class SingleFileProvider
|
445
404
|
implements InputStreamFileInput.Provider
|
@@ -19,7 +19,7 @@ import static org.embulk.spi.util.RetryExecutor.Retryable;
|
|
19
19
|
* Retryable utility, regardless the occurred exceptions,
|
20
20
|
* Also provide a default approach for exception propagation.
|
21
21
|
*/
|
22
|
-
class DefaultRetryable<T> implements Retryable<T>
|
22
|
+
public class DefaultRetryable<T> implements Retryable<T>
|
23
23
|
{
|
24
24
|
private static final Logger log = Exec.getLogger(DefaultRetryable.class);
|
25
25
|
private static final Set<Integer> NONRETRYABLE_STATUS_CODES = new HashSet<Integer>(2);
|
@@ -0,0 +1,21 @@
|
|
1
|
+
package org.embulk.input.s3.explorer;
|
2
|
+
|
3
|
+
import com.amazonaws.services.s3.AmazonS3;
|
4
|
+
import org.embulk.input.s3.FileList;
|
5
|
+
import org.embulk.spi.util.RetryExecutor;
|
6
|
+
|
7
|
+
public abstract class S3FileExplorer
|
8
|
+
{
|
9
|
+
protected String bucketName;
|
10
|
+
protected AmazonS3 s3Client;
|
11
|
+
protected RetryExecutor retryExecutor;
|
12
|
+
|
13
|
+
public S3FileExplorer(final String bucketName, final AmazonS3 s3Client, final RetryExecutor retryExecutor)
|
14
|
+
{
|
15
|
+
this.bucketName = bucketName;
|
16
|
+
this.s3Client = s3Client;
|
17
|
+
this.retryExecutor = retryExecutor;
|
18
|
+
}
|
19
|
+
|
20
|
+
public abstract void addToBuilder(FileList.Builder builder);
|
21
|
+
}
|
@@ -0,0 +1,45 @@
|
|
1
|
+
package org.embulk.input.s3.explorer;
|
2
|
+
|
3
|
+
import com.amazonaws.services.s3.AmazonS3;
|
4
|
+
import com.amazonaws.services.s3.model.ListObjectsRequest;
|
5
|
+
import com.amazonaws.services.s3.model.ObjectListing;
|
6
|
+
import com.amazonaws.services.s3.model.S3ObjectSummary;
|
7
|
+
import org.embulk.input.s3.DefaultRetryable;
|
8
|
+
import org.embulk.spi.util.RetryExecutor;
|
9
|
+
|
10
|
+
import java.util.List;
|
11
|
+
|
12
|
+
public class S3NameOrderPrefixFileExplorer extends S3PrefixFileExplorer
|
13
|
+
{
|
14
|
+
private String lastPath;
|
15
|
+
|
16
|
+
public S3NameOrderPrefixFileExplorer(final String bucketName, final AmazonS3 s3Client, final RetryExecutor retryExecutor,
|
17
|
+
final String pathPrefix, final boolean skipGlacierObjects, final String lastPath)
|
18
|
+
{
|
19
|
+
super(bucketName, s3Client, retryExecutor, pathPrefix, skipGlacierObjects);
|
20
|
+
this.lastPath = lastPath;
|
21
|
+
}
|
22
|
+
|
23
|
+
@Override
|
24
|
+
protected List<S3ObjectSummary> fetch()
|
25
|
+
{
|
26
|
+
final ListObjectsRequest req = new ListObjectsRequest(bucketName, pathPrefix, lastPath, null, 1024);
|
27
|
+
final ObjectListing ol = new DefaultRetryable<ObjectListing>("Listing objects")
|
28
|
+
{
|
29
|
+
@Override
|
30
|
+
public ObjectListing call()
|
31
|
+
{
|
32
|
+
return s3Client.listObjects(req);
|
33
|
+
}
|
34
|
+
}.executeWith(retryExecutor);
|
35
|
+
lastPath = ol.getNextMarker();
|
36
|
+
|
37
|
+
return ol.getObjectSummaries();
|
38
|
+
}
|
39
|
+
|
40
|
+
@Override
|
41
|
+
protected boolean hasNext()
|
42
|
+
{
|
43
|
+
return lastPath != null;
|
44
|
+
}
|
45
|
+
}
|
@@ -0,0 +1,57 @@
|
|
1
|
+
package org.embulk.input.s3.explorer;
|
2
|
+
|
3
|
+
import com.amazonaws.services.s3.AmazonS3;
|
4
|
+
import com.amazonaws.services.s3.model.S3ObjectSummary;
|
5
|
+
import com.amazonaws.services.s3.model.StorageClass;
|
6
|
+
import org.embulk.config.ConfigException;
|
7
|
+
import org.embulk.input.s3.FileList;
|
8
|
+
import org.embulk.spi.Exec;
|
9
|
+
import org.embulk.spi.util.RetryExecutor;
|
10
|
+
import org.slf4j.Logger;
|
11
|
+
|
12
|
+
import java.util.List;
|
13
|
+
|
14
|
+
public abstract class S3PrefixFileExplorer extends S3FileExplorer
|
15
|
+
{
|
16
|
+
private static final Logger LOGGER = Exec.getLogger(S3PrefixFileExplorer.class);
|
17
|
+
|
18
|
+
protected String pathPrefix;
|
19
|
+
|
20
|
+
private final boolean skipGlacierObjects;
|
21
|
+
|
22
|
+
public S3PrefixFileExplorer(final String bucketName, final AmazonS3 s3Client, final RetryExecutor retryExecutor, final String pathPrefix, final boolean skipGlacierObjects)
|
23
|
+
{
|
24
|
+
super(bucketName, s3Client, retryExecutor);
|
25
|
+
this.pathPrefix = pathPrefix;
|
26
|
+
this.skipGlacierObjects = skipGlacierObjects;
|
27
|
+
}
|
28
|
+
|
29
|
+
@Override
|
30
|
+
public void addToBuilder(final FileList.Builder builder)
|
31
|
+
{
|
32
|
+
do {
|
33
|
+
final List<S3ObjectSummary> s3ObjectSummaries = fetch();
|
34
|
+
|
35
|
+
for (final S3ObjectSummary s : s3ObjectSummaries) {
|
36
|
+
if (s.getStorageClass().equals(StorageClass.Glacier.toString())) {
|
37
|
+
if (skipGlacierObjects) {
|
38
|
+
LOGGER.warn("Skipped \"s3://{}/{}\" that stored at Glacier.", bucketName, s.getKey());
|
39
|
+
continue;
|
40
|
+
}
|
41
|
+
throw new ConfigException("Detected an object stored at Glacier. Set \"skip_glacier_objects\" option to \"true\" to skip this.");
|
42
|
+
}
|
43
|
+
if (s.getSize() > 0) {
|
44
|
+
builder.add(s.getKey(), s.getSize());
|
45
|
+
if (!builder.needsMore()) {
|
46
|
+
LOGGER.warn("Too many files matched, stop listing file");
|
47
|
+
return;
|
48
|
+
}
|
49
|
+
}
|
50
|
+
}
|
51
|
+
} while (hasNext());
|
52
|
+
}
|
53
|
+
|
54
|
+
protected abstract List<S3ObjectSummary> fetch();
|
55
|
+
|
56
|
+
protected abstract boolean hasNext();
|
57
|
+
}
|
@@ -0,0 +1,35 @@
|
|
1
|
+
package org.embulk.input.s3.explorer;
|
2
|
+
|
3
|
+
import com.amazonaws.services.s3.AmazonS3;
|
4
|
+
import com.amazonaws.services.s3.model.GetObjectMetadataRequest;
|
5
|
+
import com.amazonaws.services.s3.model.ObjectMetadata;
|
6
|
+
import org.embulk.input.s3.DefaultRetryable;
|
7
|
+
import org.embulk.input.s3.FileList;
|
8
|
+
import org.embulk.spi.util.RetryExecutor;
|
9
|
+
|
10
|
+
public class S3SingleFileExplorer extends S3FileExplorer
|
11
|
+
{
|
12
|
+
private final String path;
|
13
|
+
|
14
|
+
public S3SingleFileExplorer(final String bucket, final AmazonS3 client, final RetryExecutor retryExecutor, final String path)
|
15
|
+
{
|
16
|
+
super(bucket, client, retryExecutor);
|
17
|
+
this.path = path;
|
18
|
+
}
|
19
|
+
|
20
|
+
@Override
|
21
|
+
public void addToBuilder(final FileList.Builder builder)
|
22
|
+
{
|
23
|
+
final GetObjectMetadataRequest objectMetadataRequest = new GetObjectMetadataRequest(bucketName, path);
|
24
|
+
|
25
|
+
final ObjectMetadata objectMetadata = new DefaultRetryable<ObjectMetadata>("Looking up for a single object") {
|
26
|
+
@Override
|
27
|
+
public ObjectMetadata call()
|
28
|
+
{
|
29
|
+
return s3Client.getObjectMetadata(objectMetadataRequest);
|
30
|
+
}
|
31
|
+
}.executeWith(retryExecutor);
|
32
|
+
|
33
|
+
builder.add(path, objectMetadata.getContentLength());
|
34
|
+
}
|
35
|
+
}
|
@@ -0,0 +1,70 @@
|
|
1
|
+
package org.embulk.input.s3.explorer;
|
2
|
+
|
3
|
+
import com.amazonaws.services.s3.AmazonS3;
|
4
|
+
import com.amazonaws.services.s3.model.ListObjectsRequest;
|
5
|
+
import com.amazonaws.services.s3.model.ObjectListing;
|
6
|
+
import com.amazonaws.services.s3.model.S3ObjectSummary;
|
7
|
+
import org.apache.commons.lang3.StringUtils;
|
8
|
+
import org.embulk.input.s3.DefaultRetryable;
|
9
|
+
import org.embulk.spi.Exec;
|
10
|
+
import org.embulk.spi.util.RetryExecutor;
|
11
|
+
import org.slf4j.Logger;
|
12
|
+
|
13
|
+
import java.util.Date;
|
14
|
+
import java.util.List;
|
15
|
+
import java.util.Optional;
|
16
|
+
import java.util.stream.Collectors;
|
17
|
+
|
18
|
+
public class S3TimeOrderPrefixFileExplorer extends S3PrefixFileExplorer
|
19
|
+
{
|
20
|
+
private static final Logger LOGGER = Exec.getLogger(S3TimeOrderPrefixFileExplorer.class);
|
21
|
+
|
22
|
+
private final Optional<Date> from;
|
23
|
+
private final Date to;
|
24
|
+
|
25
|
+
private String lastPath;
|
26
|
+
|
27
|
+
private int numOfReq = 0;
|
28
|
+
|
29
|
+
public S3TimeOrderPrefixFileExplorer(final String bucket, final AmazonS3 client, final RetryExecutor retryExecutor,
|
30
|
+
final String pathPrefix, final boolean skipGlacierObjects, final Optional<Date> from, final Date to)
|
31
|
+
{
|
32
|
+
super(bucket, client, retryExecutor, pathPrefix, skipGlacierObjects);
|
33
|
+
this.from = from;
|
34
|
+
this.to = to;
|
35
|
+
}
|
36
|
+
|
37
|
+
@Override
|
38
|
+
public List<S3ObjectSummary> fetch()
|
39
|
+
{
|
40
|
+
++numOfReq;
|
41
|
+
|
42
|
+
final ListObjectsRequest req = new ListObjectsRequest(bucketName, pathPrefix, lastPath, null, 1024);
|
43
|
+
final ObjectListing objectListing = new DefaultRetryable<ObjectListing>("Listing objects")
|
44
|
+
{
|
45
|
+
@Override
|
46
|
+
public ObjectListing call()
|
47
|
+
{
|
48
|
+
return s3Client.listObjects(req);
|
49
|
+
}
|
50
|
+
}.executeWith(retryExecutor);
|
51
|
+
lastPath = objectListing.getNextMarker();
|
52
|
+
|
53
|
+
return objectListing.getObjectSummaries()
|
54
|
+
.stream()
|
55
|
+
.filter(s3ObjectSummary -> s3ObjectSummary.getLastModified().before(to)
|
56
|
+
&& (!from.isPresent() || s3ObjectSummary.getLastModified().equals(from.get()) || s3ObjectSummary.getLastModified().after(from.get())))
|
57
|
+
.collect(Collectors.toList());
|
58
|
+
}
|
59
|
+
|
60
|
+
@Override
|
61
|
+
public boolean hasNext()
|
62
|
+
{
|
63
|
+
if (lastPath == null) {
|
64
|
+
LOGGER.info("The total number of LIST requests is {}{}.", numOfReq,
|
65
|
+
numOfReq < 10 ? StringUtils.EMPTY : ". Clean up your s3 bucket to reduce the number of requests and improve the ingesting performance");
|
66
|
+
return false;
|
67
|
+
}
|
68
|
+
return true;
|
69
|
+
}
|
70
|
+
}
|
@@ -0,0 +1,28 @@
|
|
1
|
+
package org.embulk.input.s3.utils;
|
2
|
+
|
3
|
+
import com.google.common.base.Joiner;
|
4
|
+
import org.embulk.config.ConfigException;
|
5
|
+
import org.joda.time.format.DateTimeFormat;
|
6
|
+
|
7
|
+
import java.util.Date;
|
8
|
+
import java.util.List;
|
9
|
+
|
10
|
+
public class DateUtils
|
11
|
+
{
|
12
|
+
public static Date parse(final String value, final List<String> supportedFormats)
|
13
|
+
throws ConfigException
|
14
|
+
{
|
15
|
+
for (final String fmt : supportedFormats) {
|
16
|
+
try {
|
17
|
+
return DateTimeFormat.forPattern(fmt).parseDateTime(value).toDate();
|
18
|
+
} catch (final IllegalArgumentException e) {
|
19
|
+
// ignorable exception
|
20
|
+
}
|
21
|
+
}
|
22
|
+
throw new ConfigException("Unsupported DateTime value: '" + value + "', supported formats: [" + Joiner.on(",").join(supportedFormats) + "]");
|
23
|
+
}
|
24
|
+
|
25
|
+
private DateUtils()
|
26
|
+
{
|
27
|
+
}
|
28
|
+
}
|
@@ -1,16 +1,11 @@
|
|
1
1
|
package org.embulk.input.s3;
|
2
2
|
|
3
3
|
import com.amazonaws.services.s3.AmazonS3;
|
4
|
-
import com.amazonaws.services.s3.model.ListObjectsRequest;
|
5
|
-
import com.amazonaws.services.s3.model.ObjectListing;
|
6
4
|
import com.amazonaws.services.s3.model.Region;
|
7
|
-
import com.amazonaws.services.s3.model.S3ObjectSummary;
|
8
|
-
import com.amazonaws.services.s3.model.StorageClass;
|
9
5
|
import com.google.common.collect.ImmutableList;
|
10
6
|
import com.google.common.collect.ImmutableMap;
|
11
7
|
import org.embulk.EmbulkTestRuntime;
|
12
8
|
import org.embulk.config.ConfigDiff;
|
13
|
-
import org.embulk.config.ConfigException;
|
14
9
|
import org.embulk.config.ConfigSource;
|
15
10
|
import org.embulk.config.TaskReport;
|
16
11
|
import org.embulk.config.TaskSource;
|
@@ -25,21 +20,15 @@ import org.junit.Before;
|
|
25
20
|
import org.junit.BeforeClass;
|
26
21
|
import org.junit.Rule;
|
27
22
|
import org.junit.Test;
|
28
|
-
import org.mockito.Mockito;
|
29
23
|
|
30
|
-
import java.lang.reflect.Field;
|
31
24
|
import java.util.ArrayList;
|
32
25
|
import java.util.List;
|
33
|
-
import java.util.Optional;
|
34
26
|
|
35
27
|
import static org.embulk.input.s3.S3FileInputPlugin.S3PluginTask;
|
36
28
|
import static org.junit.Assert.assertEquals;
|
37
29
|
import static org.junit.Assert.assertFalse;
|
38
30
|
import static org.junit.Assert.assertNull;
|
39
31
|
import static org.junit.Assume.assumeNotNull;
|
40
|
-
import static org.mockito.Matchers.any;
|
41
|
-
import static org.mockito.Mockito.doReturn;
|
42
|
-
import static org.mockito.Mockito.mock;
|
43
32
|
|
44
33
|
public class TestS3FileInputPlugin
|
45
34
|
{
|
@@ -97,7 +86,6 @@ public class TestS3FileInputPlugin
|
|
97
86
|
|
98
87
|
@Test
|
99
88
|
public void useLastPath()
|
100
|
-
throws Exception
|
101
89
|
{
|
102
90
|
ConfigSource config = this.config.deepCopy().set("last_path", EMBULK_S3_TEST_PATH_PREFIX + "/sample_01.csv");
|
103
91
|
ConfigDiff configDiff = runner.transaction(config, new Control(runner, output));
|
@@ -117,7 +105,6 @@ public class TestS3FileInputPlugin
|
|
117
105
|
|
118
106
|
@Test
|
119
107
|
public void emptyFilesWithLastPath()
|
120
|
-
throws Exception
|
121
108
|
{
|
122
109
|
ConfigSource config = this.config.deepCopy()
|
123
110
|
.set("path_prefix", "empty_files_prefix")
|
@@ -130,7 +117,6 @@ public class TestS3FileInputPlugin
|
|
130
117
|
|
131
118
|
@Test
|
132
119
|
public void useTotalFileCountLimit()
|
133
|
-
throws Exception
|
134
120
|
{
|
135
121
|
ConfigSource config = this.config.deepCopy().set("total_file_count_limit", 0);
|
136
122
|
ConfigDiff configDiff = runner.transaction(config, new Control(runner, output));
|
@@ -141,7 +127,6 @@ public class TestS3FileInputPlugin
|
|
141
127
|
|
142
128
|
@Test
|
143
129
|
public void usePathMatchPattern()
|
144
|
-
throws Exception
|
145
130
|
{
|
146
131
|
{ // match pattern
|
147
132
|
ConfigSource config = this.config.deepCopy().set("path_match_pattern", "/sample_01");
|
@@ -227,44 +212,6 @@ public class TestS3FileInputPlugin
|
|
227
212
|
assertEquals(s3Client.getRegion(), Region.US_Standard);
|
228
213
|
}
|
229
214
|
|
230
|
-
@Test(expected = ConfigException.class)
|
231
|
-
public void useSkipGlacierObjects() throws Exception
|
232
|
-
{
|
233
|
-
AmazonS3 client;
|
234
|
-
client = mock(AmazonS3.class);
|
235
|
-
doReturn(s3objectList("in/aa/a", StorageClass.Glacier)).when(client).listObjects(any(ListObjectsRequest.class));
|
236
|
-
|
237
|
-
AbstractS3FileInputPlugin plugin = Mockito.mock(AbstractS3FileInputPlugin.class, Mockito.CALLS_REAL_METHODS);
|
238
|
-
plugin.listS3FilesByPrefix(newFileList(config, "sample_00", 100L), client, "test_bucket", "test_prefix", Optional.empty(), false);
|
239
|
-
}
|
240
|
-
|
241
|
-
private FileList.Builder newFileList(ConfigSource config, Object... nameAndSize)
|
242
|
-
{
|
243
|
-
FileList.Builder builder = new FileList.Builder(config);
|
244
|
-
for (int i = 0; i < nameAndSize.length; i += 2) {
|
245
|
-
builder.add((String) nameAndSize[i], (long) nameAndSize[i + 1]);
|
246
|
-
}
|
247
|
-
return builder;
|
248
|
-
}
|
249
|
-
|
250
|
-
private ObjectListing s3objectList(String key, StorageClass storageClass) throws Exception
|
251
|
-
{
|
252
|
-
ObjectListing list = new ObjectListing();
|
253
|
-
|
254
|
-
S3ObjectSummary element = new S3ObjectSummary();
|
255
|
-
element.setKey(key);
|
256
|
-
element.setStorageClass(storageClass.toString());
|
257
|
-
|
258
|
-
List<S3ObjectSummary> objectSummaries = new ArrayList<>();
|
259
|
-
objectSummaries.add(element);
|
260
|
-
|
261
|
-
Field field = list.getClass().getDeclaredField("objectSummaries");
|
262
|
-
field.setAccessible(true);
|
263
|
-
field.set(list, objectSummaries);
|
264
|
-
|
265
|
-
return list;
|
266
|
-
}
|
267
|
-
|
268
215
|
static class Control
|
269
216
|
implements InputPlugin.Control
|
270
217
|
{
|
@@ -0,0 +1,67 @@
|
|
1
|
+
package org.embulk.input.s3.explorer;
|
2
|
+
|
3
|
+
import com.amazonaws.services.s3.AmazonS3;
|
4
|
+
import com.amazonaws.services.s3.model.ListObjectsRequest;
|
5
|
+
import com.amazonaws.services.s3.model.ObjectListing;
|
6
|
+
import org.embulk.EmbulkTestRuntime;
|
7
|
+
import org.junit.Before;
|
8
|
+
import org.junit.Rule;
|
9
|
+
import org.junit.Test;
|
10
|
+
import org.junit.runner.RunWith;
|
11
|
+
import org.mockito.ArgumentCaptor;
|
12
|
+
import org.mockito.Mock;
|
13
|
+
import org.mockito.internal.util.reflection.FieldSetter;
|
14
|
+
import org.mockito.runners.MockitoJUnitRunner;
|
15
|
+
|
16
|
+
import static org.junit.Assert.assertEquals;
|
17
|
+
import static org.junit.Assert.assertFalse;
|
18
|
+
import static org.mockito.Matchers.any;
|
19
|
+
import static org.mockito.Mockito.mock;
|
20
|
+
import static org.mockito.Mockito.verify;
|
21
|
+
import static org.mockito.Mockito.when;
|
22
|
+
|
23
|
+
@RunWith(MockitoJUnitRunner.class)
|
24
|
+
public class TestS3NameOrderPrefixFileExplorer
|
25
|
+
{
|
26
|
+
private static final String BUCKET_NAME = "bucket_name";
|
27
|
+
private static final String PATH_PREFIX = "path_prefix";
|
28
|
+
private static final String LAST_PATH = "last_path";
|
29
|
+
|
30
|
+
@Rule
|
31
|
+
public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
|
32
|
+
|
33
|
+
@Mock
|
34
|
+
private AmazonS3 s3Client;
|
35
|
+
|
36
|
+
private S3NameOrderPrefixFileExplorer s3NameOrderPrefixFileExplorer;
|
37
|
+
|
38
|
+
@Before
|
39
|
+
public void setUp()
|
40
|
+
{
|
41
|
+
s3NameOrderPrefixFileExplorer = new S3NameOrderPrefixFileExplorer(BUCKET_NAME, s3Client, null, PATH_PREFIX, false, LAST_PATH);
|
42
|
+
}
|
43
|
+
|
44
|
+
@Test
|
45
|
+
public void fetch_should_return_list_objects()
|
46
|
+
{
|
47
|
+
final ObjectListing ol = mock(ObjectListing.class);
|
48
|
+
when(s3Client.listObjects(any(ListObjectsRequest.class))).thenReturn(ol);
|
49
|
+
|
50
|
+
s3NameOrderPrefixFileExplorer.fetch();
|
51
|
+
final ArgumentCaptor<ListObjectsRequest> listObjectsRequestCaptor = ArgumentCaptor.forClass(ListObjectsRequest.class);
|
52
|
+
|
53
|
+
verify(ol).getNextMarker();
|
54
|
+
verify(s3Client).listObjects(listObjectsRequestCaptor.capture());
|
55
|
+
final ListObjectsRequest listObjectsRequest = listObjectsRequestCaptor.getValue();
|
56
|
+
assertEquals(BUCKET_NAME, listObjectsRequest.getBucketName());
|
57
|
+
assertEquals(PATH_PREFIX, listObjectsRequest.getPrefix());
|
58
|
+
assertEquals(LAST_PATH, listObjectsRequest.getMarker());
|
59
|
+
}
|
60
|
+
|
61
|
+
@Test
|
62
|
+
public void hasNext_should_return_false_if_no_lastpath() throws NoSuchFieldException
|
63
|
+
{
|
64
|
+
new FieldSetter(s3NameOrderPrefixFileExplorer, s3NameOrderPrefixFileExplorer.getClass().getDeclaredField("lastPath")).set(null);
|
65
|
+
assertFalse(s3NameOrderPrefixFileExplorer.hasNext());
|
66
|
+
}
|
67
|
+
}
|
@@ -0,0 +1,128 @@
|
|
1
|
+
package org.embulk.input.s3.explorer;
|
2
|
+
|
3
|
+
import com.amazonaws.services.s3.AmazonS3;
|
4
|
+
import com.amazonaws.services.s3.model.S3ObjectSummary;
|
5
|
+
import com.amazonaws.services.s3.model.StorageClass;
|
6
|
+
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
|
7
|
+
import org.embulk.EmbulkTestRuntime;
|
8
|
+
import org.embulk.config.ConfigException;
|
9
|
+
import org.embulk.input.s3.FileList;
|
10
|
+
import org.embulk.spi.util.RetryExecutor;
|
11
|
+
import org.junit.Before;
|
12
|
+
import org.junit.Rule;
|
13
|
+
import org.junit.Test;
|
14
|
+
import org.junit.runner.RunWith;
|
15
|
+
import org.mockito.Mock;
|
16
|
+
import org.mockito.runners.MockitoJUnitRunner;
|
17
|
+
|
18
|
+
import java.util.Collections;
|
19
|
+
import java.util.List;
|
20
|
+
|
21
|
+
import static org.mockito.Mockito.doReturn;
|
22
|
+
import static org.mockito.Mockito.never;
|
23
|
+
import static org.mockito.Mockito.spy;
|
24
|
+
import static org.mockito.Mockito.times;
|
25
|
+
import static org.mockito.Mockito.verify;
|
26
|
+
import static org.mockito.Mockito.when;
|
27
|
+
|
28
|
+
@RunWith(MockitoJUnitRunner.class)
|
29
|
+
public class TestS3PrefixFileExplorer
|
30
|
+
{
|
31
|
+
private static final String PATH_PREFIX = "path_prefix";
|
32
|
+
private static final String BUCKET_NAME = "bucket_name";
|
33
|
+
private static final String OBJECT_KEY = "key";
|
34
|
+
|
35
|
+
@SuppressFBWarnings("URF_UNREAD_PUBLIC_OR_PROTECTED_FIELD")
|
36
|
+
@Rule
|
37
|
+
public EmbulkTestRuntime embulkTestRuntime = new EmbulkTestRuntime();
|
38
|
+
|
39
|
+
@Mock
|
40
|
+
private AmazonS3 s3Client;
|
41
|
+
|
42
|
+
@Mock
|
43
|
+
private FileList.Builder builder;
|
44
|
+
|
45
|
+
@Mock
|
46
|
+
private S3ObjectSummary s3ObjectSummary;
|
47
|
+
|
48
|
+
private S3PrefixFileExplorer s3PrefixFileExplorer;
|
49
|
+
|
50
|
+
@Before
|
51
|
+
public void setUp()
|
52
|
+
{
|
53
|
+
s3PrefixFileExplorer = spyS3PrefixFileExplorer(BUCKET_NAME, s3Client, null, PATH_PREFIX, false);
|
54
|
+
doReturn(Collections.singletonList(s3ObjectSummary)).when(s3PrefixFileExplorer).fetch();
|
55
|
+
}
|
56
|
+
|
57
|
+
@Test(expected = ConfigException.class)
|
58
|
+
public void addToBuilder_should_throw_exception_if_notskipped_glacier_storage()
|
59
|
+
{
|
60
|
+
when(s3ObjectSummary.getStorageClass()).thenReturn(StorageClass.Glacier.toString());
|
61
|
+
s3PrefixFileExplorer.addToBuilder(builder);
|
62
|
+
}
|
63
|
+
|
64
|
+
@Test
|
65
|
+
public void addToBuilder_should_skip_glacier_storage_if_allowed()
|
66
|
+
{
|
67
|
+
when(s3ObjectSummary.getStorageClass()).thenReturn(StorageClass.Glacier.toString());
|
68
|
+
// override spied object for changing `skipGlacierObjects`
|
69
|
+
s3PrefixFileExplorer = spyS3PrefixFileExplorer(BUCKET_NAME, s3Client, null, PATH_PREFIX, true);
|
70
|
+
doReturn(false).when(s3PrefixFileExplorer).hasNext();
|
71
|
+
doReturn(Collections.singletonList(s3ObjectSummary)).when(s3PrefixFileExplorer).fetch();
|
72
|
+
s3PrefixFileExplorer.addToBuilder(builder);
|
73
|
+
|
74
|
+
verify(s3PrefixFileExplorer).hasNext();
|
75
|
+
verify(s3ObjectSummary, never()).getSize();
|
76
|
+
}
|
77
|
+
|
78
|
+
@Test
|
79
|
+
public void addToBuilder_should_loop_till_nothing_left()
|
80
|
+
{
|
81
|
+
// There are 3 loops totally but only 2 keys have been imported because the first key is in Glacier storage class and is skipped
|
82
|
+
when(builder.needsMore()).thenReturn(true);
|
83
|
+
// override spied object for changing `skipGlacierObjects`
|
84
|
+
s3PrefixFileExplorer = spyS3PrefixFileExplorer(BUCKET_NAME, s3Client, null, PATH_PREFIX, true);
|
85
|
+
when(s3ObjectSummary.getStorageClass())
|
86
|
+
.thenReturn(StorageClass.Glacier.toString())
|
87
|
+
.thenReturn(StorageClass.Standard.toString());
|
88
|
+
when(s3ObjectSummary.getSize()).thenReturn(1L);
|
89
|
+
when(s3ObjectSummary.getKey()).thenReturn(PATH_PREFIX + OBJECT_KEY);
|
90
|
+
doReturn(Collections.singletonList(s3ObjectSummary)).when(s3PrefixFileExplorer).fetch();
|
91
|
+
doReturn(true).doReturn(true).doReturn(false).when(s3PrefixFileExplorer).hasNext();
|
92
|
+
|
93
|
+
s3PrefixFileExplorer.addToBuilder(builder);
|
94
|
+
verify(builder, times(2)).add(PATH_PREFIX + OBJECT_KEY, 1);
|
95
|
+
}
|
96
|
+
|
97
|
+
@Test
|
98
|
+
public void addToBuilder_should_stop_import_if_too_many_files()
|
99
|
+
{
|
100
|
+
when(builder.needsMore()).thenReturn(false);
|
101
|
+
when(s3ObjectSummary.getStorageClass()).thenReturn(StorageClass.Standard.toString());
|
102
|
+
when(s3ObjectSummary.getKey()).thenReturn(PATH_PREFIX + OBJECT_KEY);
|
103
|
+
when(s3ObjectSummary.getSize()).thenReturn(1L);
|
104
|
+
doReturn(true).when(s3PrefixFileExplorer).hasNext();
|
105
|
+
s3PrefixFileExplorer.addToBuilder(builder);
|
106
|
+
|
107
|
+
verify(builder).add(PATH_PREFIX + OBJECT_KEY, 1);
|
108
|
+
verify(s3PrefixFileExplorer, never()).hasNext();
|
109
|
+
}
|
110
|
+
|
111
|
+
private S3PrefixFileExplorer spyS3PrefixFileExplorer(final String bucketName, final AmazonS3 s3Client, final RetryExecutor retryExecutor, final String pathPrefix, final boolean skipGlacierObjects)
|
112
|
+
{
|
113
|
+
return spy(new S3PrefixFileExplorer(bucketName, s3Client, retryExecutor, pathPrefix, skipGlacierObjects)
|
114
|
+
{
|
115
|
+
@Override
|
116
|
+
protected List<S3ObjectSummary> fetch()
|
117
|
+
{
|
118
|
+
return null;
|
119
|
+
}
|
120
|
+
|
121
|
+
@Override
|
122
|
+
protected boolean hasNext()
|
123
|
+
{
|
124
|
+
return false;
|
125
|
+
}
|
126
|
+
});
|
127
|
+
}
|
128
|
+
}
|
@@ -0,0 +1,56 @@
|
|
1
|
+
package org.embulk.input.s3.explorer;
|
2
|
+
|
3
|
+
import com.amazonaws.services.s3.AmazonS3;
|
4
|
+
import com.amazonaws.services.s3.model.GetObjectMetadataRequest;
|
5
|
+
import com.amazonaws.services.s3.model.ObjectMetadata;
|
6
|
+
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
|
7
|
+
import org.embulk.EmbulkTestRuntime;
|
8
|
+
import org.embulk.input.s3.FileList;
|
9
|
+
import org.junit.Before;
|
10
|
+
import org.junit.Rule;
|
11
|
+
import org.junit.Test;
|
12
|
+
import org.junit.runner.RunWith;
|
13
|
+
import org.mockito.Mock;
|
14
|
+
import org.mockito.runners.MockitoJUnitRunner;
|
15
|
+
|
16
|
+
import static org.mockito.Matchers.any;
|
17
|
+
import static org.mockito.Mockito.verify;
|
18
|
+
import static org.mockito.Mockito.when;
|
19
|
+
|
20
|
+
@RunWith(MockitoJUnitRunner.class)
|
21
|
+
public class TestS3SingleFileExplorer
|
22
|
+
{
|
23
|
+
private static final String PATH = "path";
|
24
|
+
private static final String BUCKET_NAME = "bucket_name";
|
25
|
+
|
26
|
+
@SuppressFBWarnings("URF_UNREAD_PUBLIC_OR_PROTECTED_FIELD")
|
27
|
+
@Rule
|
28
|
+
public EmbulkTestRuntime embulkTestRuntime = new EmbulkTestRuntime();
|
29
|
+
|
30
|
+
@Mock
|
31
|
+
private AmazonS3 s3Client;
|
32
|
+
|
33
|
+
@Mock
|
34
|
+
private FileList.Builder builder;
|
35
|
+
|
36
|
+
@Mock
|
37
|
+
private ObjectMetadata metadata;
|
38
|
+
|
39
|
+
private S3SingleFileExplorer s3SingleFileExplorer;
|
40
|
+
|
41
|
+
@Before
|
42
|
+
public void setUp()
|
43
|
+
{
|
44
|
+
s3SingleFileExplorer = new S3SingleFileExplorer(BUCKET_NAME, s3Client, null, PATH);
|
45
|
+
}
|
46
|
+
|
47
|
+
@Test
|
48
|
+
public void addToBuilder_should_request_single_object_metadata()
|
49
|
+
{
|
50
|
+
when(s3Client.getObjectMetadata(any(GetObjectMetadataRequest.class))).thenReturn(metadata);
|
51
|
+
when(metadata.getContentLength()).thenReturn(1L);
|
52
|
+
s3SingleFileExplorer.addToBuilder(builder);
|
53
|
+
|
54
|
+
verify(builder).add(PATH, 1);
|
55
|
+
}
|
56
|
+
}
|
@@ -0,0 +1,112 @@
|
|
1
|
+
package org.embulk.input.s3.explorer;
|
2
|
+
|
3
|
+
import com.amazonaws.services.s3.AmazonS3;
|
4
|
+
import com.amazonaws.services.s3.model.ListObjectsRequest;
|
5
|
+
import com.amazonaws.services.s3.model.ObjectListing;
|
6
|
+
import com.amazonaws.services.s3.model.S3ObjectSummary;
|
7
|
+
import org.embulk.EmbulkTestRuntime;
|
8
|
+
import org.junit.Before;
|
9
|
+
import org.junit.Rule;
|
10
|
+
import org.junit.Test;
|
11
|
+
import org.junit.runner.RunWith;
|
12
|
+
import org.mockito.Mock;
|
13
|
+
import org.mockito.internal.util.reflection.FieldSetter;
|
14
|
+
import org.mockito.runners.MockitoJUnitRunner;
|
15
|
+
|
16
|
+
import java.util.Arrays;
|
17
|
+
import java.util.Calendar;
|
18
|
+
import java.util.List;
|
19
|
+
import java.util.Optional;
|
20
|
+
|
21
|
+
import static org.junit.Assert.assertEquals;
|
22
|
+
import static org.junit.Assert.assertFalse;
|
23
|
+
import static org.mockito.Matchers.any;
|
24
|
+
import static org.mockito.Mockito.mock;
|
25
|
+
import static org.mockito.Mockito.when;
|
26
|
+
|
27
|
+
@RunWith(MockitoJUnitRunner.class)
|
28
|
+
public class TestS3TimeOrderPrefixFileExplorer
|
29
|
+
{
|
30
|
+
private static final String BUCKET_NAME = "bucket_name";
|
31
|
+
private static final String PATH_PREFIX = "path_prefix";
|
32
|
+
|
33
|
+
@Rule
|
34
|
+
public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
|
35
|
+
|
36
|
+
@Mock
|
37
|
+
private AmazonS3 s3Client;
|
38
|
+
|
39
|
+
private S3TimeOrderPrefixFileExplorer s3TimeOrderPrefixFileExplorer;
|
40
|
+
|
41
|
+
@Before
|
42
|
+
public void setUp()
|
43
|
+
{
|
44
|
+
final Calendar cal = Calendar.getInstance();
|
45
|
+
cal.set(2019, Calendar.MAY, 25, 10, 0);
|
46
|
+
s3TimeOrderPrefixFileExplorer = new S3TimeOrderPrefixFileExplorer(BUCKET_NAME, s3Client, null, PATH_PREFIX,
|
47
|
+
false, Optional.empty(), cal.getTime());
|
48
|
+
}
|
49
|
+
|
50
|
+
@Test
|
51
|
+
public void fetch_should_return_filtered_objects_before_end_time()
|
52
|
+
{
|
53
|
+
final S3ObjectSummary s3ObjectBefore = mock(S3ObjectSummary.class);
|
54
|
+
final Calendar cal = Calendar.getInstance();
|
55
|
+
cal.set(2019, Calendar.MAY, 24, 10, 0);
|
56
|
+
when(s3ObjectBefore.getLastModified()).thenReturn(cal.getTime());
|
57
|
+
|
58
|
+
final S3ObjectSummary s3ObjectAfter = mock(S3ObjectSummary.class);
|
59
|
+
cal.set(2019, Calendar.MAY, 26, 10, 0);
|
60
|
+
when(s3ObjectAfter.getLastModified()).thenReturn(cal.getTime());
|
61
|
+
|
62
|
+
final ObjectListing ol = mock(ObjectListing.class);
|
63
|
+
when(s3Client.listObjects(any(ListObjectsRequest.class))).thenReturn(ol);
|
64
|
+
when(ol.getObjectSummaries()).thenReturn(Arrays.asList(s3ObjectBefore, s3ObjectAfter));
|
65
|
+
|
66
|
+
final List<S3ObjectSummary> result = s3TimeOrderPrefixFileExplorer.fetch();
|
67
|
+
assertEquals(1, result.size());
|
68
|
+
assertEquals(s3ObjectBefore, result.get(0));
|
69
|
+
}
|
70
|
+
|
71
|
+
@Test
|
72
|
+
public void fetch_should_return_filtered_objects_after_or_equals_begin_time()
|
73
|
+
{
|
74
|
+
final Calendar to = Calendar.getInstance();
|
75
|
+
to.set(2019, Calendar.MAY, 25, 10, 0);
|
76
|
+
final Calendar from = Calendar.getInstance();
|
77
|
+
from.set(2019, Calendar.MAY, 24, 10, 0);
|
78
|
+
s3TimeOrderPrefixFileExplorer = new S3TimeOrderPrefixFileExplorer(BUCKET_NAME, s3Client, null, PATH_PREFIX,
|
79
|
+
false, Optional.of(from.getTime()), to.getTime());
|
80
|
+
|
81
|
+
final S3ObjectSummary s3ObjectEqual = mock(S3ObjectSummary.class);
|
82
|
+
final Calendar equalCal = Calendar.getInstance();
|
83
|
+
equalCal.set(2019, Calendar.MAY, 24, 10, 0);
|
84
|
+
when(s3ObjectEqual.getLastModified()).thenReturn(equalCal.getTime());
|
85
|
+
|
86
|
+
final S3ObjectSummary s3ObjectBefore = mock(S3ObjectSummary.class);
|
87
|
+
final Calendar beforeCal = Calendar.getInstance();
|
88
|
+
beforeCal.set(2019, Calendar.MAY, 24, 20, 0);
|
89
|
+
when(s3ObjectBefore.getLastModified()).thenReturn(beforeCal.getTime());
|
90
|
+
|
91
|
+
final S3ObjectSummary s3ObjectAfter = mock(S3ObjectSummary.class);
|
92
|
+
final Calendar afterCal = Calendar.getInstance();
|
93
|
+
afterCal.set(2019, Calendar.MAY, 26, 10, 0);
|
94
|
+
when(s3ObjectAfter.getLastModified()).thenReturn(afterCal.getTime());
|
95
|
+
|
96
|
+
final ObjectListing ol = mock(ObjectListing.class);
|
97
|
+
when(s3Client.listObjects(any(ListObjectsRequest.class))).thenReturn(ol);
|
98
|
+
when(ol.getObjectSummaries()).thenReturn(Arrays.asList(s3ObjectEqual, s3ObjectBefore, s3ObjectAfter));
|
99
|
+
|
100
|
+
final List<S3ObjectSummary> result = s3TimeOrderPrefixFileExplorer.fetch();
|
101
|
+
assertEquals(2, result.size());
|
102
|
+
assertEquals(s3ObjectEqual, result.get(0));
|
103
|
+
assertEquals(s3ObjectBefore, result.get(1));
|
104
|
+
}
|
105
|
+
|
106
|
+
@Test
|
107
|
+
public void hasNext_should_return_false_if_no_lastpath() throws NoSuchFieldException
|
108
|
+
{
|
109
|
+
new FieldSetter(s3TimeOrderPrefixFileExplorer, s3TimeOrderPrefixFileExplorer.getClass().getDeclaredField("lastPath")).set(null);
|
110
|
+
assertFalse(s3TimeOrderPrefixFileExplorer.hasNext());
|
111
|
+
}
|
112
|
+
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-s3
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.3
|
4
|
+
version: 0.3.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-06-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -53,19 +53,28 @@ files:
|
|
53
53
|
- src/main/java/org/embulk/input/s3/HttpProxy.java
|
54
54
|
- src/main/java/org/embulk/input/s3/RetrySupportPluginTask.java
|
55
55
|
- src/main/java/org/embulk/input/s3/S3FileInputPlugin.java
|
56
|
-
- src/test/java/org/embulk/input/s3/TestAbstractS3FileInputPlugin.java
|
56
|
+
- src/main/java/org/embulk/input/s3/explorer/S3FileExplorer.java
|
57
|
+
- src/main/java/org/embulk/input/s3/explorer/S3NameOrderPrefixFileExplorer.java
|
58
|
+
- src/main/java/org/embulk/input/s3/explorer/S3PrefixFileExplorer.java
|
59
|
+
- src/main/java/org/embulk/input/s3/explorer/S3SingleFileExplorer.java
|
60
|
+
- src/main/java/org/embulk/input/s3/explorer/S3TimeOrderPrefixFileExplorer.java
|
61
|
+
- src/main/java/org/embulk/input/s3/utils/DateUtils.java
|
57
62
|
- src/test/java/org/embulk/input/s3/TestAwsCredentials.java
|
58
63
|
- src/test/java/org/embulk/input/s3/TestDefaultRetryable.java
|
59
64
|
- src/test/java/org/embulk/input/s3/TestFileList.java
|
60
65
|
- src/test/java/org/embulk/input/s3/TestHttpProxy.java
|
61
66
|
- src/test/java/org/embulk/input/s3/TestS3FileInputPlugin.java
|
62
67
|
- src/test/java/org/embulk/input/s3/TestS3InputStreamReopener.java
|
68
|
+
- src/test/java/org/embulk/input/s3/explorer/TestS3NameOrderPrefixFileExplorer.java
|
69
|
+
- src/test/java/org/embulk/input/s3/explorer/TestS3PrefixFileExplorer.java
|
70
|
+
- src/test/java/org/embulk/input/s3/explorer/TestS3SingleFileExplorer.java
|
71
|
+
- src/test/java/org/embulk/input/s3/explorer/TestS3TimeOrderPrefixFileExplorer.java
|
63
72
|
- src/test/resources/sample_01.csv
|
64
|
-
- classpath/embulk-util-aws-credentials-0.3.3.jar
|
73
|
+
- classpath/embulk-util-aws-credentials-0.3.4.jar
|
65
74
|
- classpath/httpcore-4.4.9.jar
|
66
75
|
- classpath/httpclient-4.5.5.jar
|
67
76
|
- classpath/ion-java-1.0.2.jar
|
68
|
-
- classpath/embulk-input-s3-0.3.3.jar
|
77
|
+
- classpath/embulk-input-s3-0.3.4.jar
|
69
78
|
- classpath/aws-java-sdk-core-1.11.466.jar
|
70
79
|
- classpath/jcl-over-slf4j-1.7.12.jar
|
71
80
|
- classpath/commons-codec-1.10.jar
|
Binary file
|
@@ -1,164 +0,0 @@
|
|
1
|
-
package org.embulk.input.s3;
|
2
|
-
|
3
|
-
import com.amazonaws.AmazonServiceException;
|
4
|
-
import com.amazonaws.services.s3.AmazonS3;
|
5
|
-
import com.amazonaws.services.s3.model.GetObjectMetadataRequest;
|
6
|
-
import com.amazonaws.services.s3.model.ListObjectsRequest;
|
7
|
-
import com.amazonaws.services.s3.model.ObjectListing;
|
8
|
-
import com.amazonaws.services.s3.model.ObjectMetadata;
|
9
|
-
import org.apache.http.HttpStatus;
|
10
|
-
import org.embulk.EmbulkTestRuntime;
|
11
|
-
import org.embulk.spi.util.RetryExecutor;
|
12
|
-
import org.junit.Before;
|
13
|
-
import org.junit.Rule;
|
14
|
-
import org.junit.Test;
|
15
|
-
|
16
|
-
import java.util.Optional;
|
17
|
-
|
18
|
-
import static org.mockito.Matchers.any;
|
19
|
-
import static org.mockito.Mockito.doReturn;
|
20
|
-
import static org.mockito.Mockito.doThrow;
|
21
|
-
import static org.mockito.Mockito.mock;
|
22
|
-
|
23
|
-
public class TestAbstractS3FileInputPlugin
|
24
|
-
{
|
25
|
-
private static RetryExecutor retryExecutor()
|
26
|
-
{
|
27
|
-
return RetryExecutor.retryExecutor()
|
28
|
-
.withInitialRetryWait(0)
|
29
|
-
.withMaxRetryWait(0);
|
30
|
-
}
|
31
|
-
|
32
|
-
private static AbstractS3FileInputPlugin dummyS3Plugin()
|
33
|
-
{
|
34
|
-
return new AbstractS3FileInputPlugin()
|
35
|
-
{
|
36
|
-
@Override
|
37
|
-
protected Class<? extends PluginTask> getTaskClass()
|
38
|
-
{
|
39
|
-
return PluginTask.class;
|
40
|
-
}
|
41
|
-
};
|
42
|
-
}
|
43
|
-
|
44
|
-
private static class SomeException extends RuntimeException
|
45
|
-
{
|
46
|
-
}
|
47
|
-
|
48
|
-
@Rule
|
49
|
-
public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
|
50
|
-
|
51
|
-
private AmazonS3 client;
|
52
|
-
|
53
|
-
@Before
|
54
|
-
public void createResources()
|
55
|
-
{
|
56
|
-
client = mock(AmazonS3.class);
|
57
|
-
}
|
58
|
-
|
59
|
-
@Test
|
60
|
-
public void listS3FilesByPrefix()
|
61
|
-
{
|
62
|
-
doReturn(new ObjectListing()).when(client).listObjects(any(ListObjectsRequest.class));
|
63
|
-
FileList.Builder builder = new FileList.Builder();
|
64
|
-
dummyS3Plugin().listS3FilesByPrefix(builder, client, "some_bucket", "some_prefix", Optional.of("last_path"), true);
|
65
|
-
}
|
66
|
-
|
67
|
-
@Test
|
68
|
-
public void listS3FileByPrefix_with_retry()
|
69
|
-
{
|
70
|
-
doThrow(new RuntimeException()).doReturn(new ObjectListing())
|
71
|
-
.when(client).listObjects(any(ListObjectsRequest.class));
|
72
|
-
FileList.Builder builder = new FileList.Builder();
|
73
|
-
dummyS3Plugin().listS3FilesByPrefix(
|
74
|
-
builder, client, "some_bucket", "some_prefix", Optional.of("last_path"), true,
|
75
|
-
retryExecutor().withRetryLimit(1));
|
76
|
-
}
|
77
|
-
|
78
|
-
@Test(expected = SomeException.class)
|
79
|
-
public void listS3FileByPrefix_on_retry_gave_up_should_throw_the_original_exception()
|
80
|
-
{
|
81
|
-
doThrow(new SomeException()).doReturn(new ObjectListing())
|
82
|
-
.when(client).listObjects(any(ListObjectsRequest.class));
|
83
|
-
FileList.Builder builder = new FileList.Builder();
|
84
|
-
dummyS3Plugin().listS3FilesByPrefix(
|
85
|
-
builder, client, "some_bucket", "some_prefix", Optional.of("last_path"), true,
|
86
|
-
retryExecutor().withRetryLimit(0));
|
87
|
-
}
|
88
|
-
|
89
|
-
@Test(expected = AmazonServiceException.class)
|
90
|
-
public void listS3FileByPrefix_on_retry_gave_up_should_throw_the_original_exception_in_forbidden_code()
|
91
|
-
{
|
92
|
-
AmazonServiceException exception = new AmazonServiceException("Forbidden exception");
|
93
|
-
exception.setStatusCode(HttpStatus.SC_FORBIDDEN);
|
94
|
-
exception.setErrorType(AmazonServiceException.ErrorType.Client);
|
95
|
-
|
96
|
-
doThrow(exception).doReturn(new ObjectListing())
|
97
|
-
.when(client).listObjects(any(ListObjectsRequest.class));
|
98
|
-
FileList.Builder builder = new FileList.Builder();
|
99
|
-
dummyS3Plugin().listS3FilesByPrefix(
|
100
|
-
builder, client, "some_bucket", "some_prefix", Optional.of("last_path"), true,
|
101
|
-
retryExecutor().withRetryLimit(1));
|
102
|
-
}
|
103
|
-
|
104
|
-
@Test(expected = AmazonServiceException.class)
|
105
|
-
public void listS3FileByPrefix_on_retry_gave_up_should_throw_the_original_exception_in_methodnotallow_code()
|
106
|
-
{
|
107
|
-
AmazonServiceException exception = new AmazonServiceException("method not allow exception");
|
108
|
-
exception.setStatusCode(HttpStatus.SC_METHOD_NOT_ALLOWED);
|
109
|
-
exception.setErrorType(AmazonServiceException.ErrorType.Client);
|
110
|
-
|
111
|
-
doThrow(exception).doReturn(new ObjectListing())
|
112
|
-
.when(client).listObjects(any(ListObjectsRequest.class));
|
113
|
-
FileList.Builder builder = new FileList.Builder();
|
114
|
-
dummyS3Plugin().listS3FilesByPrefix(
|
115
|
-
builder, client, "some_bucket", "some_prefix", Optional.of("last_path"), true,
|
116
|
-
retryExecutor().withRetryLimit(1));
|
117
|
-
}
|
118
|
-
|
119
|
-
@Test(expected = AmazonServiceException.class)
|
120
|
-
public void listS3FileByPrefix_on_retry_gave_up_should_throw_the_original_exception_in_expiredToken_code()
|
121
|
-
{
|
122
|
-
AmazonServiceException exception = new AmazonServiceException("expired token exception");
|
123
|
-
exception.setStatusCode(HttpStatus.SC_BAD_REQUEST);
|
124
|
-
exception.setErrorCode("ExpiredToken");
|
125
|
-
exception.setErrorType(AmazonServiceException.ErrorType.Client);
|
126
|
-
|
127
|
-
doThrow(exception).doReturn(new ObjectListing())
|
128
|
-
.when(client).listObjects(any(ListObjectsRequest.class));
|
129
|
-
FileList.Builder builder = new FileList.Builder();
|
130
|
-
dummyS3Plugin().listS3FilesByPrefix(
|
131
|
-
builder, client, "some_bucket", "some_prefix", Optional.of("last_path"), true,
|
132
|
-
retryExecutor().withRetryLimit(1));
|
133
|
-
}
|
134
|
-
|
135
|
-
@Test
|
136
|
-
public void addS3DirectObject()
|
137
|
-
{
|
138
|
-
doReturn(new ObjectMetadata()).when(client).getObjectMetadata(any(GetObjectMetadataRequest.class));
|
139
|
-
FileList.Builder builder = new FileList.Builder().pathMatchPattern("");
|
140
|
-
dummyS3Plugin().addS3DirectObject(builder, client, "some_bucket", "some_prefix");
|
141
|
-
}
|
142
|
-
|
143
|
-
@Test
|
144
|
-
public void addS3DirectObject_with_retry()
|
145
|
-
{
|
146
|
-
doThrow(new RuntimeException()).doReturn(new ObjectMetadata())
|
147
|
-
.when(client).getObjectMetadata(any(GetObjectMetadataRequest.class));
|
148
|
-
FileList.Builder builder = new FileList.Builder().pathMatchPattern("");
|
149
|
-
dummyS3Plugin().addS3DirectObject(
|
150
|
-
builder, client, "some_bucket", "some_prefix",
|
151
|
-
retryExecutor());
|
152
|
-
}
|
153
|
-
|
154
|
-
@Test(expected = SomeException.class)
|
155
|
-
public void addS3DirectObject_on_retry_gave_up_should_throw_original_exception()
|
156
|
-
{
|
157
|
-
doThrow(new SomeException()).doReturn(new ObjectMetadata())
|
158
|
-
.when(client).getObjectMetadata(any(GetObjectMetadataRequest.class));
|
159
|
-
FileList.Builder builder = new FileList.Builder().pathMatchPattern("");
|
160
|
-
dummyS3Plugin().addS3DirectObject(
|
161
|
-
builder, client, "some_bucket", "some_prefix",
|
162
|
-
retryExecutor().withRetryLimit(0));
|
163
|
-
}
|
164
|
-
}
|