embulk-input-s3 0.3.3 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/classpath/embulk-input-s3-0.3.4.jar +0 -0
- data/classpath/{embulk-util-aws-credentials-0.3.3.jar → embulk-util-aws-credentials-0.3.4.jar} +0 -0
- data/src/main/java/org/embulk/input/s3/AbstractS3FileInputPlugin.java +73 -114
- data/src/main/java/org/embulk/input/s3/DefaultRetryable.java +1 -1
- data/src/main/java/org/embulk/input/s3/explorer/S3FileExplorer.java +21 -0
- data/src/main/java/org/embulk/input/s3/explorer/S3NameOrderPrefixFileExplorer.java +45 -0
- data/src/main/java/org/embulk/input/s3/explorer/S3PrefixFileExplorer.java +57 -0
- data/src/main/java/org/embulk/input/s3/explorer/S3SingleFileExplorer.java +35 -0
- data/src/main/java/org/embulk/input/s3/explorer/S3TimeOrderPrefixFileExplorer.java +70 -0
- data/src/main/java/org/embulk/input/s3/utils/DateUtils.java +28 -0
- data/src/test/java/org/embulk/input/s3/TestS3FileInputPlugin.java +0 -53
- data/src/test/java/org/embulk/input/s3/explorer/TestS3NameOrderPrefixFileExplorer.java +67 -0
- data/src/test/java/org/embulk/input/s3/explorer/TestS3PrefixFileExplorer.java +128 -0
- data/src/test/java/org/embulk/input/s3/explorer/TestS3SingleFileExplorer.java +56 -0
- data/src/test/java/org/embulk/input/s3/explorer/TestS3TimeOrderPrefixFileExplorer.java +112 -0
- metadata +14 -5
- data/classpath/embulk-input-s3-0.3.3.jar +0 -0
- data/src/test/java/org/embulk/input/s3/TestAbstractS3FileInputPlugin.java +0 -164
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0497b4779ac08c091c1291583ef439ada4f48ea2
|
4
|
+
data.tar.gz: 0ef8f1d26751cf22d7975b570d9f7cbcfd7e270f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 82310a7bae6f789ad0962346438a945b4ed59a21fc34be6bbd8e705f979482be58994a1d5d7258f07020ad72cc8dee240313b569b04081f989299a18845dbce5
|
7
|
+
data.tar.gz: 5a41d741bb26cd0d619149c8c1d4b47495c167570f450460e198a129c1b91ebc9f55b45eb771e3b58de6a69bd58f3f52029f0d272200c46f001b3ff6e24ecd13
|
Binary file
|
data/classpath/{embulk-util-aws-credentials-0.3.3.jar → embulk-util-aws-credentials-0.3.4.jar}
RENAMED
Binary file
|
@@ -7,15 +7,9 @@ import com.amazonaws.auth.AWSCredentialsProvider;
|
|
7
7
|
import com.amazonaws.retry.PredefinedRetryPolicies;
|
8
8
|
import com.amazonaws.services.s3.AmazonS3;
|
9
9
|
import com.amazonaws.services.s3.AmazonS3ClientBuilder;
|
10
|
-
import com.amazonaws.services.s3.model.GetObjectMetadataRequest;
|
11
10
|
import com.amazonaws.services.s3.model.GetObjectRequest;
|
12
|
-
import com.amazonaws.services.s3.model.ListObjectsRequest;
|
13
|
-
import com.amazonaws.services.s3.model.ObjectListing;
|
14
|
-
import com.amazonaws.services.s3.model.ObjectMetadata;
|
15
11
|
import com.amazonaws.services.s3.model.S3Object;
|
16
12
|
import com.amazonaws.services.s3.model.S3ObjectInputStream;
|
17
|
-
import com.amazonaws.services.s3.model.S3ObjectSummary;
|
18
|
-
import com.amazonaws.services.s3.model.StorageClass;
|
19
13
|
import com.google.common.annotations.VisibleForTesting;
|
20
14
|
import org.embulk.config.Config;
|
21
15
|
import org.embulk.config.ConfigDefault;
|
@@ -26,6 +20,10 @@ import org.embulk.config.ConfigSource;
|
|
26
20
|
import org.embulk.config.Task;
|
27
21
|
import org.embulk.config.TaskReport;
|
28
22
|
import org.embulk.config.TaskSource;
|
23
|
+
import org.embulk.input.s3.explorer.S3NameOrderPrefixFileExplorer;
|
24
|
+
import org.embulk.input.s3.explorer.S3SingleFileExplorer;
|
25
|
+
import org.embulk.input.s3.explorer.S3TimeOrderPrefixFileExplorer;
|
26
|
+
import org.embulk.input.s3.utils.DateUtils;
|
29
27
|
import org.embulk.spi.BufferAllocator;
|
30
28
|
import org.embulk.spi.Exec;
|
31
29
|
import org.embulk.spi.FileInputPlugin;
|
@@ -40,6 +38,9 @@ import org.slf4j.Logger;
|
|
40
38
|
|
41
39
|
import java.io.IOException;
|
42
40
|
import java.io.InputStream;
|
41
|
+
import java.text.SimpleDateFormat;
|
42
|
+
import java.util.Collections;
|
43
|
+
import java.util.Date;
|
43
44
|
import java.util.Iterator;
|
44
45
|
import java.util.List;
|
45
46
|
import java.util.Optional;
|
@@ -51,6 +52,7 @@ public abstract class AbstractS3FileInputPlugin
|
|
51
52
|
implements FileInputPlugin
|
52
53
|
{
|
53
54
|
private static final Logger LOGGER = Exec.getLogger(S3FileInputPlugin.class);
|
55
|
+
private static final String FULL_DATE_FORMAT = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'";
|
54
56
|
|
55
57
|
public interface PluginTask
|
56
58
|
extends AwsCredentialsTask, FileList.Task, RetrySupportPluginTask, Task
|
@@ -88,12 +90,35 @@ public abstract class AbstractS3FileInputPlugin
|
|
88
90
|
@ConfigDefault("false")
|
89
91
|
boolean getSkipGlacierObjects();
|
90
92
|
|
93
|
+
@Config("use_modified_time")
|
94
|
+
@ConfigDefault("false")
|
95
|
+
boolean getUseModifiedTime();
|
96
|
+
|
97
|
+
@Config("last_modified_time")
|
98
|
+
@ConfigDefault("null")
|
99
|
+
Optional<String> getLastModifiedTime();
|
100
|
+
|
91
101
|
// TODO timeout, ssl, etc
|
92
102
|
|
103
|
+
////////////////////////////////////////
|
104
|
+
// Internal configurations
|
105
|
+
////////////////////////////////////////
|
106
|
+
|
93
107
|
FileList getFiles();
|
94
108
|
|
95
109
|
void setFiles(FileList files);
|
96
110
|
|
111
|
+
/**
|
112
|
+
* end_modified_time is conditionally set if modified_time mode is enabled.
|
113
|
+
*
|
114
|
+
* It is internal state and must not be set in config.yml
|
115
|
+
*/
|
116
|
+
@Config("__end_modified_time")
|
117
|
+
@ConfigDefault("null")
|
118
|
+
Optional<Date> getEndModifiedTime();
|
119
|
+
|
120
|
+
void setEndModifiedTime(Optional<Date> endModifiedTime);
|
121
|
+
|
97
122
|
@ConfigInject
|
98
123
|
BufferAllocator getBufferAllocator();
|
99
124
|
}
|
@@ -105,6 +130,7 @@ public abstract class AbstractS3FileInputPlugin
|
|
105
130
|
{
|
106
131
|
PluginTask task = config.loadConfig(getTaskClass());
|
107
132
|
|
133
|
+
errorIfInternalParamsAreSet(task);
|
108
134
|
validateInputTask(task);
|
109
135
|
// list files recursively
|
110
136
|
task.setFiles(listFiles(task));
|
@@ -130,9 +156,15 @@ public abstract class AbstractS3FileInputPlugin
|
|
130
156
|
|
131
157
|
// last_path
|
132
158
|
if (task.getIncremental()) {
|
133
|
-
|
134
|
-
|
135
|
-
|
159
|
+
if (task.getUseModifiedTime()) {
|
160
|
+
Date endModifiedTime = task.getEndModifiedTime().orElse(new Date());
|
161
|
+
configDiff.set("last_modified_time", new SimpleDateFormat(FULL_DATE_FORMAT).format(endModifiedTime));
|
162
|
+
}
|
163
|
+
else {
|
164
|
+
Optional<String> lastPath = task.getFiles().getLastPath(task.getLastPath());
|
165
|
+
LOGGER.info("Incremental job, setting last_path to [{}]", lastPath.orElse(""));
|
166
|
+
configDiff.set("last_path", lastPath);
|
167
|
+
}
|
136
168
|
}
|
137
169
|
return configDiff;
|
138
170
|
}
|
@@ -237,22 +269,35 @@ public abstract class AbstractS3FileInputPlugin
|
|
237
269
|
String bucketName = task.getBucket();
|
238
270
|
FileList.Builder builder = new FileList.Builder(task);
|
239
271
|
RetryExecutor retryExec = retryExecutorFrom(task);
|
272
|
+
|
240
273
|
if (task.getPath().isPresent()) {
|
241
274
|
LOGGER.info("Start getting object with path: [{}]", task.getPath().get());
|
242
|
-
|
275
|
+
new S3SingleFileExplorer(bucketName, client, retryExec, task.getPath().get()).addToBuilder(builder);
|
276
|
+
return builder.build();
|
243
277
|
}
|
244
|
-
else {
|
245
|
-
// does not need to verify existent path prefix here since there is the validation requires either path or path_prefix
|
246
|
-
LOGGER.info("Start listing file with prefix [{}]", task.getPathPrefix().get());
|
247
|
-
if (task.getPathPrefix().get().equals("/")) {
|
248
|
-
LOGGER.info("Listing files with prefix \"/\". This doesn't mean all files in a bucket. If you intend to read all files, use \"path_prefix: ''\" (empty string) instead.");
|
249
|
-
}
|
250
278
|
|
251
|
-
|
252
|
-
|
253
|
-
|
279
|
+
// does not need to verify existent path prefix here since there is the validation requires either path or path_prefix
|
280
|
+
LOGGER.info("Start listing file with prefix [{}]", task.getPathPrefix().get());
|
281
|
+
if (task.getPathPrefix().get().equals("/")) {
|
282
|
+
LOGGER.info("Listing files with prefix \"/\". This doesn't mean all files in a bucket. If you intend to read all files, use \"path_prefix: ''\" (empty string) instead.");
|
254
283
|
}
|
255
284
|
|
285
|
+
if (task.getUseModifiedTime()) {
|
286
|
+
Date now = new Date();
|
287
|
+
Optional<Date> from = task.getLastModifiedTime().isPresent()
|
288
|
+
? Optional.of(DateUtils.parse(task.getLastModifiedTime().get(), Collections.singletonList(FULL_DATE_FORMAT)))
|
289
|
+
: Optional.empty();
|
290
|
+
task.setEndModifiedTime(Optional.of(now));
|
291
|
+
|
292
|
+
new S3TimeOrderPrefixFileExplorer(bucketName, client, retryExec, task.getPathPrefix().get(),
|
293
|
+
task.getSkipGlacierObjects(), from, now).addToBuilder(builder);
|
294
|
+
}
|
295
|
+
else {
|
296
|
+
new S3NameOrderPrefixFileExplorer(bucketName, client, retryExec, task.getPathPrefix().get(),
|
297
|
+
task.getSkipGlacierObjects(), task.getLastPath().orElse(null)).addToBuilder(builder);
|
298
|
+
}
|
299
|
+
|
300
|
+
LOGGER.info("Found total [{}] files", builder.size());
|
256
301
|
return builder.build();
|
257
302
|
}
|
258
303
|
catch (AmazonServiceException ex) {
|
@@ -268,107 +313,13 @@ public abstract class AbstractS3FileInputPlugin
|
|
268
313
|
}
|
269
314
|
}
|
270
315
|
|
271
|
-
|
272
|
-
public void addS3DirectObject(FileList.Builder builder,
|
273
|
-
final AmazonS3 client,
|
274
|
-
String bucket,
|
275
|
-
String objectKey)
|
276
|
-
{
|
277
|
-
addS3DirectObject(builder, client, bucket, objectKey, null);
|
278
|
-
}
|
279
|
-
|
280
|
-
@VisibleForTesting
|
281
|
-
public void addS3DirectObject(FileList.Builder builder,
|
282
|
-
final AmazonS3 client,
|
283
|
-
String bucket,
|
284
|
-
String objectKey,
|
285
|
-
RetryExecutor retryExec)
|
286
|
-
{
|
287
|
-
final GetObjectMetadataRequest objectMetadataRequest = new GetObjectMetadataRequest(bucket, objectKey);
|
288
|
-
|
289
|
-
ObjectMetadata objectMetadata = new DefaultRetryable<ObjectMetadata>("Looking up for a single object") {
|
290
|
-
@Override
|
291
|
-
public ObjectMetadata call()
|
292
|
-
{
|
293
|
-
return client.getObjectMetadata(objectMetadataRequest);
|
294
|
-
}
|
295
|
-
}.executeWith(retryExec);
|
296
|
-
|
297
|
-
builder.add(objectKey, objectMetadata.getContentLength());
|
298
|
-
}
|
299
|
-
|
300
|
-
private void validateInputTask(PluginTask task)
|
316
|
+
private void validateInputTask(final PluginTask task)
|
301
317
|
{
|
302
318
|
if (!task.getPathPrefix().isPresent() && !task.getPath().isPresent()) {
|
303
319
|
throw new ConfigException("Either path or path_prefix is required");
|
304
320
|
}
|
305
321
|
}
|
306
322
|
|
307
|
-
@VisibleForTesting
|
308
|
-
public static void listS3FilesByPrefix(FileList.Builder builder,
|
309
|
-
final AmazonS3 client,
|
310
|
-
String bucketName,
|
311
|
-
String prefix,
|
312
|
-
Optional<String> lastPath,
|
313
|
-
boolean skipGlacierObjects)
|
314
|
-
{
|
315
|
-
listS3FilesByPrefix(builder, client, bucketName, prefix, lastPath, skipGlacierObjects, null);
|
316
|
-
}
|
317
|
-
|
318
|
-
/**
|
319
|
-
* Lists S3 filenames filtered by prefix.
|
320
|
-
* <p>
|
321
|
-
* The resulting list does not include the file that's size == 0.
|
322
|
-
* @param builder custom Filelist builder
|
323
|
-
* @param client Amazon S3
|
324
|
-
* @param bucketName Amazon S3 bucket name
|
325
|
-
* @param prefix Amazon S3 bucket name prefix
|
326
|
-
* @param lastPath last path
|
327
|
-
* @param skipGlacierObjects skip gracier objects
|
328
|
-
* @param retryExec a retry executor object to do the retrying
|
329
|
-
*/
|
330
|
-
@VisibleForTesting
|
331
|
-
public static void listS3FilesByPrefix(FileList.Builder builder,
|
332
|
-
final AmazonS3 client,
|
333
|
-
String bucketName,
|
334
|
-
String prefix,
|
335
|
-
Optional<String> lastPath,
|
336
|
-
boolean skipGlacierObjects,
|
337
|
-
RetryExecutor retryExec)
|
338
|
-
{
|
339
|
-
String lastKey = lastPath.orElse(null);
|
340
|
-
do {
|
341
|
-
final String finalLastKey = lastKey;
|
342
|
-
final ListObjectsRequest req = new ListObjectsRequest(bucketName, prefix, finalLastKey, null, 1024);
|
343
|
-
ObjectListing ol = new DefaultRetryable<ObjectListing>("Listing objects") {
|
344
|
-
@Override
|
345
|
-
public ObjectListing call()
|
346
|
-
{
|
347
|
-
return client.listObjects(req);
|
348
|
-
}
|
349
|
-
}.executeWith(retryExec);
|
350
|
-
for (S3ObjectSummary s : ol.getObjectSummaries()) {
|
351
|
-
if (s.getStorageClass().equals(StorageClass.Glacier.toString())) {
|
352
|
-
if (skipGlacierObjects) {
|
353
|
-
Exec.getLogger("AbstractS3FileInputPlugin.class").warn("Skipped \"s3://{}/{}\" that stored at Glacier.", bucketName, s.getKey());
|
354
|
-
continue;
|
355
|
-
}
|
356
|
-
else {
|
357
|
-
throw new ConfigException("Detected an object stored at Glacier. Set \"skip_glacier_objects\" option to \"true\" to skip this.");
|
358
|
-
}
|
359
|
-
}
|
360
|
-
if (s.getSize() > 0) {
|
361
|
-
builder.add(s.getKey(), s.getSize());
|
362
|
-
if (!builder.needsMore()) {
|
363
|
-
LOGGER.warn("Too many files matched, stop listing file");
|
364
|
-
return;
|
365
|
-
}
|
366
|
-
}
|
367
|
-
}
|
368
|
-
lastKey = ol.getNextMarker();
|
369
|
-
} while (lastKey != null);
|
370
|
-
}
|
371
|
-
|
372
323
|
@Override
|
373
324
|
public TransactionalFileInput open(TaskSource taskSource, int taskIndex)
|
374
325
|
{
|
@@ -440,6 +391,14 @@ public abstract class AbstractS3FileInputPlugin
|
|
440
391
|
}
|
441
392
|
}
|
442
393
|
|
394
|
+
@VisibleForTesting
|
395
|
+
static void errorIfInternalParamsAreSet(PluginTask task)
|
396
|
+
{
|
397
|
+
if (task.getEndModifiedTime().isPresent()) {
|
398
|
+
throw new ConfigException("'__end_modified_time' must not be set.");
|
399
|
+
}
|
400
|
+
}
|
401
|
+
|
443
402
|
// TODO create single-file InputStreamFileInput utility
|
444
403
|
private class SingleFileProvider
|
445
404
|
implements InputStreamFileInput.Provider
|
@@ -19,7 +19,7 @@ import static org.embulk.spi.util.RetryExecutor.Retryable;
|
|
19
19
|
* Retryable utility, regardless the occurred exceptions,
|
20
20
|
* Also provide a default approach for exception propagation.
|
21
21
|
*/
|
22
|
-
class DefaultRetryable<T> implements Retryable<T>
|
22
|
+
public class DefaultRetryable<T> implements Retryable<T>
|
23
23
|
{
|
24
24
|
private static final Logger log = Exec.getLogger(DefaultRetryable.class);
|
25
25
|
private static final Set<Integer> NONRETRYABLE_STATUS_CODES = new HashSet<Integer>(2);
|
@@ -0,0 +1,21 @@
|
|
1
|
+
package org.embulk.input.s3.explorer;
|
2
|
+
|
3
|
+
import com.amazonaws.services.s3.AmazonS3;
|
4
|
+
import org.embulk.input.s3.FileList;
|
5
|
+
import org.embulk.spi.util.RetryExecutor;
|
6
|
+
|
7
|
+
public abstract class S3FileExplorer
|
8
|
+
{
|
9
|
+
protected String bucketName;
|
10
|
+
protected AmazonS3 s3Client;
|
11
|
+
protected RetryExecutor retryExecutor;
|
12
|
+
|
13
|
+
public S3FileExplorer(final String bucketName, final AmazonS3 s3Client, final RetryExecutor retryExecutor)
|
14
|
+
{
|
15
|
+
this.bucketName = bucketName;
|
16
|
+
this.s3Client = s3Client;
|
17
|
+
this.retryExecutor = retryExecutor;
|
18
|
+
}
|
19
|
+
|
20
|
+
public abstract void addToBuilder(FileList.Builder builder);
|
21
|
+
}
|
@@ -0,0 +1,45 @@
|
|
1
|
+
package org.embulk.input.s3.explorer;
|
2
|
+
|
3
|
+
import com.amazonaws.services.s3.AmazonS3;
|
4
|
+
import com.amazonaws.services.s3.model.ListObjectsRequest;
|
5
|
+
import com.amazonaws.services.s3.model.ObjectListing;
|
6
|
+
import com.amazonaws.services.s3.model.S3ObjectSummary;
|
7
|
+
import org.embulk.input.s3.DefaultRetryable;
|
8
|
+
import org.embulk.spi.util.RetryExecutor;
|
9
|
+
|
10
|
+
import java.util.List;
|
11
|
+
|
12
|
+
public class S3NameOrderPrefixFileExplorer extends S3PrefixFileExplorer
|
13
|
+
{
|
14
|
+
private String lastPath;
|
15
|
+
|
16
|
+
public S3NameOrderPrefixFileExplorer(final String bucketName, final AmazonS3 s3Client, final RetryExecutor retryExecutor,
|
17
|
+
final String pathPrefix, final boolean skipGlacierObjects, final String lastPath)
|
18
|
+
{
|
19
|
+
super(bucketName, s3Client, retryExecutor, pathPrefix, skipGlacierObjects);
|
20
|
+
this.lastPath = lastPath;
|
21
|
+
}
|
22
|
+
|
23
|
+
@Override
|
24
|
+
protected List<S3ObjectSummary> fetch()
|
25
|
+
{
|
26
|
+
final ListObjectsRequest req = new ListObjectsRequest(bucketName, pathPrefix, lastPath, null, 1024);
|
27
|
+
final ObjectListing ol = new DefaultRetryable<ObjectListing>("Listing objects")
|
28
|
+
{
|
29
|
+
@Override
|
30
|
+
public ObjectListing call()
|
31
|
+
{
|
32
|
+
return s3Client.listObjects(req);
|
33
|
+
}
|
34
|
+
}.executeWith(retryExecutor);
|
35
|
+
lastPath = ol.getNextMarker();
|
36
|
+
|
37
|
+
return ol.getObjectSummaries();
|
38
|
+
}
|
39
|
+
|
40
|
+
@Override
|
41
|
+
protected boolean hasNext()
|
42
|
+
{
|
43
|
+
return lastPath != null;
|
44
|
+
}
|
45
|
+
}
|
@@ -0,0 +1,57 @@
|
|
1
|
+
package org.embulk.input.s3.explorer;
|
2
|
+
|
3
|
+
import com.amazonaws.services.s3.AmazonS3;
|
4
|
+
import com.amazonaws.services.s3.model.S3ObjectSummary;
|
5
|
+
import com.amazonaws.services.s3.model.StorageClass;
|
6
|
+
import org.embulk.config.ConfigException;
|
7
|
+
import org.embulk.input.s3.FileList;
|
8
|
+
import org.embulk.spi.Exec;
|
9
|
+
import org.embulk.spi.util.RetryExecutor;
|
10
|
+
import org.slf4j.Logger;
|
11
|
+
|
12
|
+
import java.util.List;
|
13
|
+
|
14
|
+
public abstract class S3PrefixFileExplorer extends S3FileExplorer
|
15
|
+
{
|
16
|
+
private static final Logger LOGGER = Exec.getLogger(S3PrefixFileExplorer.class);
|
17
|
+
|
18
|
+
protected String pathPrefix;
|
19
|
+
|
20
|
+
private final boolean skipGlacierObjects;
|
21
|
+
|
22
|
+
public S3PrefixFileExplorer(final String bucketName, final AmazonS3 s3Client, final RetryExecutor retryExecutor, final String pathPrefix, final boolean skipGlacierObjects)
|
23
|
+
{
|
24
|
+
super(bucketName, s3Client, retryExecutor);
|
25
|
+
this.pathPrefix = pathPrefix;
|
26
|
+
this.skipGlacierObjects = skipGlacierObjects;
|
27
|
+
}
|
28
|
+
|
29
|
+
@Override
|
30
|
+
public void addToBuilder(final FileList.Builder builder)
|
31
|
+
{
|
32
|
+
do {
|
33
|
+
final List<S3ObjectSummary> s3ObjectSummaries = fetch();
|
34
|
+
|
35
|
+
for (final S3ObjectSummary s : s3ObjectSummaries) {
|
36
|
+
if (s.getStorageClass().equals(StorageClass.Glacier.toString())) {
|
37
|
+
if (skipGlacierObjects) {
|
38
|
+
LOGGER.warn("Skipped \"s3://{}/{}\" that stored at Glacier.", bucketName, s.getKey());
|
39
|
+
continue;
|
40
|
+
}
|
41
|
+
throw new ConfigException("Detected an object stored at Glacier. Set \"skip_glacier_objects\" option to \"true\" to skip this.");
|
42
|
+
}
|
43
|
+
if (s.getSize() > 0) {
|
44
|
+
builder.add(s.getKey(), s.getSize());
|
45
|
+
if (!builder.needsMore()) {
|
46
|
+
LOGGER.warn("Too many files matched, stop listing file");
|
47
|
+
return;
|
48
|
+
}
|
49
|
+
}
|
50
|
+
}
|
51
|
+
} while (hasNext());
|
52
|
+
}
|
53
|
+
|
54
|
+
protected abstract List<S3ObjectSummary> fetch();
|
55
|
+
|
56
|
+
protected abstract boolean hasNext();
|
57
|
+
}
|
@@ -0,0 +1,35 @@
|
|
1
|
+
package org.embulk.input.s3.explorer;
|
2
|
+
|
3
|
+
import com.amazonaws.services.s3.AmazonS3;
|
4
|
+
import com.amazonaws.services.s3.model.GetObjectMetadataRequest;
|
5
|
+
import com.amazonaws.services.s3.model.ObjectMetadata;
|
6
|
+
import org.embulk.input.s3.DefaultRetryable;
|
7
|
+
import org.embulk.input.s3.FileList;
|
8
|
+
import org.embulk.spi.util.RetryExecutor;
|
9
|
+
|
10
|
+
public class S3SingleFileExplorer extends S3FileExplorer
|
11
|
+
{
|
12
|
+
private final String path;
|
13
|
+
|
14
|
+
public S3SingleFileExplorer(final String bucket, final AmazonS3 client, final RetryExecutor retryExecutor, final String path)
|
15
|
+
{
|
16
|
+
super(bucket, client, retryExecutor);
|
17
|
+
this.path = path;
|
18
|
+
}
|
19
|
+
|
20
|
+
@Override
|
21
|
+
public void addToBuilder(final FileList.Builder builder)
|
22
|
+
{
|
23
|
+
final GetObjectMetadataRequest objectMetadataRequest = new GetObjectMetadataRequest(bucketName, path);
|
24
|
+
|
25
|
+
final ObjectMetadata objectMetadata = new DefaultRetryable<ObjectMetadata>("Looking up for a single object") {
|
26
|
+
@Override
|
27
|
+
public ObjectMetadata call()
|
28
|
+
{
|
29
|
+
return s3Client.getObjectMetadata(objectMetadataRequest);
|
30
|
+
}
|
31
|
+
}.executeWith(retryExecutor);
|
32
|
+
|
33
|
+
builder.add(path, objectMetadata.getContentLength());
|
34
|
+
}
|
35
|
+
}
|
@@ -0,0 +1,70 @@
|
|
1
|
+
package org.embulk.input.s3.explorer;
|
2
|
+
|
3
|
+
import com.amazonaws.services.s3.AmazonS3;
|
4
|
+
import com.amazonaws.services.s3.model.ListObjectsRequest;
|
5
|
+
import com.amazonaws.services.s3.model.ObjectListing;
|
6
|
+
import com.amazonaws.services.s3.model.S3ObjectSummary;
|
7
|
+
import org.apache.commons.lang3.StringUtils;
|
8
|
+
import org.embulk.input.s3.DefaultRetryable;
|
9
|
+
import org.embulk.spi.Exec;
|
10
|
+
import org.embulk.spi.util.RetryExecutor;
|
11
|
+
import org.slf4j.Logger;
|
12
|
+
|
13
|
+
import java.util.Date;
|
14
|
+
import java.util.List;
|
15
|
+
import java.util.Optional;
|
16
|
+
import java.util.stream.Collectors;
|
17
|
+
|
18
|
+
public class S3TimeOrderPrefixFileExplorer extends S3PrefixFileExplorer
|
19
|
+
{
|
20
|
+
private static final Logger LOGGER = Exec.getLogger(S3TimeOrderPrefixFileExplorer.class);
|
21
|
+
|
22
|
+
private final Optional<Date> from;
|
23
|
+
private final Date to;
|
24
|
+
|
25
|
+
private String lastPath;
|
26
|
+
|
27
|
+
private int numOfReq = 0;
|
28
|
+
|
29
|
+
public S3TimeOrderPrefixFileExplorer(final String bucket, final AmazonS3 client, final RetryExecutor retryExecutor,
|
30
|
+
final String pathPrefix, final boolean skipGlacierObjects, final Optional<Date> from, final Date to)
|
31
|
+
{
|
32
|
+
super(bucket, client, retryExecutor, pathPrefix, skipGlacierObjects);
|
33
|
+
this.from = from;
|
34
|
+
this.to = to;
|
35
|
+
}
|
36
|
+
|
37
|
+
@Override
|
38
|
+
public List<S3ObjectSummary> fetch()
|
39
|
+
{
|
40
|
+
++numOfReq;
|
41
|
+
|
42
|
+
final ListObjectsRequest req = new ListObjectsRequest(bucketName, pathPrefix, lastPath, null, 1024);
|
43
|
+
final ObjectListing objectListing = new DefaultRetryable<ObjectListing>("Listing objects")
|
44
|
+
{
|
45
|
+
@Override
|
46
|
+
public ObjectListing call()
|
47
|
+
{
|
48
|
+
return s3Client.listObjects(req);
|
49
|
+
}
|
50
|
+
}.executeWith(retryExecutor);
|
51
|
+
lastPath = objectListing.getNextMarker();
|
52
|
+
|
53
|
+
return objectListing.getObjectSummaries()
|
54
|
+
.stream()
|
55
|
+
.filter(s3ObjectSummary -> s3ObjectSummary.getLastModified().before(to)
|
56
|
+
&& (!from.isPresent() || s3ObjectSummary.getLastModified().equals(from.get()) || s3ObjectSummary.getLastModified().after(from.get())))
|
57
|
+
.collect(Collectors.toList());
|
58
|
+
}
|
59
|
+
|
60
|
+
@Override
|
61
|
+
public boolean hasNext()
|
62
|
+
{
|
63
|
+
if (lastPath == null) {
|
64
|
+
LOGGER.info("The total number of LIST requests is {}{}.", numOfReq,
|
65
|
+
numOfReq < 10 ? StringUtils.EMPTY : ". Clean up your s3 bucket to reduce the number of requests and improve the ingesting performance");
|
66
|
+
return false;
|
67
|
+
}
|
68
|
+
return true;
|
69
|
+
}
|
70
|
+
}
|
@@ -0,0 +1,28 @@
|
|
1
|
+
package org.embulk.input.s3.utils;
|
2
|
+
|
3
|
+
import com.google.common.base.Joiner;
|
4
|
+
import org.embulk.config.ConfigException;
|
5
|
+
import org.joda.time.format.DateTimeFormat;
|
6
|
+
|
7
|
+
import java.util.Date;
|
8
|
+
import java.util.List;
|
9
|
+
|
10
|
+
public class DateUtils
|
11
|
+
{
|
12
|
+
public static Date parse(final String value, final List<String> supportedFormats)
|
13
|
+
throws ConfigException
|
14
|
+
{
|
15
|
+
for (final String fmt : supportedFormats) {
|
16
|
+
try {
|
17
|
+
return DateTimeFormat.forPattern(fmt).parseDateTime(value).toDate();
|
18
|
+
} catch (final IllegalArgumentException e) {
|
19
|
+
// ignorable exception
|
20
|
+
}
|
21
|
+
}
|
22
|
+
throw new ConfigException("Unsupported DateTime value: '" + value + "', supported formats: [" + Joiner.on(",").join(supportedFormats) + "]");
|
23
|
+
}
|
24
|
+
|
25
|
+
private DateUtils()
|
26
|
+
{
|
27
|
+
}
|
28
|
+
}
|
@@ -1,16 +1,11 @@
|
|
1
1
|
package org.embulk.input.s3;
|
2
2
|
|
3
3
|
import com.amazonaws.services.s3.AmazonS3;
|
4
|
-
import com.amazonaws.services.s3.model.ListObjectsRequest;
|
5
|
-
import com.amazonaws.services.s3.model.ObjectListing;
|
6
4
|
import com.amazonaws.services.s3.model.Region;
|
7
|
-
import com.amazonaws.services.s3.model.S3ObjectSummary;
|
8
|
-
import com.amazonaws.services.s3.model.StorageClass;
|
9
5
|
import com.google.common.collect.ImmutableList;
|
10
6
|
import com.google.common.collect.ImmutableMap;
|
11
7
|
import org.embulk.EmbulkTestRuntime;
|
12
8
|
import org.embulk.config.ConfigDiff;
|
13
|
-
import org.embulk.config.ConfigException;
|
14
9
|
import org.embulk.config.ConfigSource;
|
15
10
|
import org.embulk.config.TaskReport;
|
16
11
|
import org.embulk.config.TaskSource;
|
@@ -25,21 +20,15 @@ import org.junit.Before;
|
|
25
20
|
import org.junit.BeforeClass;
|
26
21
|
import org.junit.Rule;
|
27
22
|
import org.junit.Test;
|
28
|
-
import org.mockito.Mockito;
|
29
23
|
|
30
|
-
import java.lang.reflect.Field;
|
31
24
|
import java.util.ArrayList;
|
32
25
|
import java.util.List;
|
33
|
-
import java.util.Optional;
|
34
26
|
|
35
27
|
import static org.embulk.input.s3.S3FileInputPlugin.S3PluginTask;
|
36
28
|
import static org.junit.Assert.assertEquals;
|
37
29
|
import static org.junit.Assert.assertFalse;
|
38
30
|
import static org.junit.Assert.assertNull;
|
39
31
|
import static org.junit.Assume.assumeNotNull;
|
40
|
-
import static org.mockito.Matchers.any;
|
41
|
-
import static org.mockito.Mockito.doReturn;
|
42
|
-
import static org.mockito.Mockito.mock;
|
43
32
|
|
44
33
|
public class TestS3FileInputPlugin
|
45
34
|
{
|
@@ -97,7 +86,6 @@ public class TestS3FileInputPlugin
|
|
97
86
|
|
98
87
|
@Test
|
99
88
|
public void useLastPath()
|
100
|
-
throws Exception
|
101
89
|
{
|
102
90
|
ConfigSource config = this.config.deepCopy().set("last_path", EMBULK_S3_TEST_PATH_PREFIX + "/sample_01.csv");
|
103
91
|
ConfigDiff configDiff = runner.transaction(config, new Control(runner, output));
|
@@ -117,7 +105,6 @@ public class TestS3FileInputPlugin
|
|
117
105
|
|
118
106
|
@Test
|
119
107
|
public void emptyFilesWithLastPath()
|
120
|
-
throws Exception
|
121
108
|
{
|
122
109
|
ConfigSource config = this.config.deepCopy()
|
123
110
|
.set("path_prefix", "empty_files_prefix")
|
@@ -130,7 +117,6 @@ public class TestS3FileInputPlugin
|
|
130
117
|
|
131
118
|
@Test
|
132
119
|
public void useTotalFileCountLimit()
|
133
|
-
throws Exception
|
134
120
|
{
|
135
121
|
ConfigSource config = this.config.deepCopy().set("total_file_count_limit", 0);
|
136
122
|
ConfigDiff configDiff = runner.transaction(config, new Control(runner, output));
|
@@ -141,7 +127,6 @@ public class TestS3FileInputPlugin
|
|
141
127
|
|
142
128
|
@Test
|
143
129
|
public void usePathMatchPattern()
|
144
|
-
throws Exception
|
145
130
|
{
|
146
131
|
{ // match pattern
|
147
132
|
ConfigSource config = this.config.deepCopy().set("path_match_pattern", "/sample_01");
|
@@ -227,44 +212,6 @@ public class TestS3FileInputPlugin
|
|
227
212
|
assertEquals(s3Client.getRegion(), Region.US_Standard);
|
228
213
|
}
|
229
214
|
|
230
|
-
@Test(expected = ConfigException.class)
|
231
|
-
public void useSkipGlacierObjects() throws Exception
|
232
|
-
{
|
233
|
-
AmazonS3 client;
|
234
|
-
client = mock(AmazonS3.class);
|
235
|
-
doReturn(s3objectList("in/aa/a", StorageClass.Glacier)).when(client).listObjects(any(ListObjectsRequest.class));
|
236
|
-
|
237
|
-
AbstractS3FileInputPlugin plugin = Mockito.mock(AbstractS3FileInputPlugin.class, Mockito.CALLS_REAL_METHODS);
|
238
|
-
plugin.listS3FilesByPrefix(newFileList(config, "sample_00", 100L), client, "test_bucket", "test_prefix", Optional.empty(), false);
|
239
|
-
}
|
240
|
-
|
241
|
-
private FileList.Builder newFileList(ConfigSource config, Object... nameAndSize)
|
242
|
-
{
|
243
|
-
FileList.Builder builder = new FileList.Builder(config);
|
244
|
-
for (int i = 0; i < nameAndSize.length; i += 2) {
|
245
|
-
builder.add((String) nameAndSize[i], (long) nameAndSize[i + 1]);
|
246
|
-
}
|
247
|
-
return builder;
|
248
|
-
}
|
249
|
-
|
250
|
-
private ObjectListing s3objectList(String key, StorageClass storageClass) throws Exception
|
251
|
-
{
|
252
|
-
ObjectListing list = new ObjectListing();
|
253
|
-
|
254
|
-
S3ObjectSummary element = new S3ObjectSummary();
|
255
|
-
element.setKey(key);
|
256
|
-
element.setStorageClass(storageClass.toString());
|
257
|
-
|
258
|
-
List<S3ObjectSummary> objectSummaries = new ArrayList<>();
|
259
|
-
objectSummaries.add(element);
|
260
|
-
|
261
|
-
Field field = list.getClass().getDeclaredField("objectSummaries");
|
262
|
-
field.setAccessible(true);
|
263
|
-
field.set(list, objectSummaries);
|
264
|
-
|
265
|
-
return list;
|
266
|
-
}
|
267
|
-
|
268
215
|
static class Control
|
269
216
|
implements InputPlugin.Control
|
270
217
|
{
|
@@ -0,0 +1,67 @@
|
|
1
|
+
package org.embulk.input.s3.explorer;
|
2
|
+
|
3
|
+
import com.amazonaws.services.s3.AmazonS3;
|
4
|
+
import com.amazonaws.services.s3.model.ListObjectsRequest;
|
5
|
+
import com.amazonaws.services.s3.model.ObjectListing;
|
6
|
+
import org.embulk.EmbulkTestRuntime;
|
7
|
+
import org.junit.Before;
|
8
|
+
import org.junit.Rule;
|
9
|
+
import org.junit.Test;
|
10
|
+
import org.junit.runner.RunWith;
|
11
|
+
import org.mockito.ArgumentCaptor;
|
12
|
+
import org.mockito.Mock;
|
13
|
+
import org.mockito.internal.util.reflection.FieldSetter;
|
14
|
+
import org.mockito.runners.MockitoJUnitRunner;
|
15
|
+
|
16
|
+
import static org.junit.Assert.assertEquals;
|
17
|
+
import static org.junit.Assert.assertFalse;
|
18
|
+
import static org.mockito.Matchers.any;
|
19
|
+
import static org.mockito.Mockito.mock;
|
20
|
+
import static org.mockito.Mockito.verify;
|
21
|
+
import static org.mockito.Mockito.when;
|
22
|
+
|
23
|
+
@RunWith(MockitoJUnitRunner.class)
|
24
|
+
public class TestS3NameOrderPrefixFileExplorer
|
25
|
+
{
|
26
|
+
private static final String BUCKET_NAME = "bucket_name";
|
27
|
+
private static final String PATH_PREFIX = "path_prefix";
|
28
|
+
private static final String LAST_PATH = "last_path";
|
29
|
+
|
30
|
+
@Rule
|
31
|
+
public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
|
32
|
+
|
33
|
+
@Mock
|
34
|
+
private AmazonS3 s3Client;
|
35
|
+
|
36
|
+
private S3NameOrderPrefixFileExplorer s3NameOrderPrefixFileExplorer;
|
37
|
+
|
38
|
+
@Before
|
39
|
+
public void setUp()
|
40
|
+
{
|
41
|
+
s3NameOrderPrefixFileExplorer = new S3NameOrderPrefixFileExplorer(BUCKET_NAME, s3Client, null, PATH_PREFIX, false, LAST_PATH);
|
42
|
+
}
|
43
|
+
|
44
|
+
@Test
|
45
|
+
public void fetch_should_return_list_objects()
|
46
|
+
{
|
47
|
+
final ObjectListing ol = mock(ObjectListing.class);
|
48
|
+
when(s3Client.listObjects(any(ListObjectsRequest.class))).thenReturn(ol);
|
49
|
+
|
50
|
+
s3NameOrderPrefixFileExplorer.fetch();
|
51
|
+
final ArgumentCaptor<ListObjectsRequest> listObjectsRequestCaptor = ArgumentCaptor.forClass(ListObjectsRequest.class);
|
52
|
+
|
53
|
+
verify(ol).getNextMarker();
|
54
|
+
verify(s3Client).listObjects(listObjectsRequestCaptor.capture());
|
55
|
+
final ListObjectsRequest listObjectsRequest = listObjectsRequestCaptor.getValue();
|
56
|
+
assertEquals(BUCKET_NAME, listObjectsRequest.getBucketName());
|
57
|
+
assertEquals(PATH_PREFIX, listObjectsRequest.getPrefix());
|
58
|
+
assertEquals(LAST_PATH, listObjectsRequest.getMarker());
|
59
|
+
}
|
60
|
+
|
61
|
+
@Test
|
62
|
+
public void hasNext_should_return_false_if_no_lastpath() throws NoSuchFieldException
|
63
|
+
{
|
64
|
+
new FieldSetter(s3NameOrderPrefixFileExplorer, s3NameOrderPrefixFileExplorer.getClass().getDeclaredField("lastPath")).set(null);
|
65
|
+
assertFalse(s3NameOrderPrefixFileExplorer.hasNext());
|
66
|
+
}
|
67
|
+
}
|
@@ -0,0 +1,128 @@
|
|
1
|
+
package org.embulk.input.s3.explorer;
|
2
|
+
|
3
|
+
import com.amazonaws.services.s3.AmazonS3;
|
4
|
+
import com.amazonaws.services.s3.model.S3ObjectSummary;
|
5
|
+
import com.amazonaws.services.s3.model.StorageClass;
|
6
|
+
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
|
7
|
+
import org.embulk.EmbulkTestRuntime;
|
8
|
+
import org.embulk.config.ConfigException;
|
9
|
+
import org.embulk.input.s3.FileList;
|
10
|
+
import org.embulk.spi.util.RetryExecutor;
|
11
|
+
import org.junit.Before;
|
12
|
+
import org.junit.Rule;
|
13
|
+
import org.junit.Test;
|
14
|
+
import org.junit.runner.RunWith;
|
15
|
+
import org.mockito.Mock;
|
16
|
+
import org.mockito.runners.MockitoJUnitRunner;
|
17
|
+
|
18
|
+
import java.util.Collections;
|
19
|
+
import java.util.List;
|
20
|
+
|
21
|
+
import static org.mockito.Mockito.doReturn;
|
22
|
+
import static org.mockito.Mockito.never;
|
23
|
+
import static org.mockito.Mockito.spy;
|
24
|
+
import static org.mockito.Mockito.times;
|
25
|
+
import static org.mockito.Mockito.verify;
|
26
|
+
import static org.mockito.Mockito.when;
|
27
|
+
|
28
|
+
@RunWith(MockitoJUnitRunner.class)
|
29
|
+
public class TestS3PrefixFileExplorer
|
30
|
+
{
|
31
|
+
private static final String PATH_PREFIX = "path_prefix";
|
32
|
+
private static final String BUCKET_NAME = "bucket_name";
|
33
|
+
private static final String OBJECT_KEY = "key";
|
34
|
+
|
35
|
+
@SuppressFBWarnings("URF_UNREAD_PUBLIC_OR_PROTECTED_FIELD")
|
36
|
+
@Rule
|
37
|
+
public EmbulkTestRuntime embulkTestRuntime = new EmbulkTestRuntime();
|
38
|
+
|
39
|
+
@Mock
|
40
|
+
private AmazonS3 s3Client;
|
41
|
+
|
42
|
+
@Mock
|
43
|
+
private FileList.Builder builder;
|
44
|
+
|
45
|
+
@Mock
|
46
|
+
private S3ObjectSummary s3ObjectSummary;
|
47
|
+
|
48
|
+
private S3PrefixFileExplorer s3PrefixFileExplorer;
|
49
|
+
|
50
|
+
@Before
|
51
|
+
public void setUp()
|
52
|
+
{
|
53
|
+
s3PrefixFileExplorer = spyS3PrefixFileExplorer(BUCKET_NAME, s3Client, null, PATH_PREFIX, false);
|
54
|
+
doReturn(Collections.singletonList(s3ObjectSummary)).when(s3PrefixFileExplorer).fetch();
|
55
|
+
}
|
56
|
+
|
57
|
+
@Test(expected = ConfigException.class)
|
58
|
+
public void addToBuilder_should_throw_exception_if_notskipped_glacier_storage()
|
59
|
+
{
|
60
|
+
when(s3ObjectSummary.getStorageClass()).thenReturn(StorageClass.Glacier.toString());
|
61
|
+
s3PrefixFileExplorer.addToBuilder(builder);
|
62
|
+
}
|
63
|
+
|
64
|
+
@Test
|
65
|
+
public void addToBuilder_should_skip_glacier_storage_if_allowed()
|
66
|
+
{
|
67
|
+
when(s3ObjectSummary.getStorageClass()).thenReturn(StorageClass.Glacier.toString());
|
68
|
+
// override spied object for changing `skipGlacierObjects`
|
69
|
+
s3PrefixFileExplorer = spyS3PrefixFileExplorer(BUCKET_NAME, s3Client, null, PATH_PREFIX, true);
|
70
|
+
doReturn(false).when(s3PrefixFileExplorer).hasNext();
|
71
|
+
doReturn(Collections.singletonList(s3ObjectSummary)).when(s3PrefixFileExplorer).fetch();
|
72
|
+
s3PrefixFileExplorer.addToBuilder(builder);
|
73
|
+
|
74
|
+
verify(s3PrefixFileExplorer).hasNext();
|
75
|
+
verify(s3ObjectSummary, never()).getSize();
|
76
|
+
}
|
77
|
+
|
78
|
+
@Test
|
79
|
+
public void addToBuilder_should_loop_till_nothing_left()
|
80
|
+
{
|
81
|
+
// There are 3 loops totally but only 2 keys have been imported because the first key is in Glacier storage class and is skipped
|
82
|
+
when(builder.needsMore()).thenReturn(true);
|
83
|
+
// override spied object for changing `skipGlacierObjects`
|
84
|
+
s3PrefixFileExplorer = spyS3PrefixFileExplorer(BUCKET_NAME, s3Client, null, PATH_PREFIX, true);
|
85
|
+
when(s3ObjectSummary.getStorageClass())
|
86
|
+
.thenReturn(StorageClass.Glacier.toString())
|
87
|
+
.thenReturn(StorageClass.Standard.toString());
|
88
|
+
when(s3ObjectSummary.getSize()).thenReturn(1L);
|
89
|
+
when(s3ObjectSummary.getKey()).thenReturn(PATH_PREFIX + OBJECT_KEY);
|
90
|
+
doReturn(Collections.singletonList(s3ObjectSummary)).when(s3PrefixFileExplorer).fetch();
|
91
|
+
doReturn(true).doReturn(true).doReturn(false).when(s3PrefixFileExplorer).hasNext();
|
92
|
+
|
93
|
+
s3PrefixFileExplorer.addToBuilder(builder);
|
94
|
+
verify(builder, times(2)).add(PATH_PREFIX + OBJECT_KEY, 1);
|
95
|
+
}
|
96
|
+
|
97
|
+
@Test
|
98
|
+
public void addToBuilder_should_stop_import_if_too_many_files()
|
99
|
+
{
|
100
|
+
when(builder.needsMore()).thenReturn(false);
|
101
|
+
when(s3ObjectSummary.getStorageClass()).thenReturn(StorageClass.Standard.toString());
|
102
|
+
when(s3ObjectSummary.getKey()).thenReturn(PATH_PREFIX + OBJECT_KEY);
|
103
|
+
when(s3ObjectSummary.getSize()).thenReturn(1L);
|
104
|
+
doReturn(true).when(s3PrefixFileExplorer).hasNext();
|
105
|
+
s3PrefixFileExplorer.addToBuilder(builder);
|
106
|
+
|
107
|
+
verify(builder).add(PATH_PREFIX + OBJECT_KEY, 1);
|
108
|
+
verify(s3PrefixFileExplorer, never()).hasNext();
|
109
|
+
}
|
110
|
+
|
111
|
+
private S3PrefixFileExplorer spyS3PrefixFileExplorer(final String bucketName, final AmazonS3 s3Client, final RetryExecutor retryExecutor, final String pathPrefix, final boolean skipGlacierObjects)
|
112
|
+
{
|
113
|
+
return spy(new S3PrefixFileExplorer(bucketName, s3Client, retryExecutor, pathPrefix, skipGlacierObjects)
|
114
|
+
{
|
115
|
+
@Override
|
116
|
+
protected List<S3ObjectSummary> fetch()
|
117
|
+
{
|
118
|
+
return null;
|
119
|
+
}
|
120
|
+
|
121
|
+
@Override
|
122
|
+
protected boolean hasNext()
|
123
|
+
{
|
124
|
+
return false;
|
125
|
+
}
|
126
|
+
});
|
127
|
+
}
|
128
|
+
}
|
@@ -0,0 +1,56 @@
|
|
1
|
+
package org.embulk.input.s3.explorer;
|
2
|
+
|
3
|
+
import com.amazonaws.services.s3.AmazonS3;
|
4
|
+
import com.amazonaws.services.s3.model.GetObjectMetadataRequest;
|
5
|
+
import com.amazonaws.services.s3.model.ObjectMetadata;
|
6
|
+
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
|
7
|
+
import org.embulk.EmbulkTestRuntime;
|
8
|
+
import org.embulk.input.s3.FileList;
|
9
|
+
import org.junit.Before;
|
10
|
+
import org.junit.Rule;
|
11
|
+
import org.junit.Test;
|
12
|
+
import org.junit.runner.RunWith;
|
13
|
+
import org.mockito.Mock;
|
14
|
+
import org.mockito.runners.MockitoJUnitRunner;
|
15
|
+
|
16
|
+
import static org.mockito.Matchers.any;
|
17
|
+
import static org.mockito.Mockito.verify;
|
18
|
+
import static org.mockito.Mockito.when;
|
19
|
+
|
20
|
+
@RunWith(MockitoJUnitRunner.class)
|
21
|
+
public class TestS3SingleFileExplorer
|
22
|
+
{
|
23
|
+
private static final String PATH = "path";
|
24
|
+
private static final String BUCKET_NAME = "bucket_name";
|
25
|
+
|
26
|
+
@SuppressFBWarnings("URF_UNREAD_PUBLIC_OR_PROTECTED_FIELD")
|
27
|
+
@Rule
|
28
|
+
public EmbulkTestRuntime embulkTestRuntime = new EmbulkTestRuntime();
|
29
|
+
|
30
|
+
@Mock
|
31
|
+
private AmazonS3 s3Client;
|
32
|
+
|
33
|
+
@Mock
|
34
|
+
private FileList.Builder builder;
|
35
|
+
|
36
|
+
@Mock
|
37
|
+
private ObjectMetadata metadata;
|
38
|
+
|
39
|
+
private S3SingleFileExplorer s3SingleFileExplorer;
|
40
|
+
|
41
|
+
@Before
|
42
|
+
public void setUp()
|
43
|
+
{
|
44
|
+
s3SingleFileExplorer = new S3SingleFileExplorer(BUCKET_NAME, s3Client, null, PATH);
|
45
|
+
}
|
46
|
+
|
47
|
+
@Test
|
48
|
+
public void addToBuilder_should_request_single_object_metadata()
|
49
|
+
{
|
50
|
+
when(s3Client.getObjectMetadata(any(GetObjectMetadataRequest.class))).thenReturn(metadata);
|
51
|
+
when(metadata.getContentLength()).thenReturn(1L);
|
52
|
+
s3SingleFileExplorer.addToBuilder(builder);
|
53
|
+
|
54
|
+
verify(builder).add(PATH, 1);
|
55
|
+
}
|
56
|
+
}
|
@@ -0,0 +1,112 @@
|
|
1
|
+
package org.embulk.input.s3.explorer;
|
2
|
+
|
3
|
+
import com.amazonaws.services.s3.AmazonS3;
|
4
|
+
import com.amazonaws.services.s3.model.ListObjectsRequest;
|
5
|
+
import com.amazonaws.services.s3.model.ObjectListing;
|
6
|
+
import com.amazonaws.services.s3.model.S3ObjectSummary;
|
7
|
+
import org.embulk.EmbulkTestRuntime;
|
8
|
+
import org.junit.Before;
|
9
|
+
import org.junit.Rule;
|
10
|
+
import org.junit.Test;
|
11
|
+
import org.junit.runner.RunWith;
|
12
|
+
import org.mockito.Mock;
|
13
|
+
import org.mockito.internal.util.reflection.FieldSetter;
|
14
|
+
import org.mockito.runners.MockitoJUnitRunner;
|
15
|
+
|
16
|
+
import java.util.Arrays;
|
17
|
+
import java.util.Calendar;
|
18
|
+
import java.util.List;
|
19
|
+
import java.util.Optional;
|
20
|
+
|
21
|
+
import static org.junit.Assert.assertEquals;
|
22
|
+
import static org.junit.Assert.assertFalse;
|
23
|
+
import static org.mockito.Matchers.any;
|
24
|
+
import static org.mockito.Mockito.mock;
|
25
|
+
import static org.mockito.Mockito.when;
|
26
|
+
|
27
|
+
@RunWith(MockitoJUnitRunner.class)
|
28
|
+
public class TestS3TimeOrderPrefixFileExplorer
|
29
|
+
{
|
30
|
+
private static final String BUCKET_NAME = "bucket_name";
|
31
|
+
private static final String PATH_PREFIX = "path_prefix";
|
32
|
+
|
33
|
+
@Rule
|
34
|
+
public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
|
35
|
+
|
36
|
+
@Mock
|
37
|
+
private AmazonS3 s3Client;
|
38
|
+
|
39
|
+
private S3TimeOrderPrefixFileExplorer s3TimeOrderPrefixFileExplorer;
|
40
|
+
|
41
|
+
@Before
|
42
|
+
public void setUp()
|
43
|
+
{
|
44
|
+
final Calendar cal = Calendar.getInstance();
|
45
|
+
cal.set(2019, Calendar.MAY, 25, 10, 0);
|
46
|
+
s3TimeOrderPrefixFileExplorer = new S3TimeOrderPrefixFileExplorer(BUCKET_NAME, s3Client, null, PATH_PREFIX,
|
47
|
+
false, Optional.empty(), cal.getTime());
|
48
|
+
}
|
49
|
+
|
50
|
+
@Test
|
51
|
+
public void fetch_should_return_filtered_objects_before_end_time()
|
52
|
+
{
|
53
|
+
final S3ObjectSummary s3ObjectBefore = mock(S3ObjectSummary.class);
|
54
|
+
final Calendar cal = Calendar.getInstance();
|
55
|
+
cal.set(2019, Calendar.MAY, 24, 10, 0);
|
56
|
+
when(s3ObjectBefore.getLastModified()).thenReturn(cal.getTime());
|
57
|
+
|
58
|
+
final S3ObjectSummary s3ObjectAfter = mock(S3ObjectSummary.class);
|
59
|
+
cal.set(2019, Calendar.MAY, 26, 10, 0);
|
60
|
+
when(s3ObjectAfter.getLastModified()).thenReturn(cal.getTime());
|
61
|
+
|
62
|
+
final ObjectListing ol = mock(ObjectListing.class);
|
63
|
+
when(s3Client.listObjects(any(ListObjectsRequest.class))).thenReturn(ol);
|
64
|
+
when(ol.getObjectSummaries()).thenReturn(Arrays.asList(s3ObjectBefore, s3ObjectAfter));
|
65
|
+
|
66
|
+
final List<S3ObjectSummary> result = s3TimeOrderPrefixFileExplorer.fetch();
|
67
|
+
assertEquals(1, result.size());
|
68
|
+
assertEquals(s3ObjectBefore, result.get(0));
|
69
|
+
}
|
70
|
+
|
71
|
+
@Test
|
72
|
+
public void fetch_should_return_filtered_objects_after_or_equals_begin_time()
|
73
|
+
{
|
74
|
+
final Calendar to = Calendar.getInstance();
|
75
|
+
to.set(2019, Calendar.MAY, 25, 10, 0);
|
76
|
+
final Calendar from = Calendar.getInstance();
|
77
|
+
from.set(2019, Calendar.MAY, 24, 10, 0);
|
78
|
+
s3TimeOrderPrefixFileExplorer = new S3TimeOrderPrefixFileExplorer(BUCKET_NAME, s3Client, null, PATH_PREFIX,
|
79
|
+
false, Optional.of(from.getTime()), to.getTime());
|
80
|
+
|
81
|
+
final S3ObjectSummary s3ObjectEqual = mock(S3ObjectSummary.class);
|
82
|
+
final Calendar equalCal = Calendar.getInstance();
|
83
|
+
equalCal.set(2019, Calendar.MAY, 24, 10, 0);
|
84
|
+
when(s3ObjectEqual.getLastModified()).thenReturn(equalCal.getTime());
|
85
|
+
|
86
|
+
final S3ObjectSummary s3ObjectBefore = mock(S3ObjectSummary.class);
|
87
|
+
final Calendar beforeCal = Calendar.getInstance();
|
88
|
+
beforeCal.set(2019, Calendar.MAY, 24, 20, 0);
|
89
|
+
when(s3ObjectBefore.getLastModified()).thenReturn(beforeCal.getTime());
|
90
|
+
|
91
|
+
final S3ObjectSummary s3ObjectAfter = mock(S3ObjectSummary.class);
|
92
|
+
final Calendar afterCal = Calendar.getInstance();
|
93
|
+
afterCal.set(2019, Calendar.MAY, 26, 10, 0);
|
94
|
+
when(s3ObjectAfter.getLastModified()).thenReturn(afterCal.getTime());
|
95
|
+
|
96
|
+
final ObjectListing ol = mock(ObjectListing.class);
|
97
|
+
when(s3Client.listObjects(any(ListObjectsRequest.class))).thenReturn(ol);
|
98
|
+
when(ol.getObjectSummaries()).thenReturn(Arrays.asList(s3ObjectEqual, s3ObjectBefore, s3ObjectAfter));
|
99
|
+
|
100
|
+
final List<S3ObjectSummary> result = s3TimeOrderPrefixFileExplorer.fetch();
|
101
|
+
assertEquals(2, result.size());
|
102
|
+
assertEquals(s3ObjectEqual, result.get(0));
|
103
|
+
assertEquals(s3ObjectBefore, result.get(1));
|
104
|
+
}
|
105
|
+
|
106
|
+
@Test
|
107
|
+
public void hasNext_should_return_false_if_no_lastpath() throws NoSuchFieldException
|
108
|
+
{
|
109
|
+
new FieldSetter(s3TimeOrderPrefixFileExplorer, s3TimeOrderPrefixFileExplorer.getClass().getDeclaredField("lastPath")).set(null);
|
110
|
+
assertFalse(s3TimeOrderPrefixFileExplorer.hasNext());
|
111
|
+
}
|
112
|
+
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-s3
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.3
|
4
|
+
version: 0.3.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-06-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -53,19 +53,28 @@ files:
|
|
53
53
|
- src/main/java/org/embulk/input/s3/HttpProxy.java
|
54
54
|
- src/main/java/org/embulk/input/s3/RetrySupportPluginTask.java
|
55
55
|
- src/main/java/org/embulk/input/s3/S3FileInputPlugin.java
|
56
|
-
- src/test/java/org/embulk/input/s3/TestAbstractS3FileInputPlugin.java
|
56
|
+
- src/main/java/org/embulk/input/s3/explorer/S3FileExplorer.java
|
57
|
+
- src/main/java/org/embulk/input/s3/explorer/S3NameOrderPrefixFileExplorer.java
|
58
|
+
- src/main/java/org/embulk/input/s3/explorer/S3PrefixFileExplorer.java
|
59
|
+
- src/main/java/org/embulk/input/s3/explorer/S3SingleFileExplorer.java
|
60
|
+
- src/main/java/org/embulk/input/s3/explorer/S3TimeOrderPrefixFileExplorer.java
|
61
|
+
- src/main/java/org/embulk/input/s3/utils/DateUtils.java
|
57
62
|
- src/test/java/org/embulk/input/s3/TestAwsCredentials.java
|
58
63
|
- src/test/java/org/embulk/input/s3/TestDefaultRetryable.java
|
59
64
|
- src/test/java/org/embulk/input/s3/TestFileList.java
|
60
65
|
- src/test/java/org/embulk/input/s3/TestHttpProxy.java
|
61
66
|
- src/test/java/org/embulk/input/s3/TestS3FileInputPlugin.java
|
62
67
|
- src/test/java/org/embulk/input/s3/TestS3InputStreamReopener.java
|
68
|
+
- src/test/java/org/embulk/input/s3/explorer/TestS3NameOrderPrefixFileExplorer.java
|
69
|
+
- src/test/java/org/embulk/input/s3/explorer/TestS3PrefixFileExplorer.java
|
70
|
+
- src/test/java/org/embulk/input/s3/explorer/TestS3SingleFileExplorer.java
|
71
|
+
- src/test/java/org/embulk/input/s3/explorer/TestS3TimeOrderPrefixFileExplorer.java
|
63
72
|
- src/test/resources/sample_01.csv
|
64
|
-
- classpath/embulk-util-aws-credentials-0.3.3.jar
|
73
|
+
- classpath/embulk-util-aws-credentials-0.3.4.jar
|
65
74
|
- classpath/httpcore-4.4.9.jar
|
66
75
|
- classpath/httpclient-4.5.5.jar
|
67
76
|
- classpath/ion-java-1.0.2.jar
|
68
|
-
- classpath/embulk-input-s3-0.3.3.jar
|
77
|
+
- classpath/embulk-input-s3-0.3.4.jar
|
69
78
|
- classpath/aws-java-sdk-core-1.11.466.jar
|
70
79
|
- classpath/jcl-over-slf4j-1.7.12.jar
|
71
80
|
- classpath/commons-codec-1.10.jar
|
Binary file
|
@@ -1,164 +0,0 @@
|
|
1
|
-
package org.embulk.input.s3;
|
2
|
-
|
3
|
-
import com.amazonaws.AmazonServiceException;
|
4
|
-
import com.amazonaws.services.s3.AmazonS3;
|
5
|
-
import com.amazonaws.services.s3.model.GetObjectMetadataRequest;
|
6
|
-
import com.amazonaws.services.s3.model.ListObjectsRequest;
|
7
|
-
import com.amazonaws.services.s3.model.ObjectListing;
|
8
|
-
import com.amazonaws.services.s3.model.ObjectMetadata;
|
9
|
-
import org.apache.http.HttpStatus;
|
10
|
-
import org.embulk.EmbulkTestRuntime;
|
11
|
-
import org.embulk.spi.util.RetryExecutor;
|
12
|
-
import org.junit.Before;
|
13
|
-
import org.junit.Rule;
|
14
|
-
import org.junit.Test;
|
15
|
-
|
16
|
-
import java.util.Optional;
|
17
|
-
|
18
|
-
import static org.mockito.Matchers.any;
|
19
|
-
import static org.mockito.Mockito.doReturn;
|
20
|
-
import static org.mockito.Mockito.doThrow;
|
21
|
-
import static org.mockito.Mockito.mock;
|
22
|
-
|
23
|
-
public class TestAbstractS3FileInputPlugin
|
24
|
-
{
|
25
|
-
private static RetryExecutor retryExecutor()
|
26
|
-
{
|
27
|
-
return RetryExecutor.retryExecutor()
|
28
|
-
.withInitialRetryWait(0)
|
29
|
-
.withMaxRetryWait(0);
|
30
|
-
}
|
31
|
-
|
32
|
-
private static AbstractS3FileInputPlugin dummyS3Plugin()
|
33
|
-
{
|
34
|
-
return new AbstractS3FileInputPlugin()
|
35
|
-
{
|
36
|
-
@Override
|
37
|
-
protected Class<? extends PluginTask> getTaskClass()
|
38
|
-
{
|
39
|
-
return PluginTask.class;
|
40
|
-
}
|
41
|
-
};
|
42
|
-
}
|
43
|
-
|
44
|
-
private static class SomeException extends RuntimeException
|
45
|
-
{
|
46
|
-
}
|
47
|
-
|
48
|
-
@Rule
|
49
|
-
public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
|
50
|
-
|
51
|
-
private AmazonS3 client;
|
52
|
-
|
53
|
-
@Before
|
54
|
-
public void createResources()
|
55
|
-
{
|
56
|
-
client = mock(AmazonS3.class);
|
57
|
-
}
|
58
|
-
|
59
|
-
@Test
|
60
|
-
public void listS3FilesByPrefix()
|
61
|
-
{
|
62
|
-
doReturn(new ObjectListing()).when(client).listObjects(any(ListObjectsRequest.class));
|
63
|
-
FileList.Builder builder = new FileList.Builder();
|
64
|
-
dummyS3Plugin().listS3FilesByPrefix(builder, client, "some_bucket", "some_prefix", Optional.of("last_path"), true);
|
65
|
-
}
|
66
|
-
|
67
|
-
@Test
|
68
|
-
public void listS3FileByPrefix_with_retry()
|
69
|
-
{
|
70
|
-
doThrow(new RuntimeException()).doReturn(new ObjectListing())
|
71
|
-
.when(client).listObjects(any(ListObjectsRequest.class));
|
72
|
-
FileList.Builder builder = new FileList.Builder();
|
73
|
-
dummyS3Plugin().listS3FilesByPrefix(
|
74
|
-
builder, client, "some_bucket", "some_prefix", Optional.of("last_path"), true,
|
75
|
-
retryExecutor().withRetryLimit(1));
|
76
|
-
}
|
77
|
-
|
78
|
-
@Test(expected = SomeException.class)
|
79
|
-
public void listS3FileByPrefix_on_retry_gave_up_should_throw_the_original_exception()
|
80
|
-
{
|
81
|
-
doThrow(new SomeException()).doReturn(new ObjectListing())
|
82
|
-
.when(client).listObjects(any(ListObjectsRequest.class));
|
83
|
-
FileList.Builder builder = new FileList.Builder();
|
84
|
-
dummyS3Plugin().listS3FilesByPrefix(
|
85
|
-
builder, client, "some_bucket", "some_prefix", Optional.of("last_path"), true,
|
86
|
-
retryExecutor().withRetryLimit(0));
|
87
|
-
}
|
88
|
-
|
89
|
-
@Test(expected = AmazonServiceException.class)
|
90
|
-
public void listS3FileByPrefix_on_retry_gave_up_should_throw_the_original_exception_in_forbidden_code()
|
91
|
-
{
|
92
|
-
AmazonServiceException exception = new AmazonServiceException("Forbidden exception");
|
93
|
-
exception.setStatusCode(HttpStatus.SC_FORBIDDEN);
|
94
|
-
exception.setErrorType(AmazonServiceException.ErrorType.Client);
|
95
|
-
|
96
|
-
doThrow(exception).doReturn(new ObjectListing())
|
97
|
-
.when(client).listObjects(any(ListObjectsRequest.class));
|
98
|
-
FileList.Builder builder = new FileList.Builder();
|
99
|
-
dummyS3Plugin().listS3FilesByPrefix(
|
100
|
-
builder, client, "some_bucket", "some_prefix", Optional.of("last_path"), true,
|
101
|
-
retryExecutor().withRetryLimit(1));
|
102
|
-
}
|
103
|
-
|
104
|
-
@Test(expected = AmazonServiceException.class)
|
105
|
-
public void listS3FileByPrefix_on_retry_gave_up_should_throw_the_original_exception_in_methodnotallow_code()
|
106
|
-
{
|
107
|
-
AmazonServiceException exception = new AmazonServiceException("method not allow exception");
|
108
|
-
exception.setStatusCode(HttpStatus.SC_METHOD_NOT_ALLOWED);
|
109
|
-
exception.setErrorType(AmazonServiceException.ErrorType.Client);
|
110
|
-
|
111
|
-
doThrow(exception).doReturn(new ObjectListing())
|
112
|
-
.when(client).listObjects(any(ListObjectsRequest.class));
|
113
|
-
FileList.Builder builder = new FileList.Builder();
|
114
|
-
dummyS3Plugin().listS3FilesByPrefix(
|
115
|
-
builder, client, "some_bucket", "some_prefix", Optional.of("last_path"), true,
|
116
|
-
retryExecutor().withRetryLimit(1));
|
117
|
-
}
|
118
|
-
|
119
|
-
@Test(expected = AmazonServiceException.class)
|
120
|
-
public void listS3FileByPrefix_on_retry_gave_up_should_throw_the_original_exception_in_expiredToken_code()
|
121
|
-
{
|
122
|
-
AmazonServiceException exception = new AmazonServiceException("expired token exception");
|
123
|
-
exception.setStatusCode(HttpStatus.SC_BAD_REQUEST);
|
124
|
-
exception.setErrorCode("ExpiredToken");
|
125
|
-
exception.setErrorType(AmazonServiceException.ErrorType.Client);
|
126
|
-
|
127
|
-
doThrow(exception).doReturn(new ObjectListing())
|
128
|
-
.when(client).listObjects(any(ListObjectsRequest.class));
|
129
|
-
FileList.Builder builder = new FileList.Builder();
|
130
|
-
dummyS3Plugin().listS3FilesByPrefix(
|
131
|
-
builder, client, "some_bucket", "some_prefix", Optional.of("last_path"), true,
|
132
|
-
retryExecutor().withRetryLimit(1));
|
133
|
-
}
|
134
|
-
|
135
|
-
@Test
|
136
|
-
public void addS3DirectObject()
|
137
|
-
{
|
138
|
-
doReturn(new ObjectMetadata()).when(client).getObjectMetadata(any(GetObjectMetadataRequest.class));
|
139
|
-
FileList.Builder builder = new FileList.Builder().pathMatchPattern("");
|
140
|
-
dummyS3Plugin().addS3DirectObject(builder, client, "some_bucket", "some_prefix");
|
141
|
-
}
|
142
|
-
|
143
|
-
@Test
|
144
|
-
public void addS3DirectObject_with_retry()
|
145
|
-
{
|
146
|
-
doThrow(new RuntimeException()).doReturn(new ObjectMetadata())
|
147
|
-
.when(client).getObjectMetadata(any(GetObjectMetadataRequest.class));
|
148
|
-
FileList.Builder builder = new FileList.Builder().pathMatchPattern("");
|
149
|
-
dummyS3Plugin().addS3DirectObject(
|
150
|
-
builder, client, "some_bucket", "some_prefix",
|
151
|
-
retryExecutor());
|
152
|
-
}
|
153
|
-
|
154
|
-
@Test(expected = SomeException.class)
|
155
|
-
public void addS3DirectObject_on_retry_gave_up_should_throw_original_exception()
|
156
|
-
{
|
157
|
-
doThrow(new SomeException()).doReturn(new ObjectMetadata())
|
158
|
-
.when(client).getObjectMetadata(any(GetObjectMetadataRequest.class));
|
159
|
-
FileList.Builder builder = new FileList.Builder().pathMatchPattern("");
|
160
|
-
dummyS3Plugin().addS3DirectObject(
|
161
|
-
builder, client, "some_bucket", "some_prefix",
|
162
|
-
retryExecutor().withRetryLimit(0));
|
163
|
-
}
|
164
|
-
}
|