embulk-input-s3 0.3.3 → 0.3.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 889fd9c88e7757c4284247122fd94b65e19f9bb7
4
- data.tar.gz: 35ad69c5264debb86359279289135512f5b67ca8
3
+ metadata.gz: 0497b4779ac08c091c1291583ef439ada4f48ea2
4
+ data.tar.gz: 0ef8f1d26751cf22d7975b570d9f7cbcfd7e270f
5
5
  SHA512:
6
- metadata.gz: 1831c5e0392336e86b98a244a8fb45008499ae062308f9dfd9f29d904e6b3c8f5deca5eee68cf2bc5ddcb770d3fbd2a6f65e3f83fbad9e42f41830dea664ffc2
7
- data.tar.gz: e24be583cbbc9442056152f9142dbfc413048f8ecbdb0a7dc65b30c0265b5fff80ac324b84598c43f4da71f32857fdcd044c0a7566dca4c23e195ef4618b397a
6
+ metadata.gz: 82310a7bae6f789ad0962346438a945b4ed59a21fc34be6bbd8e705f979482be58994a1d5d7258f07020ad72cc8dee240313b569b04081f989299a18845dbce5
7
+ data.tar.gz: 5a41d741bb26cd0d619149c8c1d4b47495c167570f450460e198a129c1b91ebc9f55b45eb771e3b58de6a69bd58f3f52029f0d272200c46f001b3ff6e24ecd13
@@ -7,15 +7,9 @@ import com.amazonaws.auth.AWSCredentialsProvider;
7
7
  import com.amazonaws.retry.PredefinedRetryPolicies;
8
8
  import com.amazonaws.services.s3.AmazonS3;
9
9
  import com.amazonaws.services.s3.AmazonS3ClientBuilder;
10
- import com.amazonaws.services.s3.model.GetObjectMetadataRequest;
11
10
  import com.amazonaws.services.s3.model.GetObjectRequest;
12
- import com.amazonaws.services.s3.model.ListObjectsRequest;
13
- import com.amazonaws.services.s3.model.ObjectListing;
14
- import com.amazonaws.services.s3.model.ObjectMetadata;
15
11
  import com.amazonaws.services.s3.model.S3Object;
16
12
  import com.amazonaws.services.s3.model.S3ObjectInputStream;
17
- import com.amazonaws.services.s3.model.S3ObjectSummary;
18
- import com.amazonaws.services.s3.model.StorageClass;
19
13
  import com.google.common.annotations.VisibleForTesting;
20
14
  import org.embulk.config.Config;
21
15
  import org.embulk.config.ConfigDefault;
@@ -26,6 +20,10 @@ import org.embulk.config.ConfigSource;
26
20
  import org.embulk.config.Task;
27
21
  import org.embulk.config.TaskReport;
28
22
  import org.embulk.config.TaskSource;
23
+ import org.embulk.input.s3.explorer.S3NameOrderPrefixFileExplorer;
24
+ import org.embulk.input.s3.explorer.S3SingleFileExplorer;
25
+ import org.embulk.input.s3.explorer.S3TimeOrderPrefixFileExplorer;
26
+ import org.embulk.input.s3.utils.DateUtils;
29
27
  import org.embulk.spi.BufferAllocator;
30
28
  import org.embulk.spi.Exec;
31
29
  import org.embulk.spi.FileInputPlugin;
@@ -40,6 +38,9 @@ import org.slf4j.Logger;
40
38
 
41
39
  import java.io.IOException;
42
40
  import java.io.InputStream;
41
+ import java.text.SimpleDateFormat;
42
+ import java.util.Collections;
43
+ import java.util.Date;
43
44
  import java.util.Iterator;
44
45
  import java.util.List;
45
46
  import java.util.Optional;
@@ -51,6 +52,7 @@ public abstract class AbstractS3FileInputPlugin
51
52
  implements FileInputPlugin
52
53
  {
53
54
  private static final Logger LOGGER = Exec.getLogger(S3FileInputPlugin.class);
55
+ private static final String FULL_DATE_FORMAT = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'";
54
56
 
55
57
  public interface PluginTask
56
58
  extends AwsCredentialsTask, FileList.Task, RetrySupportPluginTask, Task
@@ -88,12 +90,35 @@ public abstract class AbstractS3FileInputPlugin
88
90
  @ConfigDefault("false")
89
91
  boolean getSkipGlacierObjects();
90
92
 
93
+ @Config("use_modified_time")
94
+ @ConfigDefault("false")
95
+ boolean getUseModifiedTime();
96
+
97
+ @Config("last_modified_time")
98
+ @ConfigDefault("null")
99
+ Optional<String> getLastModifiedTime();
100
+
91
101
  // TODO timeout, ssl, etc
92
102
 
103
+ ////////////////////////////////////////
104
+ // Internal configurations
105
+ ////////////////////////////////////////
106
+
93
107
  FileList getFiles();
94
108
 
95
109
  void setFiles(FileList files);
96
110
 
111
+ /**
112
+ * __end_modified_time is set only when use_modified_time is enabled.
113
+ *
114
+ * It is internal state and must not be set in config.yml
115
+ */
116
+ @Config("__end_modified_time")
117
+ @ConfigDefault("null")
118
+ Optional<Date> getEndModifiedTime();
119
+
120
+ void setEndModifiedTime(Optional<Date> endModifiedTime);
121
+
97
122
  @ConfigInject
98
123
  BufferAllocator getBufferAllocator();
99
124
  }
@@ -105,6 +130,7 @@ public abstract class AbstractS3FileInputPlugin
105
130
  {
106
131
  PluginTask task = config.loadConfig(getTaskClass());
107
132
 
133
+ errorIfInternalParamsAreSet(task);
108
134
  validateInputTask(task);
109
135
  // list files recursively
110
136
  task.setFiles(listFiles(task));
@@ -130,9 +156,15 @@ public abstract class AbstractS3FileInputPlugin
130
156
 
131
157
  // last_path
132
158
  if (task.getIncremental()) {
133
- Optional<String> lastPath = task.getFiles().getLastPath(task.getLastPath());
134
- LOGGER.info("Incremental job, setting last_path to [{}]", lastPath.orElse(""));
135
- configDiff.set("last_path", lastPath);
159
+ if (task.getUseModifiedTime()) {
160
+ Date endModifiedTime = task.getEndModifiedTime().orElse(new Date());
161
+ configDiff.set("last_modified_time", new SimpleDateFormat(FULL_DATE_FORMAT).format(endModifiedTime));
162
+ }
163
+ else {
164
+ Optional<String> lastPath = task.getFiles().getLastPath(task.getLastPath());
165
+ LOGGER.info("Incremental job, setting last_path to [{}]", lastPath.orElse(""));
166
+ configDiff.set("last_path", lastPath);
167
+ }
136
168
  }
137
169
  return configDiff;
138
170
  }
@@ -237,22 +269,35 @@ public abstract class AbstractS3FileInputPlugin
237
269
  String bucketName = task.getBucket();
238
270
  FileList.Builder builder = new FileList.Builder(task);
239
271
  RetryExecutor retryExec = retryExecutorFrom(task);
272
+
240
273
  if (task.getPath().isPresent()) {
241
274
  LOGGER.info("Start getting object with path: [{}]", task.getPath().get());
242
- addS3DirectObject(builder, client, task.getBucket(), task.getPath().get(), retryExec);
275
+ new S3SingleFileExplorer(bucketName, client, retryExec, task.getPath().get()).addToBuilder(builder);
276
+ return builder.build();
243
277
  }
244
- else {
245
- // does not need to verify existent path prefix here since there is the validation requires either path or path_prefix
246
- LOGGER.info("Start listing file with prefix [{}]", task.getPathPrefix().get());
247
- if (task.getPathPrefix().get().equals("/")) {
248
- LOGGER.info("Listing files with prefix \"/\". This doesn't mean all files in a bucket. If you intend to read all files, use \"path_prefix: ''\" (empty string) instead.");
249
- }
250
278
 
251
- listS3FilesByPrefix(builder, client, bucketName,
252
- task.getPathPrefix().get(), task.getLastPath(), task.getSkipGlacierObjects(), retryExec);
253
- LOGGER.info("Found total [{}] files", builder.size());
279
+ // No need to check that path_prefix is present here: validation already requires either path or path_prefix
280
+ LOGGER.info("Start listing file with prefix [{}]", task.getPathPrefix().get());
281
+ if (task.getPathPrefix().get().equals("/")) {
282
+ LOGGER.info("Listing files with prefix \"/\". This doesn't mean all files in a bucket. If you intend to read all files, use \"path_prefix: ''\" (empty string) instead.");
254
283
  }
255
284
 
285
+ if (task.getUseModifiedTime()) {
286
+ Date now = new Date();
287
+ Optional<Date> from = task.getLastModifiedTime().isPresent()
288
+ ? Optional.of(DateUtils.parse(task.getLastModifiedTime().get(), Collections.singletonList(FULL_DATE_FORMAT)))
289
+ : Optional.empty();
290
+ task.setEndModifiedTime(Optional.of(now));
291
+
292
+ new S3TimeOrderPrefixFileExplorer(bucketName, client, retryExec, task.getPathPrefix().get(),
293
+ task.getSkipGlacierObjects(), from, now).addToBuilder(builder);
294
+ }
295
+ else {
296
+ new S3NameOrderPrefixFileExplorer(bucketName, client, retryExec, task.getPathPrefix().get(),
297
+ task.getSkipGlacierObjects(), task.getLastPath().orElse(null)).addToBuilder(builder);
298
+ }
299
+
300
+ LOGGER.info("Found total [{}] files", builder.size());
256
301
  return builder.build();
257
302
  }
258
303
  catch (AmazonServiceException ex) {
@@ -268,107 +313,13 @@ public abstract class AbstractS3FileInputPlugin
268
313
  }
269
314
  }
270
315
 
271
- @VisibleForTesting
272
- public void addS3DirectObject(FileList.Builder builder,
273
- final AmazonS3 client,
274
- String bucket,
275
- String objectKey)
276
- {
277
- addS3DirectObject(builder, client, bucket, objectKey, null);
278
- }
279
-
280
- @VisibleForTesting
281
- public void addS3DirectObject(FileList.Builder builder,
282
- final AmazonS3 client,
283
- String bucket,
284
- String objectKey,
285
- RetryExecutor retryExec)
286
- {
287
- final GetObjectMetadataRequest objectMetadataRequest = new GetObjectMetadataRequest(bucket, objectKey);
288
-
289
- ObjectMetadata objectMetadata = new DefaultRetryable<ObjectMetadata>("Looking up for a single object") {
290
- @Override
291
- public ObjectMetadata call()
292
- {
293
- return client.getObjectMetadata(objectMetadataRequest);
294
- }
295
- }.executeWith(retryExec);
296
-
297
- builder.add(objectKey, objectMetadata.getContentLength());
298
- }
299
-
300
- private void validateInputTask(PluginTask task)
316
+ private void validateInputTask(final PluginTask task)
301
317
  {
302
318
  if (!task.getPathPrefix().isPresent() && !task.getPath().isPresent()) {
303
319
  throw new ConfigException("Either path or path_prefix is required");
304
320
  }
305
321
  }
306
322
 
307
- @VisibleForTesting
308
- public static void listS3FilesByPrefix(FileList.Builder builder,
309
- final AmazonS3 client,
310
- String bucketName,
311
- String prefix,
312
- Optional<String> lastPath,
313
- boolean skipGlacierObjects)
314
- {
315
- listS3FilesByPrefix(builder, client, bucketName, prefix, lastPath, skipGlacierObjects, null);
316
- }
317
-
318
- /**
319
- * Lists S3 filenames filtered by prefix.
320
- * <p>
321
- * The resulting list does not include the file that's size == 0.
322
- * @param builder custom Filelist builder
323
- * @param client Amazon S3
324
- * @param bucketName Amazon S3 bucket name
325
- * @param prefix Amazon S3 bucket name prefix
326
- * @param lastPath last path
327
- * @param skipGlacierObjects skip gracier objects
328
- * @param retryExec a retry executor object to do the retrying
329
- */
330
- @VisibleForTesting
331
- public static void listS3FilesByPrefix(FileList.Builder builder,
332
- final AmazonS3 client,
333
- String bucketName,
334
- String prefix,
335
- Optional<String> lastPath,
336
- boolean skipGlacierObjects,
337
- RetryExecutor retryExec)
338
- {
339
- String lastKey = lastPath.orElse(null);
340
- do {
341
- final String finalLastKey = lastKey;
342
- final ListObjectsRequest req = new ListObjectsRequest(bucketName, prefix, finalLastKey, null, 1024);
343
- ObjectListing ol = new DefaultRetryable<ObjectListing>("Listing objects") {
344
- @Override
345
- public ObjectListing call()
346
- {
347
- return client.listObjects(req);
348
- }
349
- }.executeWith(retryExec);
350
- for (S3ObjectSummary s : ol.getObjectSummaries()) {
351
- if (s.getStorageClass().equals(StorageClass.Glacier.toString())) {
352
- if (skipGlacierObjects) {
353
- Exec.getLogger("AbstractS3FileInputPlugin.class").warn("Skipped \"s3://{}/{}\" that stored at Glacier.", bucketName, s.getKey());
354
- continue;
355
- }
356
- else {
357
- throw new ConfigException("Detected an object stored at Glacier. Set \"skip_glacier_objects\" option to \"true\" to skip this.");
358
- }
359
- }
360
- if (s.getSize() > 0) {
361
- builder.add(s.getKey(), s.getSize());
362
- if (!builder.needsMore()) {
363
- LOGGER.warn("Too many files matched, stop listing file");
364
- return;
365
- }
366
- }
367
- }
368
- lastKey = ol.getNextMarker();
369
- } while (lastKey != null);
370
- }
371
-
372
323
  @Override
373
324
  public TransactionalFileInput open(TaskSource taskSource, int taskIndex)
374
325
  {
@@ -440,6 +391,14 @@ public abstract class AbstractS3FileInputPlugin
440
391
  }
441
392
  }
442
393
 
394
+ @VisibleForTesting
395
+ static void errorIfInternalParamsAreSet(PluginTask task)
396
+ {
397
+ if (task.getEndModifiedTime().isPresent()) {
398
+ throw new ConfigException("'__end_modified_time' must not be set.");
399
+ }
400
+ }
401
+
443
402
  // TODO create single-file InputStreamFileInput utility
444
403
  private class SingleFileProvider
445
404
  implements InputStreamFileInput.Provider
@@ -19,7 +19,7 @@ import static org.embulk.spi.util.RetryExecutor.Retryable;
19
19
  * Retryable utility, regardless the occurred exceptions,
20
20
  * Also provide a default approach for exception propagation.
21
21
  */
22
- class DefaultRetryable<T> implements Retryable<T>
22
+ public class DefaultRetryable<T> implements Retryable<T>
23
23
  {
24
24
  private static final Logger log = Exec.getLogger(DefaultRetryable.class);
25
25
  private static final Set<Integer> NONRETRYABLE_STATUS_CODES = new HashSet<Integer>(2);
@@ -0,0 +1,21 @@
1
+ package org.embulk.input.s3.explorer;
2
+
3
+ import com.amazonaws.services.s3.AmazonS3;
4
+ import org.embulk.input.s3.FileList;
5
+ import org.embulk.spi.util.RetryExecutor;
6
+
7
+ public abstract class S3FileExplorer
8
+ {
9
+ protected String bucketName;
10
+ protected AmazonS3 s3Client;
11
+ protected RetryExecutor retryExecutor;
12
+
13
+ public S3FileExplorer(final String bucketName, final AmazonS3 s3Client, final RetryExecutor retryExecutor)
14
+ {
15
+ this.bucketName = bucketName;
16
+ this.s3Client = s3Client;
17
+ this.retryExecutor = retryExecutor;
18
+ }
19
+
20
+ public abstract void addToBuilder(FileList.Builder builder);
21
+ }
@@ -0,0 +1,45 @@
1
+ package org.embulk.input.s3.explorer;
2
+
3
+ import com.amazonaws.services.s3.AmazonS3;
4
+ import com.amazonaws.services.s3.model.ListObjectsRequest;
5
+ import com.amazonaws.services.s3.model.ObjectListing;
6
+ import com.amazonaws.services.s3.model.S3ObjectSummary;
7
+ import org.embulk.input.s3.DefaultRetryable;
8
+ import org.embulk.spi.util.RetryExecutor;
9
+
10
+ import java.util.List;
11
+
12
+ public class S3NameOrderPrefixFileExplorer extends S3PrefixFileExplorer
13
+ {
14
+ private String lastPath;
15
+
16
+ public S3NameOrderPrefixFileExplorer(final String bucketName, final AmazonS3 s3Client, final RetryExecutor retryExecutor,
17
+ final String pathPrefix, final boolean skipGlacierObjects, final String lastPath)
18
+ {
19
+ super(bucketName, s3Client, retryExecutor, pathPrefix, skipGlacierObjects);
20
+ this.lastPath = lastPath;
21
+ }
22
+
23
+ @Override
24
+ protected List<S3ObjectSummary> fetch()
25
+ {
26
+ final ListObjectsRequest req = new ListObjectsRequest(bucketName, pathPrefix, lastPath, null, 1024);
27
+ final ObjectListing ol = new DefaultRetryable<ObjectListing>("Listing objects")
28
+ {
29
+ @Override
30
+ public ObjectListing call()
31
+ {
32
+ return s3Client.listObjects(req);
33
+ }
34
+ }.executeWith(retryExecutor);
35
+ lastPath = ol.getNextMarker();
36
+
37
+ return ol.getObjectSummaries();
38
+ }
39
+
40
+ @Override
41
+ protected boolean hasNext()
42
+ {
43
+ return lastPath != null;
44
+ }
45
+ }
@@ -0,0 +1,57 @@
1
+ package org.embulk.input.s3.explorer;
2
+
3
+ import com.amazonaws.services.s3.AmazonS3;
4
+ import com.amazonaws.services.s3.model.S3ObjectSummary;
5
+ import com.amazonaws.services.s3.model.StorageClass;
6
+ import org.embulk.config.ConfigException;
7
+ import org.embulk.input.s3.FileList;
8
+ import org.embulk.spi.Exec;
9
+ import org.embulk.spi.util.RetryExecutor;
10
+ import org.slf4j.Logger;
11
+
12
+ import java.util.List;
13
+
14
+ public abstract class S3PrefixFileExplorer extends S3FileExplorer
15
+ {
16
+ private static final Logger LOGGER = Exec.getLogger(S3PrefixFileExplorer.class);
17
+
18
+ protected String pathPrefix;
19
+
20
+ private final boolean skipGlacierObjects;
21
+
22
+ public S3PrefixFileExplorer(final String bucketName, final AmazonS3 s3Client, final RetryExecutor retryExecutor, final String pathPrefix, final boolean skipGlacierObjects)
23
+ {
24
+ super(bucketName, s3Client, retryExecutor);
25
+ this.pathPrefix = pathPrefix;
26
+ this.skipGlacierObjects = skipGlacierObjects;
27
+ }
28
+
29
+ @Override
30
+ public void addToBuilder(final FileList.Builder builder)
31
+ {
32
+ do {
33
+ final List<S3ObjectSummary> s3ObjectSummaries = fetch();
34
+
35
+ for (final S3ObjectSummary s : s3ObjectSummaries) {
36
+ if (s.getStorageClass().equals(StorageClass.Glacier.toString())) {
37
+ if (skipGlacierObjects) {
38
+ LOGGER.warn("Skipped \"s3://{}/{}\" that stored at Glacier.", bucketName, s.getKey());
39
+ continue;
40
+ }
41
+ throw new ConfigException("Detected an object stored at Glacier. Set \"skip_glacier_objects\" option to \"true\" to skip this.");
42
+ }
43
+ if (s.getSize() > 0) {
44
+ builder.add(s.getKey(), s.getSize());
45
+ if (!builder.needsMore()) {
46
+ LOGGER.warn("Too many files matched, stop listing file");
47
+ return;
48
+ }
49
+ }
50
+ }
51
+ } while (hasNext());
52
+ }
53
+
54
+ protected abstract List<S3ObjectSummary> fetch();
55
+
56
+ protected abstract boolean hasNext();
57
+ }
@@ -0,0 +1,35 @@
1
+ package org.embulk.input.s3.explorer;
2
+
3
+ import com.amazonaws.services.s3.AmazonS3;
4
+ import com.amazonaws.services.s3.model.GetObjectMetadataRequest;
5
+ import com.amazonaws.services.s3.model.ObjectMetadata;
6
+ import org.embulk.input.s3.DefaultRetryable;
7
+ import org.embulk.input.s3.FileList;
8
+ import org.embulk.spi.util.RetryExecutor;
9
+
10
+ public class S3SingleFileExplorer extends S3FileExplorer
11
+ {
12
+ private final String path;
13
+
14
+ public S3SingleFileExplorer(final String bucket, final AmazonS3 client, final RetryExecutor retryExecutor, final String path)
15
+ {
16
+ super(bucket, client, retryExecutor);
17
+ this.path = path;
18
+ }
19
+
20
+ @Override
21
+ public void addToBuilder(final FileList.Builder builder)
22
+ {
23
+ final GetObjectMetadataRequest objectMetadataRequest = new GetObjectMetadataRequest(bucketName, path);
24
+
25
+ final ObjectMetadata objectMetadata = new DefaultRetryable<ObjectMetadata>("Looking up for a single object") {
26
+ @Override
27
+ public ObjectMetadata call()
28
+ {
29
+ return s3Client.getObjectMetadata(objectMetadataRequest);
30
+ }
31
+ }.executeWith(retryExecutor);
32
+
33
+ builder.add(path, objectMetadata.getContentLength());
34
+ }
35
+ }
@@ -0,0 +1,70 @@
1
+ package org.embulk.input.s3.explorer;
2
+
3
+ import com.amazonaws.services.s3.AmazonS3;
4
+ import com.amazonaws.services.s3.model.ListObjectsRequest;
5
+ import com.amazonaws.services.s3.model.ObjectListing;
6
+ import com.amazonaws.services.s3.model.S3ObjectSummary;
7
+ import org.apache.commons.lang3.StringUtils;
8
+ import org.embulk.input.s3.DefaultRetryable;
9
+ import org.embulk.spi.Exec;
10
+ import org.embulk.spi.util.RetryExecutor;
11
+ import org.slf4j.Logger;
12
+
13
+ import java.util.Date;
14
+ import java.util.List;
15
+ import java.util.Optional;
16
+ import java.util.stream.Collectors;
17
+
18
+ public class S3TimeOrderPrefixFileExplorer extends S3PrefixFileExplorer
19
+ {
20
+ private static final Logger LOGGER = Exec.getLogger(S3TimeOrderPrefixFileExplorer.class);
21
+
22
+ private final Optional<Date> from;
23
+ private final Date to;
24
+
25
+ private String lastPath;
26
+
27
+ private int numOfReq = 0;
28
+
29
+ public S3TimeOrderPrefixFileExplorer(final String bucket, final AmazonS3 client, final RetryExecutor retryExecutor,
30
+ final String pathPrefix, final boolean skipGlacierObjects, final Optional<Date> from, final Date to)
31
+ {
32
+ super(bucket, client, retryExecutor, pathPrefix, skipGlacierObjects);
33
+ this.from = from;
34
+ this.to = to;
35
+ }
36
+
37
+ @Override
38
+ public List<S3ObjectSummary> fetch()
39
+ {
40
+ ++numOfReq;
41
+
42
+ final ListObjectsRequest req = new ListObjectsRequest(bucketName, pathPrefix, lastPath, null, 1024);
43
+ final ObjectListing objectListing = new DefaultRetryable<ObjectListing>("Listing objects")
44
+ {
45
+ @Override
46
+ public ObjectListing call()
47
+ {
48
+ return s3Client.listObjects(req);
49
+ }
50
+ }.executeWith(retryExecutor);
51
+ lastPath = objectListing.getNextMarker();
52
+
53
+ return objectListing.getObjectSummaries()
54
+ .stream()
55
+ .filter(s3ObjectSummary -> s3ObjectSummary.getLastModified().before(to)
56
+ && (!from.isPresent() || s3ObjectSummary.getLastModified().equals(from.get()) || s3ObjectSummary.getLastModified().after(from.get())))
57
+ .collect(Collectors.toList());
58
+ }
59
+
60
+ @Override
61
+ public boolean hasNext()
62
+ {
63
+ if (lastPath == null) {
64
+ LOGGER.info("The total number of LIST requests is {}{}.", numOfReq,
65
+ numOfReq < 10 ? StringUtils.EMPTY : ". Clean up your s3 bucket to reduce the number of requests and improve the ingesting performance");
66
+ return false;
67
+ }
68
+ return true;
69
+ }
70
+ }
@@ -0,0 +1,28 @@
1
+ package org.embulk.input.s3.utils;
2
+
3
+ import com.google.common.base.Joiner;
4
+ import org.embulk.config.ConfigException;
5
+ import org.joda.time.format.DateTimeFormat;
6
+
7
+ import java.util.Date;
8
+ import java.util.List;
9
+
10
+ public class DateUtils
11
+ {
12
+ public static Date parse(final String value, final List<String> supportedFormats)
13
+ throws ConfigException
14
+ {
15
+ for (final String fmt : supportedFormats) {
16
+ try {
17
+ return DateTimeFormat.forPattern(fmt).parseDateTime(value).toDate();
18
+ } catch (final IllegalArgumentException e) {
19
+ // ignorable exception
20
+ }
21
+ }
22
+ throw new ConfigException("Unsupported DateTime value: '" + value + "', supported formats: [" + Joiner.on(",").join(supportedFormats) + "]");
23
+ }
24
+
25
+ private DateUtils()
26
+ {
27
+ }
28
+ }
@@ -1,16 +1,11 @@
1
1
  package org.embulk.input.s3;
2
2
 
3
3
  import com.amazonaws.services.s3.AmazonS3;
4
- import com.amazonaws.services.s3.model.ListObjectsRequest;
5
- import com.amazonaws.services.s3.model.ObjectListing;
6
4
  import com.amazonaws.services.s3.model.Region;
7
- import com.amazonaws.services.s3.model.S3ObjectSummary;
8
- import com.amazonaws.services.s3.model.StorageClass;
9
5
  import com.google.common.collect.ImmutableList;
10
6
  import com.google.common.collect.ImmutableMap;
11
7
  import org.embulk.EmbulkTestRuntime;
12
8
  import org.embulk.config.ConfigDiff;
13
- import org.embulk.config.ConfigException;
14
9
  import org.embulk.config.ConfigSource;
15
10
  import org.embulk.config.TaskReport;
16
11
  import org.embulk.config.TaskSource;
@@ -25,21 +20,15 @@ import org.junit.Before;
25
20
  import org.junit.BeforeClass;
26
21
  import org.junit.Rule;
27
22
  import org.junit.Test;
28
- import org.mockito.Mockito;
29
23
 
30
- import java.lang.reflect.Field;
31
24
  import java.util.ArrayList;
32
25
  import java.util.List;
33
- import java.util.Optional;
34
26
 
35
27
  import static org.embulk.input.s3.S3FileInputPlugin.S3PluginTask;
36
28
  import static org.junit.Assert.assertEquals;
37
29
  import static org.junit.Assert.assertFalse;
38
30
  import static org.junit.Assert.assertNull;
39
31
  import static org.junit.Assume.assumeNotNull;
40
- import static org.mockito.Matchers.any;
41
- import static org.mockito.Mockito.doReturn;
42
- import static org.mockito.Mockito.mock;
43
32
 
44
33
  public class TestS3FileInputPlugin
45
34
  {
@@ -97,7 +86,6 @@ public class TestS3FileInputPlugin
97
86
 
98
87
  @Test
99
88
  public void useLastPath()
100
- throws Exception
101
89
  {
102
90
  ConfigSource config = this.config.deepCopy().set("last_path", EMBULK_S3_TEST_PATH_PREFIX + "/sample_01.csv");
103
91
  ConfigDiff configDiff = runner.transaction(config, new Control(runner, output));
@@ -117,7 +105,6 @@ public class TestS3FileInputPlugin
117
105
 
118
106
  @Test
119
107
  public void emptyFilesWithLastPath()
120
- throws Exception
121
108
  {
122
109
  ConfigSource config = this.config.deepCopy()
123
110
  .set("path_prefix", "empty_files_prefix")
@@ -130,7 +117,6 @@ public class TestS3FileInputPlugin
130
117
 
131
118
  @Test
132
119
  public void useTotalFileCountLimit()
133
- throws Exception
134
120
  {
135
121
  ConfigSource config = this.config.deepCopy().set("total_file_count_limit", 0);
136
122
  ConfigDiff configDiff = runner.transaction(config, new Control(runner, output));
@@ -141,7 +127,6 @@ public class TestS3FileInputPlugin
141
127
 
142
128
  @Test
143
129
  public void usePathMatchPattern()
144
- throws Exception
145
130
  {
146
131
  { // match pattern
147
132
  ConfigSource config = this.config.deepCopy().set("path_match_pattern", "/sample_01");
@@ -227,44 +212,6 @@ public class TestS3FileInputPlugin
227
212
  assertEquals(s3Client.getRegion(), Region.US_Standard);
228
213
  }
229
214
 
230
- @Test(expected = ConfigException.class)
231
- public void useSkipGlacierObjects() throws Exception
232
- {
233
- AmazonS3 client;
234
- client = mock(AmazonS3.class);
235
- doReturn(s3objectList("in/aa/a", StorageClass.Glacier)).when(client).listObjects(any(ListObjectsRequest.class));
236
-
237
- AbstractS3FileInputPlugin plugin = Mockito.mock(AbstractS3FileInputPlugin.class, Mockito.CALLS_REAL_METHODS);
238
- plugin.listS3FilesByPrefix(newFileList(config, "sample_00", 100L), client, "test_bucket", "test_prefix", Optional.empty(), false);
239
- }
240
-
241
- private FileList.Builder newFileList(ConfigSource config, Object... nameAndSize)
242
- {
243
- FileList.Builder builder = new FileList.Builder(config);
244
- for (int i = 0; i < nameAndSize.length; i += 2) {
245
- builder.add((String) nameAndSize[i], (long) nameAndSize[i + 1]);
246
- }
247
- return builder;
248
- }
249
-
250
- private ObjectListing s3objectList(String key, StorageClass storageClass) throws Exception
251
- {
252
- ObjectListing list = new ObjectListing();
253
-
254
- S3ObjectSummary element = new S3ObjectSummary();
255
- element.setKey(key);
256
- element.setStorageClass(storageClass.toString());
257
-
258
- List<S3ObjectSummary> objectSummaries = new ArrayList<>();
259
- objectSummaries.add(element);
260
-
261
- Field field = list.getClass().getDeclaredField("objectSummaries");
262
- field.setAccessible(true);
263
- field.set(list, objectSummaries);
264
-
265
- return list;
266
- }
267
-
268
215
  static class Control
269
216
  implements InputPlugin.Control
270
217
  {
@@ -0,0 +1,67 @@
1
+ package org.embulk.input.s3.explorer;
2
+
3
+ import com.amazonaws.services.s3.AmazonS3;
4
+ import com.amazonaws.services.s3.model.ListObjectsRequest;
5
+ import com.amazonaws.services.s3.model.ObjectListing;
6
+ import org.embulk.EmbulkTestRuntime;
7
+ import org.junit.Before;
8
+ import org.junit.Rule;
9
+ import org.junit.Test;
10
+ import org.junit.runner.RunWith;
11
+ import org.mockito.ArgumentCaptor;
12
+ import org.mockito.Mock;
13
+ import org.mockito.internal.util.reflection.FieldSetter;
14
+ import org.mockito.runners.MockitoJUnitRunner;
15
+
16
+ import static org.junit.Assert.assertEquals;
17
+ import static org.junit.Assert.assertFalse;
18
+ import static org.mockito.Matchers.any;
19
+ import static org.mockito.Mockito.mock;
20
+ import static org.mockito.Mockito.verify;
21
+ import static org.mockito.Mockito.when;
22
+
23
+ @RunWith(MockitoJUnitRunner.class)
24
+ public class TestS3NameOrderPrefixFileExplorer
25
+ {
26
+ private static final String BUCKET_NAME = "bucket_name";
27
+ private static final String PATH_PREFIX = "path_prefix";
28
+ private static final String LAST_PATH = "last_path";
29
+
30
+ @Rule
31
+ public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
32
+
33
+ @Mock
34
+ private AmazonS3 s3Client;
35
+
36
+ private S3NameOrderPrefixFileExplorer s3NameOrderPrefixFileExplorer;
37
+
38
+ @Before
39
+ public void setUp()
40
+ {
41
+ s3NameOrderPrefixFileExplorer = new S3NameOrderPrefixFileExplorer(BUCKET_NAME, s3Client, null, PATH_PREFIX, false, LAST_PATH);
42
+ }
43
+
44
+ @Test
45
+ public void fetch_should_return_list_objects()
46
+ {
47
+ final ObjectListing ol = mock(ObjectListing.class);
48
+ when(s3Client.listObjects(any(ListObjectsRequest.class))).thenReturn(ol);
49
+
50
+ s3NameOrderPrefixFileExplorer.fetch();
51
+ final ArgumentCaptor<ListObjectsRequest> listObjectsRequestCaptor = ArgumentCaptor.forClass(ListObjectsRequest.class);
52
+
53
+ verify(ol).getNextMarker();
54
+ verify(s3Client).listObjects(listObjectsRequestCaptor.capture());
55
+ final ListObjectsRequest listObjectsRequest = listObjectsRequestCaptor.getValue();
56
+ assertEquals(BUCKET_NAME, listObjectsRequest.getBucketName());
57
+ assertEquals(PATH_PREFIX, listObjectsRequest.getPrefix());
58
+ assertEquals(LAST_PATH, listObjectsRequest.getMarker());
59
+ }
60
+
61
+ @Test
62
+ public void hasNext_should_return_false_if_no_lastpath() throws NoSuchFieldException
63
+ {
64
+ new FieldSetter(s3NameOrderPrefixFileExplorer, s3NameOrderPrefixFileExplorer.getClass().getDeclaredField("lastPath")).set(null);
65
+ assertFalse(s3NameOrderPrefixFileExplorer.hasNext());
66
+ }
67
+ }
@@ -0,0 +1,128 @@
1
+ package org.embulk.input.s3.explorer;
2
+
3
+ import com.amazonaws.services.s3.AmazonS3;
4
+ import com.amazonaws.services.s3.model.S3ObjectSummary;
5
+ import com.amazonaws.services.s3.model.StorageClass;
6
+ import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
7
+ import org.embulk.EmbulkTestRuntime;
8
+ import org.embulk.config.ConfigException;
9
+ import org.embulk.input.s3.FileList;
10
+ import org.embulk.spi.util.RetryExecutor;
11
+ import org.junit.Before;
12
+ import org.junit.Rule;
13
+ import org.junit.Test;
14
+ import org.junit.runner.RunWith;
15
+ import org.mockito.Mock;
16
+ import org.mockito.runners.MockitoJUnitRunner;
17
+
18
+ import java.util.Collections;
19
+ import java.util.List;
20
+
21
+ import static org.mockito.Mockito.doReturn;
22
+ import static org.mockito.Mockito.never;
23
+ import static org.mockito.Mockito.spy;
24
+ import static org.mockito.Mockito.times;
25
+ import static org.mockito.Mockito.verify;
26
+ import static org.mockito.Mockito.when;
27
+
28
+ @RunWith(MockitoJUnitRunner.class)
29
+ public class TestS3PrefixFileExplorer
30
+ {
31
+ private static final String PATH_PREFIX = "path_prefix";
32
+ private static final String BUCKET_NAME = "bucket_name";
33
+ private static final String OBJECT_KEY = "key";
34
+
35
+ @SuppressFBWarnings("URF_UNREAD_PUBLIC_OR_PROTECTED_FIELD")
36
+ @Rule
37
+ public EmbulkTestRuntime embulkTestRuntime = new EmbulkTestRuntime();
38
+
39
+ @Mock
40
+ private AmazonS3 s3Client;
41
+
42
+ @Mock
43
+ private FileList.Builder builder;
44
+
45
+ @Mock
46
+ private S3ObjectSummary s3ObjectSummary;
47
+
48
+ private S3PrefixFileExplorer s3PrefixFileExplorer;
49
+
50
+ @Before
51
+ public void setUp()
52
+ {
53
+ s3PrefixFileExplorer = spyS3PrefixFileExplorer(BUCKET_NAME, s3Client, null, PATH_PREFIX, false);
54
+ doReturn(Collections.singletonList(s3ObjectSummary)).when(s3PrefixFileExplorer).fetch();
55
+ }
56
+
57
+ @Test(expected = ConfigException.class)
58
+ public void addToBuilder_should_throw_exception_if_notskipped_glacier_storage()
59
+ {
60
+ when(s3ObjectSummary.getStorageClass()).thenReturn(StorageClass.Glacier.toString());
61
+ s3PrefixFileExplorer.addToBuilder(builder);
62
+ }
63
+
64
+ @Test
65
+ public void addToBuilder_should_skip_glacier_storage_if_allowed()
66
+ {
67
+ when(s3ObjectSummary.getStorageClass()).thenReturn(StorageClass.Glacier.toString());
68
+ // override spied object for changing `skipGlacierObjects`
69
+ s3PrefixFileExplorer = spyS3PrefixFileExplorer(BUCKET_NAME, s3Client, null, PATH_PREFIX, true);
70
+ doReturn(false).when(s3PrefixFileExplorer).hasNext();
71
+ doReturn(Collections.singletonList(s3ObjectSummary)).when(s3PrefixFileExplorer).fetch();
72
+ s3PrefixFileExplorer.addToBuilder(builder);
73
+
74
+ verify(s3PrefixFileExplorer).hasNext();
75
+ verify(s3ObjectSummary, never()).getSize();
76
+ }
77
+
78
+ @Test
79
+ public void addToBuilder_should_loop_till_nothing_left()
80
+ {
81
+ // The loop runs 3 times in total, but only 2 keys are added because the first object is in the Glacier storage class and is skipped
82
+ when(builder.needsMore()).thenReturn(true);
83
+ // override spied object for changing `skipGlacierObjects`
84
+ s3PrefixFileExplorer = spyS3PrefixFileExplorer(BUCKET_NAME, s3Client, null, PATH_PREFIX, true);
85
+ when(s3ObjectSummary.getStorageClass())
86
+ .thenReturn(StorageClass.Glacier.toString())
87
+ .thenReturn(StorageClass.Standard.toString());
88
+ when(s3ObjectSummary.getSize()).thenReturn(1L);
89
+ when(s3ObjectSummary.getKey()).thenReturn(PATH_PREFIX + OBJECT_KEY);
90
+ doReturn(Collections.singletonList(s3ObjectSummary)).when(s3PrefixFileExplorer).fetch();
91
+ doReturn(true).doReturn(true).doReturn(false).when(s3PrefixFileExplorer).hasNext();
92
+
93
+ s3PrefixFileExplorer.addToBuilder(builder);
94
+ verify(builder, times(2)).add(PATH_PREFIX + OBJECT_KEY, 1);
95
+ }
96
+
97
+ @Test
98
+ public void addToBuilder_should_stop_import_if_too_many_files()
99
+ {
100
+ when(builder.needsMore()).thenReturn(false);
101
+ when(s3ObjectSummary.getStorageClass()).thenReturn(StorageClass.Standard.toString());
102
+ when(s3ObjectSummary.getKey()).thenReturn(PATH_PREFIX + OBJECT_KEY);
103
+ when(s3ObjectSummary.getSize()).thenReturn(1L);
104
+ doReturn(true).when(s3PrefixFileExplorer).hasNext();
105
+ s3PrefixFileExplorer.addToBuilder(builder);
106
+
107
+ verify(builder).add(PATH_PREFIX + OBJECT_KEY, 1);
108
+ verify(s3PrefixFileExplorer, never()).hasNext();
109
+ }
110
+
111
+ private S3PrefixFileExplorer spyS3PrefixFileExplorer(final String bucketName, final AmazonS3 s3Client, final RetryExecutor retryExecutor, final String pathPrefix, final boolean skipGlacierObjects)
112
+ {
113
+ return spy(new S3PrefixFileExplorer(bucketName, s3Client, retryExecutor, pathPrefix, skipGlacierObjects)
114
+ {
115
+ @Override
116
+ protected List<S3ObjectSummary> fetch()
117
+ {
118
+ return null;
119
+ }
120
+
121
+ @Override
122
+ protected boolean hasNext()
123
+ {
124
+ return false;
125
+ }
126
+ });
127
+ }
128
+ }
@@ -0,0 +1,56 @@
1
+ package org.embulk.input.s3.explorer;
2
+
3
+ import com.amazonaws.services.s3.AmazonS3;
4
+ import com.amazonaws.services.s3.model.GetObjectMetadataRequest;
5
+ import com.amazonaws.services.s3.model.ObjectMetadata;
6
+ import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
7
+ import org.embulk.EmbulkTestRuntime;
8
+ import org.embulk.input.s3.FileList;
9
+ import org.junit.Before;
10
+ import org.junit.Rule;
11
+ import org.junit.Test;
12
+ import org.junit.runner.RunWith;
13
+ import org.mockito.Mock;
14
+ import org.mockito.runners.MockitoJUnitRunner;
15
+
16
+ import static org.mockito.Matchers.any;
17
+ import static org.mockito.Mockito.verify;
18
+ import static org.mockito.Mockito.when;
19
+
20
+ @RunWith(MockitoJUnitRunner.class)
21
+ public class TestS3SingleFileExplorer
22
+ {
23
+ private static final String PATH = "path";
24
+ private static final String BUCKET_NAME = "bucket_name";
25
+
26
+ @SuppressFBWarnings("URF_UNREAD_PUBLIC_OR_PROTECTED_FIELD")
27
+ @Rule
28
+ public EmbulkTestRuntime embulkTestRuntime = new EmbulkTestRuntime();
29
+
30
+ @Mock
31
+ private AmazonS3 s3Client;
32
+
33
+ @Mock
34
+ private FileList.Builder builder;
35
+
36
+ @Mock
37
+ private ObjectMetadata metadata;
38
+
39
+ private S3SingleFileExplorer s3SingleFileExplorer;
40
+
41
+ @Before
42
+ public void setUp()
43
+ {
44
+ s3SingleFileExplorer = new S3SingleFileExplorer(BUCKET_NAME, s3Client, null, PATH);
45
+ }
46
+
47
+ @Test
48
+ public void addToBuilder_should_request_single_object_metadata()
49
+ {
50
+ when(s3Client.getObjectMetadata(any(GetObjectMetadataRequest.class))).thenReturn(metadata);
51
+ when(metadata.getContentLength()).thenReturn(1L);
52
+ s3SingleFileExplorer.addToBuilder(builder);
53
+
54
+ verify(builder).add(PATH, 1);
55
+ }
56
+ }
@@ -0,0 +1,112 @@
1
+ package org.embulk.input.s3.explorer;
2
+
3
+ import com.amazonaws.services.s3.AmazonS3;
4
+ import com.amazonaws.services.s3.model.ListObjectsRequest;
5
+ import com.amazonaws.services.s3.model.ObjectListing;
6
+ import com.amazonaws.services.s3.model.S3ObjectSummary;
7
+ import org.embulk.EmbulkTestRuntime;
8
+ import org.junit.Before;
9
+ import org.junit.Rule;
10
+ import org.junit.Test;
11
+ import org.junit.runner.RunWith;
12
+ import org.mockito.Mock;
13
+ import org.mockito.internal.util.reflection.FieldSetter;
14
+ import org.mockito.runners.MockitoJUnitRunner;
15
+
16
+ import java.util.Arrays;
17
+ import java.util.Calendar;
18
+ import java.util.List;
19
+ import java.util.Optional;
20
+
21
+ import static org.junit.Assert.assertEquals;
22
+ import static org.junit.Assert.assertFalse;
23
+ import static org.mockito.Matchers.any;
24
+ import static org.mockito.Mockito.mock;
25
+ import static org.mockito.Mockito.when;
26
+
27
+ @RunWith(MockitoJUnitRunner.class)
28
+ public class TestS3TimeOrderPrefixFileExplorer
29
+ {
30
+ private static final String BUCKET_NAME = "bucket_name";
31
+ private static final String PATH_PREFIX = "path_prefix";
32
+
33
+ @Rule
34
+ public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
35
+
36
+ @Mock
37
+ private AmazonS3 s3Client;
38
+
39
+ private S3TimeOrderPrefixFileExplorer s3TimeOrderPrefixFileExplorer;
40
+
41
+ @Before
42
+ public void setUp()
43
+ {
44
+ final Calendar cal = Calendar.getInstance();
45
+ cal.set(2019, Calendar.MAY, 25, 10, 0);
46
+ s3TimeOrderPrefixFileExplorer = new S3TimeOrderPrefixFileExplorer(BUCKET_NAME, s3Client, null, PATH_PREFIX,
47
+ false, Optional.empty(), cal.getTime());
48
+ }
49
+
50
+ @Test
51
+ public void fetch_should_return_filtered_objects_before_end_time()
52
+ {
53
+ final S3ObjectSummary s3ObjectBefore = mock(S3ObjectSummary.class);
54
+ final Calendar cal = Calendar.getInstance();
55
+ cal.set(2019, Calendar.MAY, 24, 10, 0);
56
+ when(s3ObjectBefore.getLastModified()).thenReturn(cal.getTime());
57
+
58
+ final S3ObjectSummary s3ObjectAfter = mock(S3ObjectSummary.class);
59
+ cal.set(2019, Calendar.MAY, 26, 10, 0);
60
+ when(s3ObjectAfter.getLastModified()).thenReturn(cal.getTime());
61
+
62
+ final ObjectListing ol = mock(ObjectListing.class);
63
+ when(s3Client.listObjects(any(ListObjectsRequest.class))).thenReturn(ol);
64
+ when(ol.getObjectSummaries()).thenReturn(Arrays.asList(s3ObjectBefore, s3ObjectAfter));
65
+
66
+ final List<S3ObjectSummary> result = s3TimeOrderPrefixFileExplorer.fetch();
67
+ assertEquals(1, result.size());
68
+ assertEquals(s3ObjectBefore, result.get(0));
69
+ }
70
+
71
+ @Test
72
+ public void fetch_should_return_filtered_objects_after_or_equals_begin_time()
73
+ {
74
+ final Calendar to = Calendar.getInstance();
75
+ to.set(2019, Calendar.MAY, 25, 10, 0);
76
+ final Calendar from = Calendar.getInstance();
77
+ from.set(2019, Calendar.MAY, 24, 10, 0);
78
+ s3TimeOrderPrefixFileExplorer = new S3TimeOrderPrefixFileExplorer(BUCKET_NAME, s3Client, null, PATH_PREFIX,
79
+ false, Optional.of(from.getTime()), to.getTime());
80
+
81
+ final S3ObjectSummary s3ObjectEqual = mock(S3ObjectSummary.class);
82
+ final Calendar equalCal = Calendar.getInstance();
83
+ equalCal.set(2019, Calendar.MAY, 24, 10, 0);
84
+ when(s3ObjectEqual.getLastModified()).thenReturn(equalCal.getTime());
85
+
86
+ final S3ObjectSummary s3ObjectBefore = mock(S3ObjectSummary.class);
87
+ final Calendar beforeCal = Calendar.getInstance();
88
+ beforeCal.set(2019, Calendar.MAY, 24, 20, 0);
89
+ when(s3ObjectBefore.getLastModified()).thenReturn(beforeCal.getTime());
90
+
91
+ final S3ObjectSummary s3ObjectAfter = mock(S3ObjectSummary.class);
92
+ final Calendar afterCal = Calendar.getInstance();
93
+ afterCal.set(2019, Calendar.MAY, 26, 10, 0);
94
+ when(s3ObjectAfter.getLastModified()).thenReturn(afterCal.getTime());
95
+
96
+ final ObjectListing ol = mock(ObjectListing.class);
97
+ when(s3Client.listObjects(any(ListObjectsRequest.class))).thenReturn(ol);
98
+ when(ol.getObjectSummaries()).thenReturn(Arrays.asList(s3ObjectEqual, s3ObjectBefore, s3ObjectAfter));
99
+
100
+ final List<S3ObjectSummary> result = s3TimeOrderPrefixFileExplorer.fetch();
101
+ assertEquals(2, result.size());
102
+ assertEquals(s3ObjectEqual, result.get(0));
103
+ assertEquals(s3ObjectBefore, result.get(1));
104
+ }
105
+
106
+ @Test
107
+ public void hasNext_should_return_false_if_no_lastpath() throws NoSuchFieldException
108
+ {
109
+ new FieldSetter(s3TimeOrderPrefixFileExplorer, s3TimeOrderPrefixFileExplorer.getClass().getDeclaredField("lastPath")).set(null);
110
+ assertFalse(s3TimeOrderPrefixFileExplorer.hasNext());
111
+ }
112
+ }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-s3
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.3
4
+ version: 0.3.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-02-05 00:00:00.000000000 Z
11
+ date: 2019-06-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -53,19 +53,28 @@ files:
53
53
  - src/main/java/org/embulk/input/s3/HttpProxy.java
54
54
  - src/main/java/org/embulk/input/s3/RetrySupportPluginTask.java
55
55
  - src/main/java/org/embulk/input/s3/S3FileInputPlugin.java
56
- - src/test/java/org/embulk/input/s3/TestAbstractS3FileInputPlugin.java
56
+ - src/main/java/org/embulk/input/s3/explorer/S3FileExplorer.java
57
+ - src/main/java/org/embulk/input/s3/explorer/S3NameOrderPrefixFileExplorer.java
58
+ - src/main/java/org/embulk/input/s3/explorer/S3PrefixFileExplorer.java
59
+ - src/main/java/org/embulk/input/s3/explorer/S3SingleFileExplorer.java
60
+ - src/main/java/org/embulk/input/s3/explorer/S3TimeOrderPrefixFileExplorer.java
61
+ - src/main/java/org/embulk/input/s3/utils/DateUtils.java
57
62
  - src/test/java/org/embulk/input/s3/TestAwsCredentials.java
58
63
  - src/test/java/org/embulk/input/s3/TestDefaultRetryable.java
59
64
  - src/test/java/org/embulk/input/s3/TestFileList.java
60
65
  - src/test/java/org/embulk/input/s3/TestHttpProxy.java
61
66
  - src/test/java/org/embulk/input/s3/TestS3FileInputPlugin.java
62
67
  - src/test/java/org/embulk/input/s3/TestS3InputStreamReopener.java
68
+ - src/test/java/org/embulk/input/s3/explorer/TestS3NameOrderPrefixFileExplorer.java
69
+ - src/test/java/org/embulk/input/s3/explorer/TestS3PrefixFileExplorer.java
70
+ - src/test/java/org/embulk/input/s3/explorer/TestS3SingleFileExplorer.java
71
+ - src/test/java/org/embulk/input/s3/explorer/TestS3TimeOrderPrefixFileExplorer.java
63
72
  - src/test/resources/sample_01.csv
64
- - classpath/embulk-util-aws-credentials-0.3.3.jar
73
+ - classpath/embulk-util-aws-credentials-0.3.4.jar
65
74
  - classpath/httpcore-4.4.9.jar
66
75
  - classpath/httpclient-4.5.5.jar
67
76
  - classpath/ion-java-1.0.2.jar
68
- - classpath/embulk-input-s3-0.3.3.jar
77
+ - classpath/embulk-input-s3-0.3.4.jar
69
78
  - classpath/aws-java-sdk-core-1.11.466.jar
70
79
  - classpath/jcl-over-slf4j-1.7.12.jar
71
80
  - classpath/commons-codec-1.10.jar
@@ -1,164 +0,0 @@
1
- package org.embulk.input.s3;
2
-
3
- import com.amazonaws.AmazonServiceException;
4
- import com.amazonaws.services.s3.AmazonS3;
5
- import com.amazonaws.services.s3.model.GetObjectMetadataRequest;
6
- import com.amazonaws.services.s3.model.ListObjectsRequest;
7
- import com.amazonaws.services.s3.model.ObjectListing;
8
- import com.amazonaws.services.s3.model.ObjectMetadata;
9
- import org.apache.http.HttpStatus;
10
- import org.embulk.EmbulkTestRuntime;
11
- import org.embulk.spi.util.RetryExecutor;
12
- import org.junit.Before;
13
- import org.junit.Rule;
14
- import org.junit.Test;
15
-
16
- import java.util.Optional;
17
-
18
- import static org.mockito.Matchers.any;
19
- import static org.mockito.Mockito.doReturn;
20
- import static org.mockito.Mockito.doThrow;
21
- import static org.mockito.Mockito.mock;
22
-
23
- public class TestAbstractS3FileInputPlugin
24
- {
25
- private static RetryExecutor retryExecutor()
26
- {
27
- return RetryExecutor.retryExecutor()
28
- .withInitialRetryWait(0)
29
- .withMaxRetryWait(0);
30
- }
31
-
32
- private static AbstractS3FileInputPlugin dummyS3Plugin()
33
- {
34
- return new AbstractS3FileInputPlugin()
35
- {
36
- @Override
37
- protected Class<? extends PluginTask> getTaskClass()
38
- {
39
- return PluginTask.class;
40
- }
41
- };
42
- }
43
-
44
- private static class SomeException extends RuntimeException
45
- {
46
- }
47
-
48
- @Rule
49
- public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
50
-
51
- private AmazonS3 client;
52
-
53
- @Before
54
- public void createResources()
55
- {
56
- client = mock(AmazonS3.class);
57
- }
58
-
59
- @Test
60
- public void listS3FilesByPrefix()
61
- {
62
- doReturn(new ObjectListing()).when(client).listObjects(any(ListObjectsRequest.class));
63
- FileList.Builder builder = new FileList.Builder();
64
- dummyS3Plugin().listS3FilesByPrefix(builder, client, "some_bucket", "some_prefix", Optional.of("last_path"), true);
65
- }
66
-
67
- @Test
68
- public void listS3FileByPrefix_with_retry()
69
- {
70
- doThrow(new RuntimeException()).doReturn(new ObjectListing())
71
- .when(client).listObjects(any(ListObjectsRequest.class));
72
- FileList.Builder builder = new FileList.Builder();
73
- dummyS3Plugin().listS3FilesByPrefix(
74
- builder, client, "some_bucket", "some_prefix", Optional.of("last_path"), true,
75
- retryExecutor().withRetryLimit(1));
76
- }
77
-
78
- @Test(expected = SomeException.class)
79
- public void listS3FileByPrefix_on_retry_gave_up_should_throw_the_original_exception()
80
- {
81
- doThrow(new SomeException()).doReturn(new ObjectListing())
82
- .when(client).listObjects(any(ListObjectsRequest.class));
83
- FileList.Builder builder = new FileList.Builder();
84
- dummyS3Plugin().listS3FilesByPrefix(
85
- builder, client, "some_bucket", "some_prefix", Optional.of("last_path"), true,
86
- retryExecutor().withRetryLimit(0));
87
- }
88
-
89
- @Test(expected = AmazonServiceException.class)
90
- public void listS3FileByPrefix_on_retry_gave_up_should_throw_the_original_exception_in_forbidden_code()
91
- {
92
- AmazonServiceException exception = new AmazonServiceException("Forbidden exception");
93
- exception.setStatusCode(HttpStatus.SC_FORBIDDEN);
94
- exception.setErrorType(AmazonServiceException.ErrorType.Client);
95
-
96
- doThrow(exception).doReturn(new ObjectListing())
97
- .when(client).listObjects(any(ListObjectsRequest.class));
98
- FileList.Builder builder = new FileList.Builder();
99
- dummyS3Plugin().listS3FilesByPrefix(
100
- builder, client, "some_bucket", "some_prefix", Optional.of("last_path"), true,
101
- retryExecutor().withRetryLimit(1));
102
- }
103
-
104
- @Test(expected = AmazonServiceException.class)
105
- public void listS3FileByPrefix_on_retry_gave_up_should_throw_the_original_exception_in_methodnotallow_code()
106
- {
107
- AmazonServiceException exception = new AmazonServiceException("method not allow exception");
108
- exception.setStatusCode(HttpStatus.SC_METHOD_NOT_ALLOWED);
109
- exception.setErrorType(AmazonServiceException.ErrorType.Client);
110
-
111
- doThrow(exception).doReturn(new ObjectListing())
112
- .when(client).listObjects(any(ListObjectsRequest.class));
113
- FileList.Builder builder = new FileList.Builder();
114
- dummyS3Plugin().listS3FilesByPrefix(
115
- builder, client, "some_bucket", "some_prefix", Optional.of("last_path"), true,
116
- retryExecutor().withRetryLimit(1));
117
- }
118
-
119
- @Test(expected = AmazonServiceException.class)
120
- public void listS3FileByPrefix_on_retry_gave_up_should_throw_the_original_exception_in_expiredToken_code()
121
- {
122
- AmazonServiceException exception = new AmazonServiceException("expired token exception");
123
- exception.setStatusCode(HttpStatus.SC_BAD_REQUEST);
124
- exception.setErrorCode("ExpiredToken");
125
- exception.setErrorType(AmazonServiceException.ErrorType.Client);
126
-
127
- doThrow(exception).doReturn(new ObjectListing())
128
- .when(client).listObjects(any(ListObjectsRequest.class));
129
- FileList.Builder builder = new FileList.Builder();
130
- dummyS3Plugin().listS3FilesByPrefix(
131
- builder, client, "some_bucket", "some_prefix", Optional.of("last_path"), true,
132
- retryExecutor().withRetryLimit(1));
133
- }
134
-
135
- @Test
136
- public void addS3DirectObject()
137
- {
138
- doReturn(new ObjectMetadata()).when(client).getObjectMetadata(any(GetObjectMetadataRequest.class));
139
- FileList.Builder builder = new FileList.Builder().pathMatchPattern("");
140
- dummyS3Plugin().addS3DirectObject(builder, client, "some_bucket", "some_prefix");
141
- }
142
-
143
- @Test
144
- public void addS3DirectObject_with_retry()
145
- {
146
- doThrow(new RuntimeException()).doReturn(new ObjectMetadata())
147
- .when(client).getObjectMetadata(any(GetObjectMetadataRequest.class));
148
- FileList.Builder builder = new FileList.Builder().pathMatchPattern("");
149
- dummyS3Plugin().addS3DirectObject(
150
- builder, client, "some_bucket", "some_prefix",
151
- retryExecutor());
152
- }
153
-
154
- @Test(expected = SomeException.class)
155
- public void addS3DirectObject_on_retry_gave_up_should_throw_original_exception()
156
- {
157
- doThrow(new SomeException()).doReturn(new ObjectMetadata())
158
- .when(client).getObjectMetadata(any(GetObjectMetadataRequest.class));
159
- FileList.Builder builder = new FileList.Builder().pathMatchPattern("");
160
- dummyS3Plugin().addS3DirectObject(
161
- builder, client, "some_bucket", "some_prefix",
162
- retryExecutor().withRetryLimit(0));
163
- }
164
- }