embulk-input-s3 0.3.3 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 889fd9c88e7757c4284247122fd94b65e19f9bb7
4
- data.tar.gz: 35ad69c5264debb86359279289135512f5b67ca8
3
+ metadata.gz: 0497b4779ac08c091c1291583ef439ada4f48ea2
4
+ data.tar.gz: 0ef8f1d26751cf22d7975b570d9f7cbcfd7e270f
5
5
  SHA512:
6
- metadata.gz: 1831c5e0392336e86b98a244a8fb45008499ae062308f9dfd9f29d904e6b3c8f5deca5eee68cf2bc5ddcb770d3fbd2a6f65e3f83fbad9e42f41830dea664ffc2
7
- data.tar.gz: e24be583cbbc9442056152f9142dbfc413048f8ecbdb0a7dc65b30c0265b5fff80ac324b84598c43f4da71f32857fdcd044c0a7566dca4c23e195ef4618b397a
6
+ metadata.gz: 82310a7bae6f789ad0962346438a945b4ed59a21fc34be6bbd8e705f979482be58994a1d5d7258f07020ad72cc8dee240313b569b04081f989299a18845dbce5
7
+ data.tar.gz: 5a41d741bb26cd0d619149c8c1d4b47495c167570f450460e198a129c1b91ebc9f55b45eb771e3b58de6a69bd58f3f52029f0d272200c46f001b3ff6e24ecd13
@@ -7,15 +7,9 @@ import com.amazonaws.auth.AWSCredentialsProvider;
7
7
  import com.amazonaws.retry.PredefinedRetryPolicies;
8
8
  import com.amazonaws.services.s3.AmazonS3;
9
9
  import com.amazonaws.services.s3.AmazonS3ClientBuilder;
10
- import com.amazonaws.services.s3.model.GetObjectMetadataRequest;
11
10
  import com.amazonaws.services.s3.model.GetObjectRequest;
12
- import com.amazonaws.services.s3.model.ListObjectsRequest;
13
- import com.amazonaws.services.s3.model.ObjectListing;
14
- import com.amazonaws.services.s3.model.ObjectMetadata;
15
11
  import com.amazonaws.services.s3.model.S3Object;
16
12
  import com.amazonaws.services.s3.model.S3ObjectInputStream;
17
- import com.amazonaws.services.s3.model.S3ObjectSummary;
18
- import com.amazonaws.services.s3.model.StorageClass;
19
13
  import com.google.common.annotations.VisibleForTesting;
20
14
  import org.embulk.config.Config;
21
15
  import org.embulk.config.ConfigDefault;
@@ -26,6 +20,10 @@ import org.embulk.config.ConfigSource;
26
20
  import org.embulk.config.Task;
27
21
  import org.embulk.config.TaskReport;
28
22
  import org.embulk.config.TaskSource;
23
+ import org.embulk.input.s3.explorer.S3NameOrderPrefixFileExplorer;
24
+ import org.embulk.input.s3.explorer.S3SingleFileExplorer;
25
+ import org.embulk.input.s3.explorer.S3TimeOrderPrefixFileExplorer;
26
+ import org.embulk.input.s3.utils.DateUtils;
29
27
  import org.embulk.spi.BufferAllocator;
30
28
  import org.embulk.spi.Exec;
31
29
  import org.embulk.spi.FileInputPlugin;
@@ -40,6 +38,9 @@ import org.slf4j.Logger;
40
38
 
41
39
  import java.io.IOException;
42
40
  import java.io.InputStream;
41
+ import java.text.SimpleDateFormat;
42
+ import java.util.Collections;
43
+ import java.util.Date;
43
44
  import java.util.Iterator;
44
45
  import java.util.List;
45
46
  import java.util.Optional;
@@ -51,6 +52,7 @@ public abstract class AbstractS3FileInputPlugin
51
52
  implements FileInputPlugin
52
53
  {
53
54
  private static final Logger LOGGER = Exec.getLogger(S3FileInputPlugin.class);
55
+ private static final String FULL_DATE_FORMAT = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'";
54
56
 
55
57
  public interface PluginTask
56
58
  extends AwsCredentialsTask, FileList.Task, RetrySupportPluginTask, Task
@@ -88,12 +90,35 @@ public abstract class AbstractS3FileInputPlugin
88
90
  @ConfigDefault("false")
89
91
  boolean getSkipGlacierObjects();
90
92
 
93
+ @Config("use_modified_time")
94
+ @ConfigDefault("false")
95
+ boolean getUseModifiedTime();
96
+
97
+ @Config("last_modified_time")
98
+ @ConfigDefault("null")
99
+ Optional<String> getLastModifiedTime();
100
+
91
101
  // TODO timeout, ssl, etc
92
102
 
103
+ ////////////////////////////////////////
104
+ // Internal configurations
105
+ ////////////////////////////////////////
106
+
93
107
  FileList getFiles();
94
108
 
95
109
  void setFiles(FileList files);
96
110
 
111
+ /**
112
+ * end_modified_time is conditionally set if modified_time mode is enabled.
113
+ *
114
+ * It is internal state and must not be set in config.yml
115
+ */
116
+ @Config("__end_modified_time")
117
+ @ConfigDefault("null")
118
+ Optional<Date> getEndModifiedTime();
119
+
120
+ void setEndModifiedTime(Optional<Date> endModifiedTime);
121
+
97
122
  @ConfigInject
98
123
  BufferAllocator getBufferAllocator();
99
124
  }
@@ -105,6 +130,7 @@ public abstract class AbstractS3FileInputPlugin
105
130
  {
106
131
  PluginTask task = config.loadConfig(getTaskClass());
107
132
 
133
+ errorIfInternalParamsAreSet(task);
108
134
  validateInputTask(task);
109
135
  // list files recursively
110
136
  task.setFiles(listFiles(task));
@@ -130,9 +156,15 @@ public abstract class AbstractS3FileInputPlugin
130
156
 
131
157
  // last_path
132
158
  if (task.getIncremental()) {
133
- Optional<String> lastPath = task.getFiles().getLastPath(task.getLastPath());
134
- LOGGER.info("Incremental job, setting last_path to [{}]", lastPath.orElse(""));
135
- configDiff.set("last_path", lastPath);
159
+ if (task.getUseModifiedTime()) {
160
+ Date endModifiedTime = task.getEndModifiedTime().orElse(new Date());
161
+ configDiff.set("last_modified_time", new SimpleDateFormat(FULL_DATE_FORMAT).format(endModifiedTime));
162
+ }
163
+ else {
164
+ Optional<String> lastPath = task.getFiles().getLastPath(task.getLastPath());
165
+ LOGGER.info("Incremental job, setting last_path to [{}]", lastPath.orElse(""));
166
+ configDiff.set("last_path", lastPath);
167
+ }
136
168
  }
137
169
  return configDiff;
138
170
  }
@@ -237,22 +269,35 @@ public abstract class AbstractS3FileInputPlugin
237
269
  String bucketName = task.getBucket();
238
270
  FileList.Builder builder = new FileList.Builder(task);
239
271
  RetryExecutor retryExec = retryExecutorFrom(task);
272
+
240
273
  if (task.getPath().isPresent()) {
241
274
  LOGGER.info("Start getting object with path: [{}]", task.getPath().get());
242
- addS3DirectObject(builder, client, task.getBucket(), task.getPath().get(), retryExec);
275
+ new S3SingleFileExplorer(bucketName, client, retryExec, task.getPath().get()).addToBuilder(builder);
276
+ return builder.build();
243
277
  }
244
- else {
245
- // does not need to verify existent path prefix here since there is the validation requires either path or path_prefix
246
- LOGGER.info("Start listing file with prefix [{}]", task.getPathPrefix().get());
247
- if (task.getPathPrefix().get().equals("/")) {
248
- LOGGER.info("Listing files with prefix \"/\". This doesn't mean all files in a bucket. If you intend to read all files, use \"path_prefix: ''\" (empty string) instead.");
249
- }
250
278
 
251
- listS3FilesByPrefix(builder, client, bucketName,
252
- task.getPathPrefix().get(), task.getLastPath(), task.getSkipGlacierObjects(), retryExec);
253
- LOGGER.info("Found total [{}] files", builder.size());
279
+ // does not need to verify existent path prefix here since there is the validation requires either path or path_prefix
280
+ LOGGER.info("Start listing file with prefix [{}]", task.getPathPrefix().get());
281
+ if (task.getPathPrefix().get().equals("/")) {
282
+ LOGGER.info("Listing files with prefix \"/\". This doesn't mean all files in a bucket. If you intend to read all files, use \"path_prefix: ''\" (empty string) instead.");
254
283
  }
255
284
 
285
+ if (task.getUseModifiedTime()) {
286
+ Date now = new Date();
287
+ Optional<Date> from = task.getLastModifiedTime().isPresent()
288
+ ? Optional.of(DateUtils.parse(task.getLastModifiedTime().get(), Collections.singletonList(FULL_DATE_FORMAT)))
289
+ : Optional.empty();
290
+ task.setEndModifiedTime(Optional.of(now));
291
+
292
+ new S3TimeOrderPrefixFileExplorer(bucketName, client, retryExec, task.getPathPrefix().get(),
293
+ task.getSkipGlacierObjects(), from, now).addToBuilder(builder);
294
+ }
295
+ else {
296
+ new S3NameOrderPrefixFileExplorer(bucketName, client, retryExec, task.getPathPrefix().get(),
297
+ task.getSkipGlacierObjects(), task.getLastPath().orElse(null)).addToBuilder(builder);
298
+ }
299
+
300
+ LOGGER.info("Found total [{}] files", builder.size());
256
301
  return builder.build();
257
302
  }
258
303
  catch (AmazonServiceException ex) {
@@ -268,107 +313,13 @@ public abstract class AbstractS3FileInputPlugin
268
313
  }
269
314
  }
270
315
 
271
- @VisibleForTesting
272
- public void addS3DirectObject(FileList.Builder builder,
273
- final AmazonS3 client,
274
- String bucket,
275
- String objectKey)
276
- {
277
- addS3DirectObject(builder, client, bucket, objectKey, null);
278
- }
279
-
280
- @VisibleForTesting
281
- public void addS3DirectObject(FileList.Builder builder,
282
- final AmazonS3 client,
283
- String bucket,
284
- String objectKey,
285
- RetryExecutor retryExec)
286
- {
287
- final GetObjectMetadataRequest objectMetadataRequest = new GetObjectMetadataRequest(bucket, objectKey);
288
-
289
- ObjectMetadata objectMetadata = new DefaultRetryable<ObjectMetadata>("Looking up for a single object") {
290
- @Override
291
- public ObjectMetadata call()
292
- {
293
- return client.getObjectMetadata(objectMetadataRequest);
294
- }
295
- }.executeWith(retryExec);
296
-
297
- builder.add(objectKey, objectMetadata.getContentLength());
298
- }
299
-
300
- private void validateInputTask(PluginTask task)
316
+ private void validateInputTask(final PluginTask task)
301
317
  {
302
318
  if (!task.getPathPrefix().isPresent() && !task.getPath().isPresent()) {
303
319
  throw new ConfigException("Either path or path_prefix is required");
304
320
  }
305
321
  }
306
322
 
307
- @VisibleForTesting
308
- public static void listS3FilesByPrefix(FileList.Builder builder,
309
- final AmazonS3 client,
310
- String bucketName,
311
- String prefix,
312
- Optional<String> lastPath,
313
- boolean skipGlacierObjects)
314
- {
315
- listS3FilesByPrefix(builder, client, bucketName, prefix, lastPath, skipGlacierObjects, null);
316
- }
317
-
318
- /**
319
- * Lists S3 filenames filtered by prefix.
320
- * <p>
321
- * The resulting list does not include the file that's size == 0.
322
- * @param builder custom Filelist builder
323
- * @param client Amazon S3
324
- * @param bucketName Amazon S3 bucket name
325
- * @param prefix Amazon S3 bucket name prefix
326
- * @param lastPath last path
327
- * @param skipGlacierObjects skip gracier objects
328
- * @param retryExec a retry executor object to do the retrying
329
- */
330
- @VisibleForTesting
331
- public static void listS3FilesByPrefix(FileList.Builder builder,
332
- final AmazonS3 client,
333
- String bucketName,
334
- String prefix,
335
- Optional<String> lastPath,
336
- boolean skipGlacierObjects,
337
- RetryExecutor retryExec)
338
- {
339
- String lastKey = lastPath.orElse(null);
340
- do {
341
- final String finalLastKey = lastKey;
342
- final ListObjectsRequest req = new ListObjectsRequest(bucketName, prefix, finalLastKey, null, 1024);
343
- ObjectListing ol = new DefaultRetryable<ObjectListing>("Listing objects") {
344
- @Override
345
- public ObjectListing call()
346
- {
347
- return client.listObjects(req);
348
- }
349
- }.executeWith(retryExec);
350
- for (S3ObjectSummary s : ol.getObjectSummaries()) {
351
- if (s.getStorageClass().equals(StorageClass.Glacier.toString())) {
352
- if (skipGlacierObjects) {
353
- Exec.getLogger("AbstractS3FileInputPlugin.class").warn("Skipped \"s3://{}/{}\" that stored at Glacier.", bucketName, s.getKey());
354
- continue;
355
- }
356
- else {
357
- throw new ConfigException("Detected an object stored at Glacier. Set \"skip_glacier_objects\" option to \"true\" to skip this.");
358
- }
359
- }
360
- if (s.getSize() > 0) {
361
- builder.add(s.getKey(), s.getSize());
362
- if (!builder.needsMore()) {
363
- LOGGER.warn("Too many files matched, stop listing file");
364
- return;
365
- }
366
- }
367
- }
368
- lastKey = ol.getNextMarker();
369
- } while (lastKey != null);
370
- }
371
-
372
323
  @Override
373
324
  public TransactionalFileInput open(TaskSource taskSource, int taskIndex)
374
325
  {
@@ -440,6 +391,14 @@ public abstract class AbstractS3FileInputPlugin
440
391
  }
441
392
  }
442
393
 
394
+ @VisibleForTesting
395
+ static void errorIfInternalParamsAreSet(PluginTask task)
396
+ {
397
+ if (task.getEndModifiedTime().isPresent()) {
398
+ throw new ConfigException("'__end_modified_time' must not be set.");
399
+ }
400
+ }
401
+
443
402
  // TODO create single-file InputStreamFileInput utility
444
403
  private class SingleFileProvider
445
404
  implements InputStreamFileInput.Provider
@@ -19,7 +19,7 @@ import static org.embulk.spi.util.RetryExecutor.Retryable;
19
19
  * Retryable utility, regardless the occurred exceptions,
20
20
  * Also provide a default approach for exception propagation.
21
21
  */
22
- class DefaultRetryable<T> implements Retryable<T>
22
+ public class DefaultRetryable<T> implements Retryable<T>
23
23
  {
24
24
  private static final Logger log = Exec.getLogger(DefaultRetryable.class);
25
25
  private static final Set<Integer> NONRETRYABLE_STATUS_CODES = new HashSet<Integer>(2);
@@ -0,0 +1,21 @@
1
+ package org.embulk.input.s3.explorer;
2
+
3
+ import com.amazonaws.services.s3.AmazonS3;
4
+ import org.embulk.input.s3.FileList;
5
+ import org.embulk.spi.util.RetryExecutor;
6
+
7
+ public abstract class S3FileExplorer
8
+ {
9
+ protected String bucketName;
10
+ protected AmazonS3 s3Client;
11
+ protected RetryExecutor retryExecutor;
12
+
13
+ public S3FileExplorer(final String bucketName, final AmazonS3 s3Client, final RetryExecutor retryExecutor)
14
+ {
15
+ this.bucketName = bucketName;
16
+ this.s3Client = s3Client;
17
+ this.retryExecutor = retryExecutor;
18
+ }
19
+
20
+ public abstract void addToBuilder(FileList.Builder builder);
21
+ }
@@ -0,0 +1,45 @@
1
+ package org.embulk.input.s3.explorer;
2
+
3
+ import com.amazonaws.services.s3.AmazonS3;
4
+ import com.amazonaws.services.s3.model.ListObjectsRequest;
5
+ import com.amazonaws.services.s3.model.ObjectListing;
6
+ import com.amazonaws.services.s3.model.S3ObjectSummary;
7
+ import org.embulk.input.s3.DefaultRetryable;
8
+ import org.embulk.spi.util.RetryExecutor;
9
+
10
+ import java.util.List;
11
+
12
+ public class S3NameOrderPrefixFileExplorer extends S3PrefixFileExplorer
13
+ {
14
+ private String lastPath;
15
+
16
+ public S3NameOrderPrefixFileExplorer(final String bucketName, final AmazonS3 s3Client, final RetryExecutor retryExecutor,
17
+ final String pathPrefix, final boolean skipGlacierObjects, final String lastPath)
18
+ {
19
+ super(bucketName, s3Client, retryExecutor, pathPrefix, skipGlacierObjects);
20
+ this.lastPath = lastPath;
21
+ }
22
+
23
+ @Override
24
+ protected List<S3ObjectSummary> fetch()
25
+ {
26
+ final ListObjectsRequest req = new ListObjectsRequest(bucketName, pathPrefix, lastPath, null, 1024);
27
+ final ObjectListing ol = new DefaultRetryable<ObjectListing>("Listing objects")
28
+ {
29
+ @Override
30
+ public ObjectListing call()
31
+ {
32
+ return s3Client.listObjects(req);
33
+ }
34
+ }.executeWith(retryExecutor);
35
+ lastPath = ol.getNextMarker();
36
+
37
+ return ol.getObjectSummaries();
38
+ }
39
+
40
+ @Override
41
+ protected boolean hasNext()
42
+ {
43
+ return lastPath != null;
44
+ }
45
+ }
@@ -0,0 +1,57 @@
1
+ package org.embulk.input.s3.explorer;
2
+
3
+ import com.amazonaws.services.s3.AmazonS3;
4
+ import com.amazonaws.services.s3.model.S3ObjectSummary;
5
+ import com.amazonaws.services.s3.model.StorageClass;
6
+ import org.embulk.config.ConfigException;
7
+ import org.embulk.input.s3.FileList;
8
+ import org.embulk.spi.Exec;
9
+ import org.embulk.spi.util.RetryExecutor;
10
+ import org.slf4j.Logger;
11
+
12
+ import java.util.List;
13
+
14
+ public abstract class S3PrefixFileExplorer extends S3FileExplorer
15
+ {
16
+ private static final Logger LOGGER = Exec.getLogger(S3PrefixFileExplorer.class);
17
+
18
+ protected String pathPrefix;
19
+
20
+ private final boolean skipGlacierObjects;
21
+
22
+ public S3PrefixFileExplorer(final String bucketName, final AmazonS3 s3Client, final RetryExecutor retryExecutor, final String pathPrefix, final boolean skipGlacierObjects)
23
+ {
24
+ super(bucketName, s3Client, retryExecutor);
25
+ this.pathPrefix = pathPrefix;
26
+ this.skipGlacierObjects = skipGlacierObjects;
27
+ }
28
+
29
+ @Override
30
+ public void addToBuilder(final FileList.Builder builder)
31
+ {
32
+ do {
33
+ final List<S3ObjectSummary> s3ObjectSummaries = fetch();
34
+
35
+ for (final S3ObjectSummary s : s3ObjectSummaries) {
36
+ if (s.getStorageClass().equals(StorageClass.Glacier.toString())) {
37
+ if (skipGlacierObjects) {
38
+ LOGGER.warn("Skipped \"s3://{}/{}\" that stored at Glacier.", bucketName, s.getKey());
39
+ continue;
40
+ }
41
+ throw new ConfigException("Detected an object stored at Glacier. Set \"skip_glacier_objects\" option to \"true\" to skip this.");
42
+ }
43
+ if (s.getSize() > 0) {
44
+ builder.add(s.getKey(), s.getSize());
45
+ if (!builder.needsMore()) {
46
+ LOGGER.warn("Too many files matched, stop listing file");
47
+ return;
48
+ }
49
+ }
50
+ }
51
+ } while (hasNext());
52
+ }
53
+
54
+ protected abstract List<S3ObjectSummary> fetch();
55
+
56
+ protected abstract boolean hasNext();
57
+ }
@@ -0,0 +1,35 @@
1
+ package org.embulk.input.s3.explorer;
2
+
3
+ import com.amazonaws.services.s3.AmazonS3;
4
+ import com.amazonaws.services.s3.model.GetObjectMetadataRequest;
5
+ import com.amazonaws.services.s3.model.ObjectMetadata;
6
+ import org.embulk.input.s3.DefaultRetryable;
7
+ import org.embulk.input.s3.FileList;
8
+ import org.embulk.spi.util.RetryExecutor;
9
+
10
+ public class S3SingleFileExplorer extends S3FileExplorer
11
+ {
12
+ private final String path;
13
+
14
+ public S3SingleFileExplorer(final String bucket, final AmazonS3 client, final RetryExecutor retryExecutor, final String path)
15
+ {
16
+ super(bucket, client, retryExecutor);
17
+ this.path = path;
18
+ }
19
+
20
+ @Override
21
+ public void addToBuilder(final FileList.Builder builder)
22
+ {
23
+ final GetObjectMetadataRequest objectMetadataRequest = new GetObjectMetadataRequest(bucketName, path);
24
+
25
+ final ObjectMetadata objectMetadata = new DefaultRetryable<ObjectMetadata>("Looking up for a single object") {
26
+ @Override
27
+ public ObjectMetadata call()
28
+ {
29
+ return s3Client.getObjectMetadata(objectMetadataRequest);
30
+ }
31
+ }.executeWith(retryExecutor);
32
+
33
+ builder.add(path, objectMetadata.getContentLength());
34
+ }
35
+ }
@@ -0,0 +1,70 @@
1
+ package org.embulk.input.s3.explorer;
2
+
3
+ import com.amazonaws.services.s3.AmazonS3;
4
+ import com.amazonaws.services.s3.model.ListObjectsRequest;
5
+ import com.amazonaws.services.s3.model.ObjectListing;
6
+ import com.amazonaws.services.s3.model.S3ObjectSummary;
7
+ import org.apache.commons.lang3.StringUtils;
8
+ import org.embulk.input.s3.DefaultRetryable;
9
+ import org.embulk.spi.Exec;
10
+ import org.embulk.spi.util.RetryExecutor;
11
+ import org.slf4j.Logger;
12
+
13
+ import java.util.Date;
14
+ import java.util.List;
15
+ import java.util.Optional;
16
+ import java.util.stream.Collectors;
17
+
18
+ public class S3TimeOrderPrefixFileExplorer extends S3PrefixFileExplorer
19
+ {
20
+ private static final Logger LOGGER = Exec.getLogger(S3TimeOrderPrefixFileExplorer.class);
21
+
22
+ private final Optional<Date> from;
23
+ private final Date to;
24
+
25
+ private String lastPath;
26
+
27
+ private int numOfReq = 0;
28
+
29
+ public S3TimeOrderPrefixFileExplorer(final String bucket, final AmazonS3 client, final RetryExecutor retryExecutor,
30
+ final String pathPrefix, final boolean skipGlacierObjects, final Optional<Date> from, final Date to)
31
+ {
32
+ super(bucket, client, retryExecutor, pathPrefix, skipGlacierObjects);
33
+ this.from = from;
34
+ this.to = to;
35
+ }
36
+
37
+ @Override
38
+ public List<S3ObjectSummary> fetch()
39
+ {
40
+ ++numOfReq;
41
+
42
+ final ListObjectsRequest req = new ListObjectsRequest(bucketName, pathPrefix, lastPath, null, 1024);
43
+ final ObjectListing objectListing = new DefaultRetryable<ObjectListing>("Listing objects")
44
+ {
45
+ @Override
46
+ public ObjectListing call()
47
+ {
48
+ return s3Client.listObjects(req);
49
+ }
50
+ }.executeWith(retryExecutor);
51
+ lastPath = objectListing.getNextMarker();
52
+
53
+ return objectListing.getObjectSummaries()
54
+ .stream()
55
+ .filter(s3ObjectSummary -> s3ObjectSummary.getLastModified().before(to)
56
+ && (!from.isPresent() || s3ObjectSummary.getLastModified().equals(from.get()) || s3ObjectSummary.getLastModified().after(from.get())))
57
+ .collect(Collectors.toList());
58
+ }
59
+
60
+ @Override
61
+ public boolean hasNext()
62
+ {
63
+ if (lastPath == null) {
64
+ LOGGER.info("The total number of LIST requests is {}{}.", numOfReq,
65
+ numOfReq < 10 ? StringUtils.EMPTY : ". Clean up your s3 bucket to reduce the number of requests and improve the ingesting performance");
66
+ return false;
67
+ }
68
+ return true;
69
+ }
70
+ }
@@ -0,0 +1,28 @@
1
+ package org.embulk.input.s3.utils;
2
+
3
+ import com.google.common.base.Joiner;
4
+ import org.embulk.config.ConfigException;
5
+ import org.joda.time.format.DateTimeFormat;
6
+
7
+ import java.util.Date;
8
+ import java.util.List;
9
+
10
+ public class DateUtils
11
+ {
12
+ public static Date parse(final String value, final List<String> supportedFormats)
13
+ throws ConfigException
14
+ {
15
+ for (final String fmt : supportedFormats) {
16
+ try {
17
+ return DateTimeFormat.forPattern(fmt).parseDateTime(value).toDate();
18
+ } catch (final IllegalArgumentException e) {
19
+ // ignorable exception
20
+ }
21
+ }
22
+ throw new ConfigException("Unsupported DateTime value: '" + value + "', supported formats: [" + Joiner.on(",").join(supportedFormats) + "]");
23
+ }
24
+
25
+ private DateUtils()
26
+ {
27
+ }
28
+ }
@@ -1,16 +1,11 @@
1
1
  package org.embulk.input.s3;
2
2
 
3
3
  import com.amazonaws.services.s3.AmazonS3;
4
- import com.amazonaws.services.s3.model.ListObjectsRequest;
5
- import com.amazonaws.services.s3.model.ObjectListing;
6
4
  import com.amazonaws.services.s3.model.Region;
7
- import com.amazonaws.services.s3.model.S3ObjectSummary;
8
- import com.amazonaws.services.s3.model.StorageClass;
9
5
  import com.google.common.collect.ImmutableList;
10
6
  import com.google.common.collect.ImmutableMap;
11
7
  import org.embulk.EmbulkTestRuntime;
12
8
  import org.embulk.config.ConfigDiff;
13
- import org.embulk.config.ConfigException;
14
9
  import org.embulk.config.ConfigSource;
15
10
  import org.embulk.config.TaskReport;
16
11
  import org.embulk.config.TaskSource;
@@ -25,21 +20,15 @@ import org.junit.Before;
25
20
  import org.junit.BeforeClass;
26
21
  import org.junit.Rule;
27
22
  import org.junit.Test;
28
- import org.mockito.Mockito;
29
23
 
30
- import java.lang.reflect.Field;
31
24
  import java.util.ArrayList;
32
25
  import java.util.List;
33
- import java.util.Optional;
34
26
 
35
27
  import static org.embulk.input.s3.S3FileInputPlugin.S3PluginTask;
36
28
  import static org.junit.Assert.assertEquals;
37
29
  import static org.junit.Assert.assertFalse;
38
30
  import static org.junit.Assert.assertNull;
39
31
  import static org.junit.Assume.assumeNotNull;
40
- import static org.mockito.Matchers.any;
41
- import static org.mockito.Mockito.doReturn;
42
- import static org.mockito.Mockito.mock;
43
32
 
44
33
  public class TestS3FileInputPlugin
45
34
  {
@@ -97,7 +86,6 @@ public class TestS3FileInputPlugin
97
86
 
98
87
  @Test
99
88
  public void useLastPath()
100
- throws Exception
101
89
  {
102
90
  ConfigSource config = this.config.deepCopy().set("last_path", EMBULK_S3_TEST_PATH_PREFIX + "/sample_01.csv");
103
91
  ConfigDiff configDiff = runner.transaction(config, new Control(runner, output));
@@ -117,7 +105,6 @@ public class TestS3FileInputPlugin
117
105
 
118
106
  @Test
119
107
  public void emptyFilesWithLastPath()
120
- throws Exception
121
108
  {
122
109
  ConfigSource config = this.config.deepCopy()
123
110
  .set("path_prefix", "empty_files_prefix")
@@ -130,7 +117,6 @@ public class TestS3FileInputPlugin
130
117
 
131
118
  @Test
132
119
  public void useTotalFileCountLimit()
133
- throws Exception
134
120
  {
135
121
  ConfigSource config = this.config.deepCopy().set("total_file_count_limit", 0);
136
122
  ConfigDiff configDiff = runner.transaction(config, new Control(runner, output));
@@ -141,7 +127,6 @@ public class TestS3FileInputPlugin
141
127
 
142
128
  @Test
143
129
  public void usePathMatchPattern()
144
- throws Exception
145
130
  {
146
131
  { // match pattern
147
132
  ConfigSource config = this.config.deepCopy().set("path_match_pattern", "/sample_01");
@@ -227,44 +212,6 @@ public class TestS3FileInputPlugin
227
212
  assertEquals(s3Client.getRegion(), Region.US_Standard);
228
213
  }
229
214
 
230
- @Test(expected = ConfigException.class)
231
- public void useSkipGlacierObjects() throws Exception
232
- {
233
- AmazonS3 client;
234
- client = mock(AmazonS3.class);
235
- doReturn(s3objectList("in/aa/a", StorageClass.Glacier)).when(client).listObjects(any(ListObjectsRequest.class));
236
-
237
- AbstractS3FileInputPlugin plugin = Mockito.mock(AbstractS3FileInputPlugin.class, Mockito.CALLS_REAL_METHODS);
238
- plugin.listS3FilesByPrefix(newFileList(config, "sample_00", 100L), client, "test_bucket", "test_prefix", Optional.empty(), false);
239
- }
240
-
241
- private FileList.Builder newFileList(ConfigSource config, Object... nameAndSize)
242
- {
243
- FileList.Builder builder = new FileList.Builder(config);
244
- for (int i = 0; i < nameAndSize.length; i += 2) {
245
- builder.add((String) nameAndSize[i], (long) nameAndSize[i + 1]);
246
- }
247
- return builder;
248
- }
249
-
250
- private ObjectListing s3objectList(String key, StorageClass storageClass) throws Exception
251
- {
252
- ObjectListing list = new ObjectListing();
253
-
254
- S3ObjectSummary element = new S3ObjectSummary();
255
- element.setKey(key);
256
- element.setStorageClass(storageClass.toString());
257
-
258
- List<S3ObjectSummary> objectSummaries = new ArrayList<>();
259
- objectSummaries.add(element);
260
-
261
- Field field = list.getClass().getDeclaredField("objectSummaries");
262
- field.setAccessible(true);
263
- field.set(list, objectSummaries);
264
-
265
- return list;
266
- }
267
-
268
215
  static class Control
269
216
  implements InputPlugin.Control
270
217
  {
@@ -0,0 +1,67 @@
1
+ package org.embulk.input.s3.explorer;
2
+
3
+ import com.amazonaws.services.s3.AmazonS3;
4
+ import com.amazonaws.services.s3.model.ListObjectsRequest;
5
+ import com.amazonaws.services.s3.model.ObjectListing;
6
+ import org.embulk.EmbulkTestRuntime;
7
+ import org.junit.Before;
8
+ import org.junit.Rule;
9
+ import org.junit.Test;
10
+ import org.junit.runner.RunWith;
11
+ import org.mockito.ArgumentCaptor;
12
+ import org.mockito.Mock;
13
+ import org.mockito.internal.util.reflection.FieldSetter;
14
+ import org.mockito.runners.MockitoJUnitRunner;
15
+
16
+ import static org.junit.Assert.assertEquals;
17
+ import static org.junit.Assert.assertFalse;
18
+ import static org.mockito.Matchers.any;
19
+ import static org.mockito.Mockito.mock;
20
+ import static org.mockito.Mockito.verify;
21
+ import static org.mockito.Mockito.when;
22
+
23
+ @RunWith(MockitoJUnitRunner.class)
24
+ public class TestS3NameOrderPrefixFileExplorer
25
+ {
26
+ private static final String BUCKET_NAME = "bucket_name";
27
+ private static final String PATH_PREFIX = "path_prefix";
28
+ private static final String LAST_PATH = "last_path";
29
+
30
+ @Rule
31
+ public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
32
+
33
+ @Mock
34
+ private AmazonS3 s3Client;
35
+
36
+ private S3NameOrderPrefixFileExplorer s3NameOrderPrefixFileExplorer;
37
+
38
+ @Before
39
+ public void setUp()
40
+ {
41
+ s3NameOrderPrefixFileExplorer = new S3NameOrderPrefixFileExplorer(BUCKET_NAME, s3Client, null, PATH_PREFIX, false, LAST_PATH);
42
+ }
43
+
44
+ @Test
45
+ public void fetch_should_return_list_objects()
46
+ {
47
+ final ObjectListing ol = mock(ObjectListing.class);
48
+ when(s3Client.listObjects(any(ListObjectsRequest.class))).thenReturn(ol);
49
+
50
+ s3NameOrderPrefixFileExplorer.fetch();
51
+ final ArgumentCaptor<ListObjectsRequest> listObjectsRequestCaptor = ArgumentCaptor.forClass(ListObjectsRequest.class);
52
+
53
+ verify(ol).getNextMarker();
54
+ verify(s3Client).listObjects(listObjectsRequestCaptor.capture());
55
+ final ListObjectsRequest listObjectsRequest = listObjectsRequestCaptor.getValue();
56
+ assertEquals(BUCKET_NAME, listObjectsRequest.getBucketName());
57
+ assertEquals(PATH_PREFIX, listObjectsRequest.getPrefix());
58
+ assertEquals(LAST_PATH, listObjectsRequest.getMarker());
59
+ }
60
+
61
+ @Test
62
+ public void hasNext_should_return_false_if_no_lastpath() throws NoSuchFieldException
63
+ {
64
+ new FieldSetter(s3NameOrderPrefixFileExplorer, s3NameOrderPrefixFileExplorer.getClass().getDeclaredField("lastPath")).set(null);
65
+ assertFalse(s3NameOrderPrefixFileExplorer.hasNext());
66
+ }
67
+ }
@@ -0,0 +1,128 @@
1
+ package org.embulk.input.s3.explorer;
2
+
3
+ import com.amazonaws.services.s3.AmazonS3;
4
+ import com.amazonaws.services.s3.model.S3ObjectSummary;
5
+ import com.amazonaws.services.s3.model.StorageClass;
6
+ import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
7
+ import org.embulk.EmbulkTestRuntime;
8
+ import org.embulk.config.ConfigException;
9
+ import org.embulk.input.s3.FileList;
10
+ import org.embulk.spi.util.RetryExecutor;
11
+ import org.junit.Before;
12
+ import org.junit.Rule;
13
+ import org.junit.Test;
14
+ import org.junit.runner.RunWith;
15
+ import org.mockito.Mock;
16
+ import org.mockito.runners.MockitoJUnitRunner;
17
+
18
+ import java.util.Collections;
19
+ import java.util.List;
20
+
21
+ import static org.mockito.Mockito.doReturn;
22
+ import static org.mockito.Mockito.never;
23
+ import static org.mockito.Mockito.spy;
24
+ import static org.mockito.Mockito.times;
25
+ import static org.mockito.Mockito.verify;
26
+ import static org.mockito.Mockito.when;
27
+
28
+ @RunWith(MockitoJUnitRunner.class)
29
+ public class TestS3PrefixFileExplorer
30
+ {
31
+ private static final String PATH_PREFIX = "path_prefix";
32
+ private static final String BUCKET_NAME = "bucket_name";
33
+ private static final String OBJECT_KEY = "key";
34
+
35
+ @SuppressFBWarnings("URF_UNREAD_PUBLIC_OR_PROTECTED_FIELD")
36
+ @Rule
37
+ public EmbulkTestRuntime embulkTestRuntime = new EmbulkTestRuntime();
38
+
39
+ @Mock
40
+ private AmazonS3 s3Client;
41
+
42
+ @Mock
43
+ private FileList.Builder builder;
44
+
45
+ @Mock
46
+ private S3ObjectSummary s3ObjectSummary;
47
+
48
+ private S3PrefixFileExplorer s3PrefixFileExplorer;
49
+
50
+ @Before
51
+ public void setUp()
52
+ {
53
+ s3PrefixFileExplorer = spyS3PrefixFileExplorer(BUCKET_NAME, s3Client, null, PATH_PREFIX, false);
54
+ doReturn(Collections.singletonList(s3ObjectSummary)).when(s3PrefixFileExplorer).fetch();
55
+ }
56
+
57
+ @Test(expected = ConfigException.class)
58
+ public void addToBuilder_should_throw_exception_if_notskipped_glacier_storage()
59
+ {
60
+ when(s3ObjectSummary.getStorageClass()).thenReturn(StorageClass.Glacier.toString());
61
+ s3PrefixFileExplorer.addToBuilder(builder);
62
+ }
63
+
64
+ @Test
65
+ public void addToBuilder_should_skip_glacier_storage_if_allowed()
66
+ {
67
+ when(s3ObjectSummary.getStorageClass()).thenReturn(StorageClass.Glacier.toString());
68
+ // override spied object for changing `skipGlacierObjects`
69
+ s3PrefixFileExplorer = spyS3PrefixFileExplorer(BUCKET_NAME, s3Client, null, PATH_PREFIX, true);
70
+ doReturn(false).when(s3PrefixFileExplorer).hasNext();
71
+ doReturn(Collections.singletonList(s3ObjectSummary)).when(s3PrefixFileExplorer).fetch();
72
+ s3PrefixFileExplorer.addToBuilder(builder);
73
+
74
+ verify(s3PrefixFileExplorer).hasNext();
75
+ verify(s3ObjectSummary, never()).getSize();
76
+ }
77
+
78
+ @Test
79
+ public void addToBuilder_should_loop_till_nothing_left()
80
+ {
81
+ // There are 3 loops totally but only 2 keys have been imported because the first key is in Glacier storage class and is skipped
82
+ when(builder.needsMore()).thenReturn(true);
83
+ // override spied object for changing `skipGlacierObjects`
84
+ s3PrefixFileExplorer = spyS3PrefixFileExplorer(BUCKET_NAME, s3Client, null, PATH_PREFIX, true);
85
+ when(s3ObjectSummary.getStorageClass())
86
+ .thenReturn(StorageClass.Glacier.toString())
87
+ .thenReturn(StorageClass.Standard.toString());
88
+ when(s3ObjectSummary.getSize()).thenReturn(1L);
89
+ when(s3ObjectSummary.getKey()).thenReturn(PATH_PREFIX + OBJECT_KEY);
90
+ doReturn(Collections.singletonList(s3ObjectSummary)).when(s3PrefixFileExplorer).fetch();
91
+ doReturn(true).doReturn(true).doReturn(false).when(s3PrefixFileExplorer).hasNext();
92
+
93
+ s3PrefixFileExplorer.addToBuilder(builder);
94
+ verify(builder, times(2)).add(PATH_PREFIX + OBJECT_KEY, 1);
95
+ }
96
+
97
+ @Test
98
+ public void addToBuilder_should_stop_import_if_too_many_files()
99
+ {
100
+ when(builder.needsMore()).thenReturn(false);
101
+ when(s3ObjectSummary.getStorageClass()).thenReturn(StorageClass.Standard.toString());
102
+ when(s3ObjectSummary.getKey()).thenReturn(PATH_PREFIX + OBJECT_KEY);
103
+ when(s3ObjectSummary.getSize()).thenReturn(1L);
104
+ doReturn(true).when(s3PrefixFileExplorer).hasNext();
105
+ s3PrefixFileExplorer.addToBuilder(builder);
106
+
107
+ verify(builder).add(PATH_PREFIX + OBJECT_KEY, 1);
108
+ verify(s3PrefixFileExplorer, never()).hasNext();
109
+ }
110
+
111
+ private S3PrefixFileExplorer spyS3PrefixFileExplorer(final String bucketName, final AmazonS3 s3Client, final RetryExecutor retryExecutor, final String pathPrefix, final boolean skipGlacierObjects)
112
+ {
113
+ return spy(new S3PrefixFileExplorer(bucketName, s3Client, retryExecutor, pathPrefix, skipGlacierObjects)
114
+ {
115
+ @Override
116
+ protected List<S3ObjectSummary> fetch()
117
+ {
118
+ return null;
119
+ }
120
+
121
+ @Override
122
+ protected boolean hasNext()
123
+ {
124
+ return false;
125
+ }
126
+ });
127
+ }
128
+ }
@@ -0,0 +1,56 @@
1
+ package org.embulk.input.s3.explorer;
2
+
3
+ import com.amazonaws.services.s3.AmazonS3;
4
+ import com.amazonaws.services.s3.model.GetObjectMetadataRequest;
5
+ import com.amazonaws.services.s3.model.ObjectMetadata;
6
+ import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
7
+ import org.embulk.EmbulkTestRuntime;
8
+ import org.embulk.input.s3.FileList;
9
+ import org.junit.Before;
10
+ import org.junit.Rule;
11
+ import org.junit.Test;
12
+ import org.junit.runner.RunWith;
13
+ import org.mockito.Mock;
14
+ import org.mockito.runners.MockitoJUnitRunner;
15
+
16
+ import static org.mockito.Matchers.any;
17
+ import static org.mockito.Mockito.verify;
18
+ import static org.mockito.Mockito.when;
19
+
20
+ @RunWith(MockitoJUnitRunner.class)
21
+ public class TestS3SingleFileExplorer
22
+ {
23
+ private static final String PATH = "path";
24
+ private static final String BUCKET_NAME = "bucket_name";
25
+
26
+ @SuppressFBWarnings("URF_UNREAD_PUBLIC_OR_PROTECTED_FIELD")
27
+ @Rule
28
+ public EmbulkTestRuntime embulkTestRuntime = new EmbulkTestRuntime();
29
+
30
+ @Mock
31
+ private AmazonS3 s3Client;
32
+
33
+ @Mock
34
+ private FileList.Builder builder;
35
+
36
+ @Mock
37
+ private ObjectMetadata metadata;
38
+
39
+ private S3SingleFileExplorer s3SingleFileExplorer;
40
+
41
+ @Before
42
+ public void setUp()
43
+ {
44
+ s3SingleFileExplorer = new S3SingleFileExplorer(BUCKET_NAME, s3Client, null, PATH);
45
+ }
46
+
47
+ @Test
48
+ public void addToBuilder_should_request_single_object_metadata()
49
+ {
50
+ when(s3Client.getObjectMetadata(any(GetObjectMetadataRequest.class))).thenReturn(metadata);
51
+ when(metadata.getContentLength()).thenReturn(1L);
52
+ s3SingleFileExplorer.addToBuilder(builder);
53
+
54
+ verify(builder).add(PATH, 1);
55
+ }
56
+ }
@@ -0,0 +1,112 @@
1
+ package org.embulk.input.s3.explorer;
2
+
3
+ import com.amazonaws.services.s3.AmazonS3;
4
+ import com.amazonaws.services.s3.model.ListObjectsRequest;
5
+ import com.amazonaws.services.s3.model.ObjectListing;
6
+ import com.amazonaws.services.s3.model.S3ObjectSummary;
7
+ import org.embulk.EmbulkTestRuntime;
8
+ import org.junit.Before;
9
+ import org.junit.Rule;
10
+ import org.junit.Test;
11
+ import org.junit.runner.RunWith;
12
+ import org.mockito.Mock;
13
+ import org.mockito.internal.util.reflection.FieldSetter;
14
+ import org.mockito.runners.MockitoJUnitRunner;
15
+
16
+ import java.util.Arrays;
17
+ import java.util.Calendar;
18
+ import java.util.List;
19
+ import java.util.Optional;
20
+
21
+ import static org.junit.Assert.assertEquals;
22
+ import static org.junit.Assert.assertFalse;
23
+ import static org.mockito.Matchers.any;
24
+ import static org.mockito.Mockito.mock;
25
+ import static org.mockito.Mockito.when;
26
+
27
+ @RunWith(MockitoJUnitRunner.class)
28
+ public class TestS3TimeOrderPrefixFileExplorer
29
+ {
30
+ private static final String BUCKET_NAME = "bucket_name";
31
+ private static final String PATH_PREFIX = "path_prefix";
32
+
33
+ @Rule
34
+ public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
35
+
36
+ @Mock
37
+ private AmazonS3 s3Client;
38
+
39
+ private S3TimeOrderPrefixFileExplorer s3TimeOrderPrefixFileExplorer;
40
+
41
+ @Before
42
+ public void setUp()
43
+ {
44
+ final Calendar cal = Calendar.getInstance();
45
+ cal.set(2019, Calendar.MAY, 25, 10, 0);
46
+ s3TimeOrderPrefixFileExplorer = new S3TimeOrderPrefixFileExplorer(BUCKET_NAME, s3Client, null, PATH_PREFIX,
47
+ false, Optional.empty(), cal.getTime());
48
+ }
49
+
50
+ @Test
51
+ public void fetch_should_return_filtered_objects_before_end_time()
52
+ {
53
+ final S3ObjectSummary s3ObjectBefore = mock(S3ObjectSummary.class);
54
+ final Calendar cal = Calendar.getInstance();
55
+ cal.set(2019, Calendar.MAY, 24, 10, 0);
56
+ when(s3ObjectBefore.getLastModified()).thenReturn(cal.getTime());
57
+
58
+ final S3ObjectSummary s3ObjectAfter = mock(S3ObjectSummary.class);
59
+ cal.set(2019, Calendar.MAY, 26, 10, 0);
60
+ when(s3ObjectAfter.getLastModified()).thenReturn(cal.getTime());
61
+
62
+ final ObjectListing ol = mock(ObjectListing.class);
63
+ when(s3Client.listObjects(any(ListObjectsRequest.class))).thenReturn(ol);
64
+ when(ol.getObjectSummaries()).thenReturn(Arrays.asList(s3ObjectBefore, s3ObjectAfter));
65
+
66
+ final List<S3ObjectSummary> result = s3TimeOrderPrefixFileExplorer.fetch();
67
+ assertEquals(1, result.size());
68
+ assertEquals(s3ObjectBefore, result.get(0));
69
+ }
70
+
71
+ @Test
72
+ public void fetch_should_return_filtered_objects_after_or_equals_begin_time()
73
+ {
74
+ final Calendar to = Calendar.getInstance();
75
+ to.set(2019, Calendar.MAY, 25, 10, 0);
76
+ final Calendar from = Calendar.getInstance();
77
+ from.set(2019, Calendar.MAY, 24, 10, 0);
78
+ s3TimeOrderPrefixFileExplorer = new S3TimeOrderPrefixFileExplorer(BUCKET_NAME, s3Client, null, PATH_PREFIX,
79
+ false, Optional.of(from.getTime()), to.getTime());
80
+
81
+ final S3ObjectSummary s3ObjectEqual = mock(S3ObjectSummary.class);
82
+ final Calendar equalCal = Calendar.getInstance();
83
+ equalCal.set(2019, Calendar.MAY, 24, 10, 0);
84
+ when(s3ObjectEqual.getLastModified()).thenReturn(equalCal.getTime());
85
+
86
+ final S3ObjectSummary s3ObjectBefore = mock(S3ObjectSummary.class);
87
+ final Calendar beforeCal = Calendar.getInstance();
88
+ beforeCal.set(2019, Calendar.MAY, 24, 20, 0);
89
+ when(s3ObjectBefore.getLastModified()).thenReturn(beforeCal.getTime());
90
+
91
+ final S3ObjectSummary s3ObjectAfter = mock(S3ObjectSummary.class);
92
+ final Calendar afterCal = Calendar.getInstance();
93
+ afterCal.set(2019, Calendar.MAY, 26, 10, 0);
94
+ when(s3ObjectAfter.getLastModified()).thenReturn(afterCal.getTime());
95
+
96
+ final ObjectListing ol = mock(ObjectListing.class);
97
+ when(s3Client.listObjects(any(ListObjectsRequest.class))).thenReturn(ol);
98
+ when(ol.getObjectSummaries()).thenReturn(Arrays.asList(s3ObjectEqual, s3ObjectBefore, s3ObjectAfter));
99
+
100
+ final List<S3ObjectSummary> result = s3TimeOrderPrefixFileExplorer.fetch();
101
+ assertEquals(2, result.size());
102
+ assertEquals(s3ObjectEqual, result.get(0));
103
+ assertEquals(s3ObjectBefore, result.get(1));
104
+ }
105
+
106
+ @Test
107
+ public void hasNext_should_return_false_if_no_lastpath() throws NoSuchFieldException
108
+ {
109
+ new FieldSetter(s3TimeOrderPrefixFileExplorer, s3TimeOrderPrefixFileExplorer.getClass().getDeclaredField("lastPath")).set(null);
110
+ assertFalse(s3TimeOrderPrefixFileExplorer.hasNext());
111
+ }
112
+ }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-s3
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.3
4
+ version: 0.3.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-02-05 00:00:00.000000000 Z
11
+ date: 2019-06-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -53,19 +53,28 @@ files:
53
53
  - src/main/java/org/embulk/input/s3/HttpProxy.java
54
54
  - src/main/java/org/embulk/input/s3/RetrySupportPluginTask.java
55
55
  - src/main/java/org/embulk/input/s3/S3FileInputPlugin.java
56
- - src/test/java/org/embulk/input/s3/TestAbstractS3FileInputPlugin.java
56
+ - src/main/java/org/embulk/input/s3/explorer/S3FileExplorer.java
57
+ - src/main/java/org/embulk/input/s3/explorer/S3NameOrderPrefixFileExplorer.java
58
+ - src/main/java/org/embulk/input/s3/explorer/S3PrefixFileExplorer.java
59
+ - src/main/java/org/embulk/input/s3/explorer/S3SingleFileExplorer.java
60
+ - src/main/java/org/embulk/input/s3/explorer/S3TimeOrderPrefixFileExplorer.java
61
+ - src/main/java/org/embulk/input/s3/utils/DateUtils.java
57
62
  - src/test/java/org/embulk/input/s3/TestAwsCredentials.java
58
63
  - src/test/java/org/embulk/input/s3/TestDefaultRetryable.java
59
64
  - src/test/java/org/embulk/input/s3/TestFileList.java
60
65
  - src/test/java/org/embulk/input/s3/TestHttpProxy.java
61
66
  - src/test/java/org/embulk/input/s3/TestS3FileInputPlugin.java
62
67
  - src/test/java/org/embulk/input/s3/TestS3InputStreamReopener.java
68
+ - src/test/java/org/embulk/input/s3/explorer/TestS3NameOrderPrefixFileExplorer.java
69
+ - src/test/java/org/embulk/input/s3/explorer/TestS3PrefixFileExplorer.java
70
+ - src/test/java/org/embulk/input/s3/explorer/TestS3SingleFileExplorer.java
71
+ - src/test/java/org/embulk/input/s3/explorer/TestS3TimeOrderPrefixFileExplorer.java
63
72
  - src/test/resources/sample_01.csv
64
- - classpath/embulk-util-aws-credentials-0.3.3.jar
73
+ - classpath/embulk-util-aws-credentials-0.3.4.jar
65
74
  - classpath/httpcore-4.4.9.jar
66
75
  - classpath/httpclient-4.5.5.jar
67
76
  - classpath/ion-java-1.0.2.jar
68
- - classpath/embulk-input-s3-0.3.3.jar
77
+ - classpath/embulk-input-s3-0.3.4.jar
69
78
  - classpath/aws-java-sdk-core-1.11.466.jar
70
79
  - classpath/jcl-over-slf4j-1.7.12.jar
71
80
  - classpath/commons-codec-1.10.jar
@@ -1,164 +0,0 @@
1
- package org.embulk.input.s3;
2
-
3
- import com.amazonaws.AmazonServiceException;
4
- import com.amazonaws.services.s3.AmazonS3;
5
- import com.amazonaws.services.s3.model.GetObjectMetadataRequest;
6
- import com.amazonaws.services.s3.model.ListObjectsRequest;
7
- import com.amazonaws.services.s3.model.ObjectListing;
8
- import com.amazonaws.services.s3.model.ObjectMetadata;
9
- import org.apache.http.HttpStatus;
10
- import org.embulk.EmbulkTestRuntime;
11
- import org.embulk.spi.util.RetryExecutor;
12
- import org.junit.Before;
13
- import org.junit.Rule;
14
- import org.junit.Test;
15
-
16
- import java.util.Optional;
17
-
18
- import static org.mockito.Matchers.any;
19
- import static org.mockito.Mockito.doReturn;
20
- import static org.mockito.Mockito.doThrow;
21
- import static org.mockito.Mockito.mock;
22
-
23
- public class TestAbstractS3FileInputPlugin
24
- {
25
- private static RetryExecutor retryExecutor()
26
- {
27
- return RetryExecutor.retryExecutor()
28
- .withInitialRetryWait(0)
29
- .withMaxRetryWait(0);
30
- }
31
-
32
- private static AbstractS3FileInputPlugin dummyS3Plugin()
33
- {
34
- return new AbstractS3FileInputPlugin()
35
- {
36
- @Override
37
- protected Class<? extends PluginTask> getTaskClass()
38
- {
39
- return PluginTask.class;
40
- }
41
- };
42
- }
43
-
44
- private static class SomeException extends RuntimeException
45
- {
46
- }
47
-
48
- @Rule
49
- public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
50
-
51
- private AmazonS3 client;
52
-
53
- @Before
54
- public void createResources()
55
- {
56
- client = mock(AmazonS3.class);
57
- }
58
-
59
- @Test
60
- public void listS3FilesByPrefix()
61
- {
62
- doReturn(new ObjectListing()).when(client).listObjects(any(ListObjectsRequest.class));
63
- FileList.Builder builder = new FileList.Builder();
64
- dummyS3Plugin().listS3FilesByPrefix(builder, client, "some_bucket", "some_prefix", Optional.of("last_path"), true);
65
- }
66
-
67
- @Test
68
- public void listS3FileByPrefix_with_retry()
69
- {
70
- doThrow(new RuntimeException()).doReturn(new ObjectListing())
71
- .when(client).listObjects(any(ListObjectsRequest.class));
72
- FileList.Builder builder = new FileList.Builder();
73
- dummyS3Plugin().listS3FilesByPrefix(
74
- builder, client, "some_bucket", "some_prefix", Optional.of("last_path"), true,
75
- retryExecutor().withRetryLimit(1));
76
- }
77
-
78
- @Test(expected = SomeException.class)
79
- public void listS3FileByPrefix_on_retry_gave_up_should_throw_the_original_exception()
80
- {
81
- doThrow(new SomeException()).doReturn(new ObjectListing())
82
- .when(client).listObjects(any(ListObjectsRequest.class));
83
- FileList.Builder builder = new FileList.Builder();
84
- dummyS3Plugin().listS3FilesByPrefix(
85
- builder, client, "some_bucket", "some_prefix", Optional.of("last_path"), true,
86
- retryExecutor().withRetryLimit(0));
87
- }
88
-
89
- @Test(expected = AmazonServiceException.class)
90
- public void listS3FileByPrefix_on_retry_gave_up_should_throw_the_original_exception_in_forbidden_code()
91
- {
92
- AmazonServiceException exception = new AmazonServiceException("Forbidden exception");
93
- exception.setStatusCode(HttpStatus.SC_FORBIDDEN);
94
- exception.setErrorType(AmazonServiceException.ErrorType.Client);
95
-
96
- doThrow(exception).doReturn(new ObjectListing())
97
- .when(client).listObjects(any(ListObjectsRequest.class));
98
- FileList.Builder builder = new FileList.Builder();
99
- dummyS3Plugin().listS3FilesByPrefix(
100
- builder, client, "some_bucket", "some_prefix", Optional.of("last_path"), true,
101
- retryExecutor().withRetryLimit(1));
102
- }
103
-
104
- @Test(expected = AmazonServiceException.class)
105
- public void listS3FileByPrefix_on_retry_gave_up_should_throw_the_original_exception_in_methodnotallow_code()
106
- {
107
- AmazonServiceException exception = new AmazonServiceException("method not allow exception");
108
- exception.setStatusCode(HttpStatus.SC_METHOD_NOT_ALLOWED);
109
- exception.setErrorType(AmazonServiceException.ErrorType.Client);
110
-
111
- doThrow(exception).doReturn(new ObjectListing())
112
- .when(client).listObjects(any(ListObjectsRequest.class));
113
- FileList.Builder builder = new FileList.Builder();
114
- dummyS3Plugin().listS3FilesByPrefix(
115
- builder, client, "some_bucket", "some_prefix", Optional.of("last_path"), true,
116
- retryExecutor().withRetryLimit(1));
117
- }
118
-
119
- @Test(expected = AmazonServiceException.class)
120
- public void listS3FileByPrefix_on_retry_gave_up_should_throw_the_original_exception_in_expiredToken_code()
121
- {
122
- AmazonServiceException exception = new AmazonServiceException("expired token exception");
123
- exception.setStatusCode(HttpStatus.SC_BAD_REQUEST);
124
- exception.setErrorCode("ExpiredToken");
125
- exception.setErrorType(AmazonServiceException.ErrorType.Client);
126
-
127
- doThrow(exception).doReturn(new ObjectListing())
128
- .when(client).listObjects(any(ListObjectsRequest.class));
129
- FileList.Builder builder = new FileList.Builder();
130
- dummyS3Plugin().listS3FilesByPrefix(
131
- builder, client, "some_bucket", "some_prefix", Optional.of("last_path"), true,
132
- retryExecutor().withRetryLimit(1));
133
- }
134
-
135
- @Test
136
- public void addS3DirectObject()
137
- {
138
- doReturn(new ObjectMetadata()).when(client).getObjectMetadata(any(GetObjectMetadataRequest.class));
139
- FileList.Builder builder = new FileList.Builder().pathMatchPattern("");
140
- dummyS3Plugin().addS3DirectObject(builder, client, "some_bucket", "some_prefix");
141
- }
142
-
143
- @Test
144
- public void addS3DirectObject_with_retry()
145
- {
146
- doThrow(new RuntimeException()).doReturn(new ObjectMetadata())
147
- .when(client).getObjectMetadata(any(GetObjectMetadataRequest.class));
148
- FileList.Builder builder = new FileList.Builder().pathMatchPattern("");
149
- dummyS3Plugin().addS3DirectObject(
150
- builder, client, "some_bucket", "some_prefix",
151
- retryExecutor());
152
- }
153
-
154
- @Test(expected = SomeException.class)
155
- public void addS3DirectObject_on_retry_gave_up_should_throw_original_exception()
156
- {
157
- doThrow(new SomeException()).doReturn(new ObjectMetadata())
158
- .when(client).getObjectMetadata(any(GetObjectMetadataRequest.class));
159
- FileList.Builder builder = new FileList.Builder().pathMatchPattern("");
160
- dummyS3Plugin().addS3DirectObject(
161
- builder, client, "some_bucket", "some_prefix",
162
- retryExecutor().withRetryLimit(0));
163
- }
164
- }