embulk-input-s3 0.2.12 → 0.2.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 99dc2bbe73f61b74e92b3f345c963b73ebb75bd0
4
- data.tar.gz: 6c42a7ae99a51dad4d9ec4c20b629a9b235db8b3
3
+ metadata.gz: f74f5e05b21a26a23ae93264b7fb5a6624833eda
4
+ data.tar.gz: 9dd80d007fb72660b3670845a6530a2ca75f316f
5
5
  SHA512:
6
- metadata.gz: 8dee445a23d719bb9f7bf1fd9dde6f0d25faa2e38e8eec567e8d7a61518841f933caf8e35ce79a27a70327d7b75ab65eb1cb54327d7d4e664c848ed3fc24b265
7
- data.tar.gz: 728534ca59e6c3d334335f821d0834bc04f8fd76c5934adfa62132b638a81b1b6dc12874ee718d8352adabe1de3bf26eee3f1a8b3bcf8d117f74076cdc4a3935
6
+ metadata.gz: d93ad5992045608cb56d6a4753119b6dbe6c84c71ce0a360a2cc2001a73f9cb694c4250045e96ccb119cc782278adfaa8087f4b412714fb9ad8a56bcca18774d
7
+ data.tar.gz: db5c37847557095a02e86b3d0d75ad97e13b1e26744d055d38e688e527166c505d3075f5b65b8f16e9d2290bce25800fd8f316509b70710dab4d2207431a1e2f
@@ -1,45 +1,45 @@
1
1
  package org.embulk.input.s3;
2
2
 
3
- import java.util.List;
4
- import java.util.Iterator;
5
- import java.io.IOException;
6
- import java.io.InterruptedIOException;
7
- import java.io.InputStream;
8
-
3
+ import com.amazonaws.AmazonServiceException;
4
+ import com.amazonaws.ClientConfiguration;
5
+ import com.amazonaws.Protocol;
6
+ import com.amazonaws.auth.AWSCredentialsProvider;
9
7
  import com.amazonaws.services.s3.AmazonS3;
10
8
  import com.amazonaws.services.s3.AmazonS3ClientBuilder;
11
- import com.google.common.annotations.VisibleForTesting;
12
- import com.google.common.base.Optional;
13
- import com.google.common.base.Throwables;
14
- import org.slf4j.Logger;
15
- import com.amazonaws.auth.AWSCredentialsProvider;
9
+ import com.amazonaws.services.s3.model.GetObjectRequest;
16
10
  import com.amazonaws.services.s3.model.ListObjectsRequest;
17
- import com.amazonaws.services.s3.model.S3ObjectSummary;
18
11
  import com.amazonaws.services.s3.model.ObjectListing;
19
- import com.amazonaws.services.s3.model.GetObjectRequest;
20
12
  import com.amazonaws.services.s3.model.S3Object;
21
- import com.amazonaws.ClientConfiguration;
22
- import com.amazonaws.AmazonServiceException;
23
- import com.amazonaws.Protocol;
13
+ import com.amazonaws.services.s3.model.S3ObjectSummary;
14
+ import com.google.common.annotations.VisibleForTesting;
15
+ import com.google.common.base.Optional;
16
+ import com.google.common.base.Throwables;
24
17
  import org.embulk.config.Config;
25
- import org.embulk.config.ConfigInject;
26
18
  import org.embulk.config.ConfigDefault;
27
- import org.embulk.config.Task;
28
- import org.embulk.config.TaskSource;
29
- import org.embulk.config.ConfigSource;
30
19
  import org.embulk.config.ConfigDiff;
31
- import org.embulk.config.TaskReport;
32
20
  import org.embulk.config.ConfigException;
21
+ import org.embulk.config.ConfigInject;
22
+ import org.embulk.config.ConfigSource;
23
+ import org.embulk.config.Task;
24
+ import org.embulk.config.TaskReport;
25
+ import org.embulk.config.TaskSource;
33
26
  import org.embulk.spi.BufferAllocator;
34
27
  import org.embulk.spi.Exec;
35
28
  import org.embulk.spi.FileInputPlugin;
36
29
  import org.embulk.spi.TransactionalFileInput;
37
30
  import org.embulk.spi.util.InputStreamFileInput;
38
31
  import org.embulk.spi.util.ResumableInputStream;
39
- import org.embulk.spi.util.RetryExecutor.Retryable;
40
32
  import org.embulk.spi.util.RetryExecutor.RetryGiveupException;
33
+ import org.embulk.spi.util.RetryExecutor.Retryable;
41
34
  import org.embulk.util.aws.credentials.AwsCredentials;
42
35
  import org.embulk.util.aws.credentials.AwsCredentialsTask;
36
+ import org.slf4j.Logger;
37
+
38
+ import java.io.IOException;
39
+ import java.io.InputStream;
40
+ import java.io.InterruptedIOException;
41
+ import java.util.Iterator;
42
+ import java.util.List;
43
43
 
44
44
  import static org.embulk.spi.util.RetryExecutor.retryExecutor;
45
45
 
@@ -134,12 +134,14 @@ public abstract class AbstractS3FileInputPlugin
134
134
  * e.g., {@link AmazonS3#setEndpoint} will throw a runtime {@link UnsupportedOperationException}
135
135
  * Subclass's customization should be done through {@link AbstractS3FileInputPlugin#defaultS3ClientBuilder}.
136
136
  */
137
- protected AmazonS3 newS3Client(PluginTask task) {
137
+ protected AmazonS3 newS3Client(PluginTask task)
138
+ {
138
139
  return defaultS3ClientBuilder(task).build();
139
140
  }
140
141
 
141
142
  /** A base builder for the subclasses to then customize. */
142
- protected AmazonS3ClientBuilder defaultS3ClientBuilder(PluginTask task) {
143
+ protected AmazonS3ClientBuilder defaultS3ClientBuilder(PluginTask task)
144
+ {
143
145
  return AmazonS3ClientBuilder
144
146
  .standard()
145
147
  .withCredentials(getCredentialsProvider(task))
@@ -158,7 +160,7 @@ public abstract class AbstractS3FileInputPlugin
158
160
  //clientConfig.setProtocol(Protocol.HTTP);
159
161
  clientConfig.setMaxConnections(50); // SDK default: 50
160
162
  clientConfig.setMaxErrorRetry(3); // SDK default: 3
161
- clientConfig.setSocketTimeout(8*60*1000); // SDK default: 50*1000
163
+ clientConfig.setSocketTimeout(8 * 60 * 1000); // SDK default: 50*1000
162
164
 
163
165
  // set http proxy
164
166
  if (task.getHttpProxy().isPresent()) {
@@ -276,7 +278,7 @@ public abstract class AbstractS3FileInputPlugin
276
278
  return retryExecutor()
277
279
  .withRetryLimit(3)
278
280
  .withInitialRetryWait(500)
279
- .withMaxRetryWait(30*1000)
281
+ .withMaxRetryWait(30 * 1000)
280
282
  .runInterruptible(new Retryable<InputStream>() {
281
283
  @Override
282
284
  public InputStream call() throws InterruptedIOException
@@ -297,10 +299,11 @@ public abstract class AbstractS3FileInputPlugin
297
299
  throws RetryGiveupException
298
300
  {
299
301
  String message = String.format("S3 GET request failed. Retrying %d/%d after %d seconds. Message: %s",
300
- retryCount, retryLimit, retryWait/1000, exception.getMessage());
302
+ retryCount, retryLimit, retryWait / 1000, exception.getMessage());
301
303
  if (retryCount % 3 == 0) {
302
304
  log.warn(message, exception);
303
- } else {
305
+ }
306
+ else {
304
307
  log.warn(message);
305
308
  }
306
309
  }
@@ -311,10 +314,12 @@ public abstract class AbstractS3FileInputPlugin
311
314
  {
312
315
  }
313
316
  });
314
- } catch (RetryGiveupException ex) {
317
+ }
318
+ catch (RetryGiveupException ex) {
315
319
  Throwables.propagateIfInstanceOf(ex.getCause(), IOException.class);
316
320
  throw Throwables.propagate(ex.getCause());
317
- } catch (InterruptedException ex) {
321
+ }
322
+ catch (InterruptedException ex) {
318
323
  throw new InterruptedIOException();
319
324
  }
320
325
  }
@@ -329,7 +334,9 @@ public abstract class AbstractS3FileInputPlugin
329
334
  super(task.getBufferAllocator(), new SingleFileProvider(task, taskIndex));
330
335
  }
331
336
 
332
- public void abort() { }
337
+ public void abort()
338
+ {
339
+ }
333
340
 
334
341
  public TaskReport commit()
335
342
  {
@@ -337,7 +344,9 @@ public abstract class AbstractS3FileInputPlugin
337
344
  }
338
345
 
339
346
  @Override
340
- public void close() { }
347
+ public void close()
348
+ {
349
+ }
341
350
  }
342
351
 
343
352
  // TODO create single-file InputStreamFileInput utility
@@ -367,6 +376,8 @@ public abstract class AbstractS3FileInputPlugin
367
376
  }
368
377
 
369
378
  @Override
370
- public void close() { }
379
+ public void close()
380
+ {
381
+ }
371
382
  }
372
383
  }
@@ -1,30 +1,31 @@
1
1
  package org.embulk.input.s3;
2
2
 
3
- import java.util.List;
4
- import java.util.AbstractList;
5
- import java.util.ArrayList;
6
- import java.util.zip.GZIPInputStream;
7
- import java.util.zip.GZIPOutputStream;
8
- import java.util.regex.Pattern;
9
- import java.util.regex.Matcher;
10
- import java.io.InputStream;
11
- import java.io.OutputStream;
12
- import java.io.BufferedOutputStream;
3
+ import com.fasterxml.jackson.annotation.JsonCreator;
4
+ import com.fasterxml.jackson.annotation.JsonIgnore;
5
+ import com.fasterxml.jackson.annotation.JsonProperty;
6
+ import com.google.common.base.Optional;
7
+ import com.google.common.base.Throwables;
8
+
9
+ import org.embulk.config.Config;
10
+ import org.embulk.config.ConfigDefault;
11
+ import org.embulk.config.ConfigSource;
12
+
13
13
  import java.io.BufferedInputStream;
14
+ import java.io.BufferedOutputStream;
14
15
  import java.io.ByteArrayInputStream;
15
16
  import java.io.ByteArrayOutputStream;
17
+
16
18
  import java.io.IOException;
19
+ import java.io.InputStream;
20
+ import java.io.OutputStream;
17
21
  import java.nio.ByteBuffer;
18
22
  import java.nio.charset.StandardCharsets;
19
- import org.embulk.config.Config;
20
- import org.embulk.config.ConfigDefault;
21
- import org.embulk.config.ConfigSource;
22
- import com.google.common.base.Throwables;
23
- import com.google.common.base.Optional;
24
- import com.google.common.collect.ImmutableList;
25
- import com.fasterxml.jackson.annotation.JsonProperty;
26
- import com.fasterxml.jackson.annotation.JsonIgnore;
27
- import com.fasterxml.jackson.annotation.JsonCreator;
23
+ import java.util.AbstractList;
24
+ import java.util.ArrayList;
25
+ import java.util.List;
26
+ import java.util.regex.Pattern;
27
+ import java.util.zip.GZIPInputStream;
28
+ import java.util.zip.GZIPOutputStream;
28
29
 
29
30
  // this class should be moved to embulk-core
30
31
  public class FileList
@@ -60,10 +61,16 @@ public class FileList
60
61
  }
61
62
 
62
63
  @JsonProperty("index")
63
- public int getIndex() { return index; }
64
+ public int getIndex()
65
+ {
66
+ return index;
67
+ }
64
68
 
65
69
  @JsonProperty("size")
66
- public long getSize() { return size; }
70
+ public long getSize()
71
+ {
72
+ return size;
73
+ }
67
74
  }
68
75
 
69
76
  public static class Builder
@@ -2,15 +2,12 @@ package org.embulk.input.s3;
2
2
 
3
3
  import com.amazonaws.client.builder.AwsClientBuilder.EndpointConfiguration;
4
4
  import com.amazonaws.services.s3.AmazonS3;
5
- import com.amazonaws.services.s3.AmazonS3Client;
6
5
  import com.amazonaws.services.s3.AmazonS3ClientBuilder;
7
6
  import com.google.common.base.Optional;
8
7
  import org.embulk.config.Config;
9
8
  import org.embulk.config.ConfigDefault;
10
-
11
- import static com.amazonaws.services.s3.AmazonS3Client.S3_SERVICE_NAME;
12
- import static com.amazonaws.util.AwsHostNameUtils.parseRegion;
13
- import static com.amazonaws.util.RuntimeHttpUtils.toUri;
9
+ import org.embulk.spi.Exec;
10
+ import org.slf4j.Logger;
14
11
 
15
12
  public class S3FileInputPlugin
16
13
  extends AbstractS3FileInputPlugin
@@ -21,11 +18,16 @@ public class S3FileInputPlugin
21
18
  @Config("endpoint")
22
19
  @ConfigDefault("null")
23
20
  public Optional<String> getEndpoint();
21
+
22
+ @Config("region")
23
+ @ConfigDefault("null")
24
+ public Optional<String> getRegion();
24
25
  }
25
26
 
27
+ private static final Logger log = Exec.getLogger(S3FileInputPlugin.class);
28
+
26
29
  @Override
27
- protected Class<? extends PluginTask> getTaskClass()
28
- {
30
+ protected Class<? extends PluginTask> getTaskClass() {
29
31
  return S3PluginTask.class;
30
32
  }
31
33
 
@@ -33,19 +35,35 @@ public class S3FileInputPlugin
33
35
  protected AmazonS3 newS3Client(PluginTask task)
34
36
  {
35
37
  S3PluginTask t = (S3PluginTask) task;
38
+ Optional<String> endpoint = t.getEndpoint();
39
+ Optional<String> region = t.getRegion();
36
40
 
37
41
  AmazonS3ClientBuilder builder = super.defaultS3ClientBuilder(t);
38
42
 
39
- if (t.getEndpoint().isPresent()) {
40
- String endpoint = t.getEndpoint().get();
41
- builder.setEndpointConfiguration(new EndpointConfiguration(
42
- endpoint,
43
- // Although client will treat endpoint's region as the signer region
44
- // if we left this as null, but such that behaviour is undocumented,
45
- // so it is explicitly calculated here for future-proofing.
46
- parseRegion(
47
- toUri(endpoint, getClientConfiguration(task)).getHost(),
48
- S3_SERVICE_NAME)));
43
+ // Favor the `endpoint` configuration, then `region`; if both are absent, `s3.amazonaws.com` will be used.
44
+ if (endpoint.isPresent()) {
45
+ if (region.isPresent()) {
46
+ log.warn("Either configure endpoint or region, " +
47
+ "if both is specified only the endpoint will be in effect.");
48
+ }
49
+ builder.setEndpointConfiguration(new EndpointConfiguration(endpoint.get(), null));
50
+ } else if (region.isPresent()) {
51
+ builder.setRegion(region.get());
52
+ } else {
53
+ // This keeps the AWS SDK upgrade to 1.11.x backward compatible with old configurations.
54
+ //
55
+ // On SDK 1.10.x, when neither endpoint nor region is set explicitly, the client's endpoint will be by
56
+ // default `s3.amazonaws.com`. And for pre-Signature-V4, this will work fine as the bucket's region
57
+ // will be resolved to the appropriate region on server (AWS) side.
58
+ //
59
+ // On SDK 1.11.x, a region will be computed on client side by AwsRegionProvider and the endpoint now will
60
+ // be region-specific `<region>.s3.amazonaws.com` and might be the wrong one.
61
+ //
62
+ // So a default endpoint of `s3.amazonaws.com` when both endpoint and region configs are absent is
63
+ // necessary so that old configurations won't suddenly break. The side effect is that this will render
64
+ // AwsRegionProvider useless. It's also worth noting that Signature-V4 won't work with either version with
65
+ // no explicit region or endpoint, as the region (inferrable from endpoint) is necessary for signing.
66
+ builder.setEndpointConfiguration(new EndpointConfiguration("s3.amazonaws.com", null));
49
67
  }
50
68
 
51
69
  return builder.build();
@@ -1,13 +1,14 @@
1
1
  package org.embulk.input.s3;
2
2
 
3
+ import com.amazonaws.auth.AWSStaticCredentialsProvider;
3
4
  import com.amazonaws.auth.BasicAWSCredentials;
4
5
  import com.amazonaws.auth.BasicSessionCredentials;
5
6
  import com.amazonaws.auth.policy.Policy;
6
7
  import com.amazonaws.auth.policy.Resource;
7
8
  import com.amazonaws.auth.policy.Statement;
8
9
  import com.amazonaws.auth.policy.actions.S3Actions;
9
- import com.amazonaws.internal.StaticCredentialsProvider;
10
- import com.amazonaws.services.securitytoken.AWSSecurityTokenServiceClient;
10
+ import com.amazonaws.services.securitytoken.AWSSecurityTokenService;
11
+ import com.amazonaws.services.securitytoken.AWSSecurityTokenServiceClientBuilder;
11
12
  import com.amazonaws.services.securitytoken.model.Credentials;
12
13
  import com.amazonaws.services.securitytoken.model.GetFederationTokenRequest;
13
14
  import com.amazonaws.services.securitytoken.model.GetFederationTokenResult;
@@ -112,7 +113,6 @@ public class TestAwsCredentials
112
113
  String origAccessKeyId = System.getProperty("aws.accessKeyId");
113
114
  String origSecretKey = System.getProperty("aws.secretKey");
114
115
  try {
115
-
116
116
  ConfigSource config = this.config.deepCopy().set("auth_method", "properties");
117
117
  System.setProperty("aws.accessKeyId", EMBULK_S3_TEST_ACCESS_KEY_ID);
118
118
  System.setProperty("aws.secretKey", EMBULK_S3_TEST_SECRET_ACCESS_KEY);
@@ -148,8 +148,9 @@ public class TestAwsCredentials
148
148
 
149
149
  private static BasicSessionCredentials getSessionCredentials()
150
150
  {
151
- AWSSecurityTokenServiceClient stsClient = new AWSSecurityTokenServiceClient(
152
- new StaticCredentialsProvider(new BasicAWSCredentials(EMBULK_S3_TEST_ACCESS_KEY_ID, EMBULK_S3_TEST_SECRET_ACCESS_KEY)));
151
+ AWSSecurityTokenService stsClient = AWSSecurityTokenServiceClientBuilder.standard().withCredentials(
152
+ new AWSStaticCredentialsProvider(new BasicAWSCredentials(EMBULK_S3_TEST_ACCESS_KEY_ID, EMBULK_S3_TEST_SECRET_ACCESS_KEY))
153
+ ).build();
153
154
 
154
155
  GetFederationTokenRequest getFederationTokenRequest = new GetFederationTokenRequest();
155
156
  getFederationTokenRequest.setDurationSeconds(7200);
@@ -1,5 +1,7 @@
1
1
  package org.embulk.input.s3;
2
2
 
3
+ import com.amazonaws.services.s3.AmazonS3;
4
+ import com.amazonaws.services.s3.model.Region;
3
5
  import com.google.common.collect.ImmutableList;
4
6
  import com.google.common.collect.ImmutableMap;
5
7
  import org.embulk.EmbulkTestRuntime;
@@ -22,6 +24,7 @@ import org.junit.Test;
22
24
  import java.util.ArrayList;
23
25
  import java.util.List;
24
26
 
27
+ import static org.embulk.input.s3.S3FileInputPlugin.S3PluginTask;
25
28
  import static org.junit.Assert.assertEquals;
26
29
  import static org.junit.Assert.assertFalse;
27
30
  import static org.junit.Assert.assertNull;
@@ -147,6 +150,51 @@ public class TestS3FileInputPlugin
147
150
  }
148
151
  }
149
152
 
153
+
154
+ @Test
155
+ public void configuredEndpoint()
156
+ {
157
+ S3PluginTask task = config.deepCopy()
158
+ .set("endpoint", "s3-ap-southeast-1.amazonaws.com")
159
+ .set("region", "ap-southeast-2")
160
+ .loadConfig(S3PluginTask.class);
161
+ S3FileInputPlugin plugin = runtime.getInstance(S3FileInputPlugin.class);
162
+ AmazonS3 s3Client = plugin.newS3Client(task);
163
+
164
+ // Should not crash and should favor the endpoint over the region configuration (there's a warning log though)
165
+ assertEquals(s3Client.getRegion(), Region.AP_Singapore);
166
+ }
167
+
168
+ @Test
169
+ public void configuredRegion()
170
+ {
171
+ S3PluginTask task = config.deepCopy()
172
+ .set("region", "ap-southeast-2")
173
+ .remove("endpoint")
174
+ .loadConfig(S3PluginTask.class);
175
+ S3FileInputPlugin plugin = runtime.getInstance(S3FileInputPlugin.class);
176
+ AmazonS3 s3Client = plugin.newS3Client(task);
177
+
178
+ // Should reflect the region configuration as is
179
+ assertEquals(s3Client.getRegion(), Region.AP_Sydney);
180
+ }
181
+
182
+ @Test
183
+ public void unconfiguredEndpointAndRegion()
184
+ {
185
+ S3PluginTask task = config.deepCopy()
186
+ .remove("endpoint")
187
+ .remove("region")
188
+ .loadConfig(S3PluginTask.class);
189
+ S3FileInputPlugin plugin = runtime.getInstance(S3FileInputPlugin.class);
190
+ AmazonS3 s3Client = plugin.newS3Client(task);
191
+
192
+ // US Standard region is a 'generic' one (s3.amazonaws.com); the expectation here is that
193
+ // the S3 client should not eagerly resolve a specific region on the client side.
194
+ // Please refer to org.embulk.input.s3.S3FileInputPlugin#newS3Client for the details.
195
+ assertEquals(s3Client.getRegion(), Region.US_Standard);
196
+ }
197
+
150
198
  static class Control
151
199
  implements InputPlugin.Control
152
200
  {
@@ -1,6 +1,6 @@
1
1
  package org.embulk.input.s3;
2
2
 
3
- import com.amazonaws.services.s3.AmazonS3Client;
3
+ import com.amazonaws.services.s3.AmazonS3;
4
4
  import com.amazonaws.services.s3.model.GetObjectRequest;
5
5
  import com.amazonaws.services.s3.model.ObjectMetadata;
6
6
  import com.amazonaws.services.s3.model.S3Object;
@@ -27,12 +27,12 @@ public class TestS3InputStreamReopener
27
27
  @Rule
28
28
  public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
29
29
 
30
- private AmazonS3Client client;
30
+ private AmazonS3 client;
31
31
 
32
32
  @Before
33
33
  public void createResources()
34
34
  {
35
- client = mock(AmazonS3Client.class);
35
+ client = mock(AmazonS3.class);
36
36
  }
37
37
 
38
38
  @Test
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-s3
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.12
4
+ version: 0.2.13
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-01-05 00:00:00.000000000 Z
11
+ date: 2018-02-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -61,8 +61,8 @@ files:
61
61
  - classpath/aws-java-sdk-kms-1.11.253.jar
62
62
  - classpath/aws-java-sdk-s3-1.11.253.jar
63
63
  - classpath/commons-codec-1.9.jar
64
- - classpath/embulk-input-s3-0.2.12.jar
65
- - classpath/embulk-util-aws-credentials-0.2.12.jar
64
+ - classpath/embulk-input-s3-0.2.13.jar
65
+ - classpath/embulk-util-aws-credentials-0.2.13.jar
66
66
  - classpath/httpclient-4.5.2.jar
67
67
  - classpath/httpcore-4.4.4.jar
68
68
  - classpath/ion-java-1.0.2.jar
Binary file