embulk-input-s3 0.2.12 → 0.2.13

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 99dc2bbe73f61b74e92b3f345c963b73ebb75bd0
4
- data.tar.gz: 6c42a7ae99a51dad4d9ec4c20b629a9b235db8b3
3
+ metadata.gz: f74f5e05b21a26a23ae93264b7fb5a6624833eda
4
+ data.tar.gz: 9dd80d007fb72660b3670845a6530a2ca75f316f
5
5
  SHA512:
6
- metadata.gz: 8dee445a23d719bb9f7bf1fd9dde6f0d25faa2e38e8eec567e8d7a61518841f933caf8e35ce79a27a70327d7b75ab65eb1cb54327d7d4e664c848ed3fc24b265
7
- data.tar.gz: 728534ca59e6c3d334335f821d0834bc04f8fd76c5934adfa62132b638a81b1b6dc12874ee718d8352adabe1de3bf26eee3f1a8b3bcf8d117f74076cdc4a3935
6
+ metadata.gz: d93ad5992045608cb56d6a4753119b6dbe6c84c71ce0a360a2cc2001a73f9cb694c4250045e96ccb119cc782278adfaa8087f4b412714fb9ad8a56bcca18774d
7
+ data.tar.gz: db5c37847557095a02e86b3d0d75ad97e13b1e26744d055d38e688e527166c505d3075f5b65b8f16e9d2290bce25800fd8f316509b70710dab4d2207431a1e2f
@@ -1,45 +1,45 @@
1
1
  package org.embulk.input.s3;
2
2
 
3
- import java.util.List;
4
- import java.util.Iterator;
5
- import java.io.IOException;
6
- import java.io.InterruptedIOException;
7
- import java.io.InputStream;
8
-
3
+ import com.amazonaws.AmazonServiceException;
4
+ import com.amazonaws.ClientConfiguration;
5
+ import com.amazonaws.Protocol;
6
+ import com.amazonaws.auth.AWSCredentialsProvider;
9
7
  import com.amazonaws.services.s3.AmazonS3;
10
8
  import com.amazonaws.services.s3.AmazonS3ClientBuilder;
11
- import com.google.common.annotations.VisibleForTesting;
12
- import com.google.common.base.Optional;
13
- import com.google.common.base.Throwables;
14
- import org.slf4j.Logger;
15
- import com.amazonaws.auth.AWSCredentialsProvider;
9
+ import com.amazonaws.services.s3.model.GetObjectRequest;
16
10
  import com.amazonaws.services.s3.model.ListObjectsRequest;
17
- import com.amazonaws.services.s3.model.S3ObjectSummary;
18
11
  import com.amazonaws.services.s3.model.ObjectListing;
19
- import com.amazonaws.services.s3.model.GetObjectRequest;
20
12
  import com.amazonaws.services.s3.model.S3Object;
21
- import com.amazonaws.ClientConfiguration;
22
- import com.amazonaws.AmazonServiceException;
23
- import com.amazonaws.Protocol;
13
+ import com.amazonaws.services.s3.model.S3ObjectSummary;
14
+ import com.google.common.annotations.VisibleForTesting;
15
+ import com.google.common.base.Optional;
16
+ import com.google.common.base.Throwables;
24
17
  import org.embulk.config.Config;
25
- import org.embulk.config.ConfigInject;
26
18
  import org.embulk.config.ConfigDefault;
27
- import org.embulk.config.Task;
28
- import org.embulk.config.TaskSource;
29
- import org.embulk.config.ConfigSource;
30
19
  import org.embulk.config.ConfigDiff;
31
- import org.embulk.config.TaskReport;
32
20
  import org.embulk.config.ConfigException;
21
+ import org.embulk.config.ConfigInject;
22
+ import org.embulk.config.ConfigSource;
23
+ import org.embulk.config.Task;
24
+ import org.embulk.config.TaskReport;
25
+ import org.embulk.config.TaskSource;
33
26
  import org.embulk.spi.BufferAllocator;
34
27
  import org.embulk.spi.Exec;
35
28
  import org.embulk.spi.FileInputPlugin;
36
29
  import org.embulk.spi.TransactionalFileInput;
37
30
  import org.embulk.spi.util.InputStreamFileInput;
38
31
  import org.embulk.spi.util.ResumableInputStream;
39
- import org.embulk.spi.util.RetryExecutor.Retryable;
40
32
  import org.embulk.spi.util.RetryExecutor.RetryGiveupException;
33
+ import org.embulk.spi.util.RetryExecutor.Retryable;
41
34
  import org.embulk.util.aws.credentials.AwsCredentials;
42
35
  import org.embulk.util.aws.credentials.AwsCredentialsTask;
36
+ import org.slf4j.Logger;
37
+
38
+ import java.io.IOException;
39
+ import java.io.InputStream;
40
+ import java.io.InterruptedIOException;
41
+ import java.util.Iterator;
42
+ import java.util.List;
43
43
 
44
44
  import static org.embulk.spi.util.RetryExecutor.retryExecutor;
45
45
 
@@ -134,12 +134,14 @@ public abstract class AbstractS3FileInputPlugin
134
134
  * e.g., {@link AmazonS3#setEndpoint} will throw a runtime {@link UnsupportedOperationException}
135
135
  * Subclass's customization should be done through {@link AbstractS3FileInputPlugin#defaultS3ClientBuilder}.
136
136
  */
137
- protected AmazonS3 newS3Client(PluginTask task) {
137
+ protected AmazonS3 newS3Client(PluginTask task)
138
+ {
138
139
  return defaultS3ClientBuilder(task).build();
139
140
  }
140
141
 
141
142
  /** A base builder for the subclasses to then customize. */
142
- protected AmazonS3ClientBuilder defaultS3ClientBuilder(PluginTask task) {
143
+ protected AmazonS3ClientBuilder defaultS3ClientBuilder(PluginTask task)
144
+ {
143
145
  return AmazonS3ClientBuilder
144
146
  .standard()
145
147
  .withCredentials(getCredentialsProvider(task))
@@ -158,7 +160,7 @@ public abstract class AbstractS3FileInputPlugin
158
160
  //clientConfig.setProtocol(Protocol.HTTP);
159
161
  clientConfig.setMaxConnections(50); // SDK default: 50
160
162
  clientConfig.setMaxErrorRetry(3); // SDK default: 3
161
- clientConfig.setSocketTimeout(8*60*1000); // SDK default: 50*1000
163
+ clientConfig.setSocketTimeout(8 * 60 * 1000); // SDK default: 50*1000
162
164
 
163
165
  // set http proxy
164
166
  if (task.getHttpProxy().isPresent()) {
@@ -276,7 +278,7 @@ public abstract class AbstractS3FileInputPlugin
276
278
  return retryExecutor()
277
279
  .withRetryLimit(3)
278
280
  .withInitialRetryWait(500)
279
- .withMaxRetryWait(30*1000)
281
+ .withMaxRetryWait(30 * 1000)
280
282
  .runInterruptible(new Retryable<InputStream>() {
281
283
  @Override
282
284
  public InputStream call() throws InterruptedIOException
@@ -297,10 +299,11 @@ public abstract class AbstractS3FileInputPlugin
297
299
  throws RetryGiveupException
298
300
  {
299
301
  String message = String.format("S3 GET request failed. Retrying %d/%d after %d seconds. Message: %s",
300
- retryCount, retryLimit, retryWait/1000, exception.getMessage());
302
+ retryCount, retryLimit, retryWait / 1000, exception.getMessage());
301
303
  if (retryCount % 3 == 0) {
302
304
  log.warn(message, exception);
303
- } else {
305
+ }
306
+ else {
304
307
  log.warn(message);
305
308
  }
306
309
  }
@@ -311,10 +314,12 @@ public abstract class AbstractS3FileInputPlugin
311
314
  {
312
315
  }
313
316
  });
314
- } catch (RetryGiveupException ex) {
317
+ }
318
+ catch (RetryGiveupException ex) {
315
319
  Throwables.propagateIfInstanceOf(ex.getCause(), IOException.class);
316
320
  throw Throwables.propagate(ex.getCause());
317
- } catch (InterruptedException ex) {
321
+ }
322
+ catch (InterruptedException ex) {
318
323
  throw new InterruptedIOException();
319
324
  }
320
325
  }
@@ -329,7 +334,9 @@ public abstract class AbstractS3FileInputPlugin
329
334
  super(task.getBufferAllocator(), new SingleFileProvider(task, taskIndex));
330
335
  }
331
336
 
332
- public void abort() { }
337
+ public void abort()
338
+ {
339
+ }
333
340
 
334
341
  public TaskReport commit()
335
342
  {
@@ -337,7 +344,9 @@ public abstract class AbstractS3FileInputPlugin
337
344
  }
338
345
 
339
346
  @Override
340
- public void close() { }
347
+ public void close()
348
+ {
349
+ }
341
350
  }
342
351
 
343
352
  // TODO create single-file InputStreamFileInput utility
@@ -367,6 +376,8 @@ public abstract class AbstractS3FileInputPlugin
367
376
  }
368
377
 
369
378
  @Override
370
- public void close() { }
379
+ public void close()
380
+ {
381
+ }
371
382
  }
372
383
  }
@@ -1,30 +1,31 @@
1
1
  package org.embulk.input.s3;
2
2
 
3
- import java.util.List;
4
- import java.util.AbstractList;
5
- import java.util.ArrayList;
6
- import java.util.zip.GZIPInputStream;
7
- import java.util.zip.GZIPOutputStream;
8
- import java.util.regex.Pattern;
9
- import java.util.regex.Matcher;
10
- import java.io.InputStream;
11
- import java.io.OutputStream;
12
- import java.io.BufferedOutputStream;
3
+ import com.fasterxml.jackson.annotation.JsonCreator;
4
+ import com.fasterxml.jackson.annotation.JsonIgnore;
5
+ import com.fasterxml.jackson.annotation.JsonProperty;
6
+ import com.google.common.base.Optional;
7
+ import com.google.common.base.Throwables;
8
+
9
+ import org.embulk.config.Config;
10
+ import org.embulk.config.ConfigDefault;
11
+ import org.embulk.config.ConfigSource;
12
+
13
13
  import java.io.BufferedInputStream;
14
+ import java.io.BufferedOutputStream;
14
15
  import java.io.ByteArrayInputStream;
15
16
  import java.io.ByteArrayOutputStream;
17
+
16
18
  import java.io.IOException;
19
+ import java.io.InputStream;
20
+ import java.io.OutputStream;
17
21
  import java.nio.ByteBuffer;
18
22
  import java.nio.charset.StandardCharsets;
19
- import org.embulk.config.Config;
20
- import org.embulk.config.ConfigDefault;
21
- import org.embulk.config.ConfigSource;
22
- import com.google.common.base.Throwables;
23
- import com.google.common.base.Optional;
24
- import com.google.common.collect.ImmutableList;
25
- import com.fasterxml.jackson.annotation.JsonProperty;
26
- import com.fasterxml.jackson.annotation.JsonIgnore;
27
- import com.fasterxml.jackson.annotation.JsonCreator;
23
+ import java.util.AbstractList;
24
+ import java.util.ArrayList;
25
+ import java.util.List;
26
+ import java.util.regex.Pattern;
27
+ import java.util.zip.GZIPInputStream;
28
+ import java.util.zip.GZIPOutputStream;
28
29
 
29
30
  // this class should be moved to embulk-core
30
31
  public class FileList
@@ -60,10 +61,16 @@ public class FileList
60
61
  }
61
62
 
62
63
  @JsonProperty("index")
63
- public int getIndex() { return index; }
64
+ public int getIndex()
65
+ {
66
+ return index;
67
+ }
64
68
 
65
69
  @JsonProperty("size")
66
- public long getSize() { return size; }
70
+ public long getSize()
71
+ {
72
+ return size;
73
+ }
67
74
  }
68
75
 
69
76
  public static class Builder
@@ -2,15 +2,12 @@ package org.embulk.input.s3;
2
2
 
3
3
  import com.amazonaws.client.builder.AwsClientBuilder.EndpointConfiguration;
4
4
  import com.amazonaws.services.s3.AmazonS3;
5
- import com.amazonaws.services.s3.AmazonS3Client;
6
5
  import com.amazonaws.services.s3.AmazonS3ClientBuilder;
7
6
  import com.google.common.base.Optional;
8
7
  import org.embulk.config.Config;
9
8
  import org.embulk.config.ConfigDefault;
10
-
11
- import static com.amazonaws.services.s3.AmazonS3Client.S3_SERVICE_NAME;
12
- import static com.amazonaws.util.AwsHostNameUtils.parseRegion;
13
- import static com.amazonaws.util.RuntimeHttpUtils.toUri;
9
+ import org.embulk.spi.Exec;
10
+ import org.slf4j.Logger;
14
11
 
15
12
  public class S3FileInputPlugin
16
13
  extends AbstractS3FileInputPlugin
@@ -21,11 +18,16 @@ public class S3FileInputPlugin
21
18
  @Config("endpoint")
22
19
  @ConfigDefault("null")
23
20
  public Optional<String> getEndpoint();
21
+
22
+ @Config("region")
23
+ @ConfigDefault("null")
24
+ public Optional<String> getRegion();
24
25
  }
25
26
 
27
+ private static final Logger log = Exec.getLogger(S3FileInputPlugin.class);
28
+
26
29
  @Override
27
- protected Class<? extends PluginTask> getTaskClass()
28
- {
30
+ protected Class<? extends PluginTask> getTaskClass() {
29
31
  return S3PluginTask.class;
30
32
  }
31
33
 
@@ -33,19 +35,35 @@ public class S3FileInputPlugin
33
35
  protected AmazonS3 newS3Client(PluginTask task)
34
36
  {
35
37
  S3PluginTask t = (S3PluginTask) task;
38
+ Optional<String> endpoint = t.getEndpoint();
39
+ Optional<String> region = t.getRegion();
36
40
 
37
41
  AmazonS3ClientBuilder builder = super.defaultS3ClientBuilder(t);
38
42
 
39
- if (t.getEndpoint().isPresent()) {
40
- String endpoint = t.getEndpoint().get();
41
- builder.setEndpointConfiguration(new EndpointConfiguration(
42
- endpoint,
43
- // Although client will treat endpoint's region as the signer region
44
- // if we left this as null, but such that behaviour is undocumented,
45
- // so it is explicitly calculated here for future-proofing.
46
- parseRegion(
47
- toUri(endpoint, getClientConfiguration(task)).getHost(),
48
- S3_SERVICE_NAME)));
43
+ // Favor the `endpoint` configuration, then `region`; if both are absent, `s3.amazonaws.com` will be used.
44
+ if (endpoint.isPresent()) {
45
+ if (region.isPresent()) {
46
+ log.warn("Either configure endpoint or region, " +
47
+ "if both is specified only the endpoint will be in effect.");
48
+ }
49
+ builder.setEndpointConfiguration(new EndpointConfiguration(endpoint.get(), null));
50
+ } else if (region.isPresent()) {
51
+ builder.setRegion(region.get());
52
+ } else {
53
+ // This keeps the AWS SDK upgrade to 1.11.x backward compatible with old configurations.
54
+ //
55
+ // On SDK 1.10.x, when neither endpoint nor region is set explicitly, the client's endpoint will be by
56
+ // default `s3.amazonaws.com`. And for pre-Signature-V4, this will work fine as the bucket's region
57
+ // will be resolved to the appropriate region on server (AWS) side.
58
+ //
59
+ // On SDK 1.11.x, a region will be computed on client side by AwsRegionProvider and the endpoint now will
60
+ // be region-specific `<region>.s3.amazonaws.com` and might be the wrong one.
61
+ //
62
+ // So a default endpoint of `s3.amazonaws.com` when both endpoint and region configs are absent is
63
+ // necessary so that old configurations won't suddenly break. The side effect is that this will render
64
+ // AwsRegionProvider useless. It's also worth noting that Signature-V4 won't work with either version when
65
+ // no explicit region or endpoint is given, as the region (inferable from the endpoint) is necessary for signing.
66
+ builder.setEndpointConfiguration(new EndpointConfiguration("s3.amazonaws.com", null));
49
67
  }
50
68
 
51
69
  return builder.build();
@@ -1,13 +1,14 @@
1
1
  package org.embulk.input.s3;
2
2
 
3
+ import com.amazonaws.auth.AWSStaticCredentialsProvider;
3
4
  import com.amazonaws.auth.BasicAWSCredentials;
4
5
  import com.amazonaws.auth.BasicSessionCredentials;
5
6
  import com.amazonaws.auth.policy.Policy;
6
7
  import com.amazonaws.auth.policy.Resource;
7
8
  import com.amazonaws.auth.policy.Statement;
8
9
  import com.amazonaws.auth.policy.actions.S3Actions;
9
- import com.amazonaws.internal.StaticCredentialsProvider;
10
- import com.amazonaws.services.securitytoken.AWSSecurityTokenServiceClient;
10
+ import com.amazonaws.services.securitytoken.AWSSecurityTokenService;
11
+ import com.amazonaws.services.securitytoken.AWSSecurityTokenServiceClientBuilder;
11
12
  import com.amazonaws.services.securitytoken.model.Credentials;
12
13
  import com.amazonaws.services.securitytoken.model.GetFederationTokenRequest;
13
14
  import com.amazonaws.services.securitytoken.model.GetFederationTokenResult;
@@ -112,7 +113,6 @@ public class TestAwsCredentials
112
113
  String origAccessKeyId = System.getProperty("aws.accessKeyId");
113
114
  String origSecretKey = System.getProperty("aws.secretKey");
114
115
  try {
115
-
116
116
  ConfigSource config = this.config.deepCopy().set("auth_method", "properties");
117
117
  System.setProperty("aws.accessKeyId", EMBULK_S3_TEST_ACCESS_KEY_ID);
118
118
  System.setProperty("aws.secretKey", EMBULK_S3_TEST_SECRET_ACCESS_KEY);
@@ -148,8 +148,9 @@ public class TestAwsCredentials
148
148
 
149
149
  private static BasicSessionCredentials getSessionCredentials()
150
150
  {
151
- AWSSecurityTokenServiceClient stsClient = new AWSSecurityTokenServiceClient(
152
- new StaticCredentialsProvider(new BasicAWSCredentials(EMBULK_S3_TEST_ACCESS_KEY_ID, EMBULK_S3_TEST_SECRET_ACCESS_KEY)));
151
+ AWSSecurityTokenService stsClient = AWSSecurityTokenServiceClientBuilder.standard().withCredentials(
152
+ new AWSStaticCredentialsProvider(new BasicAWSCredentials(EMBULK_S3_TEST_ACCESS_KEY_ID, EMBULK_S3_TEST_SECRET_ACCESS_KEY))
153
+ ).build();
153
154
 
154
155
  GetFederationTokenRequest getFederationTokenRequest = new GetFederationTokenRequest();
155
156
  getFederationTokenRequest.setDurationSeconds(7200);
@@ -1,5 +1,7 @@
1
1
  package org.embulk.input.s3;
2
2
 
3
+ import com.amazonaws.services.s3.AmazonS3;
4
+ import com.amazonaws.services.s3.model.Region;
3
5
  import com.google.common.collect.ImmutableList;
4
6
  import com.google.common.collect.ImmutableMap;
5
7
  import org.embulk.EmbulkTestRuntime;
@@ -22,6 +24,7 @@ import org.junit.Test;
22
24
  import java.util.ArrayList;
23
25
  import java.util.List;
24
26
 
27
+ import static org.embulk.input.s3.S3FileInputPlugin.S3PluginTask;
25
28
  import static org.junit.Assert.assertEquals;
26
29
  import static org.junit.Assert.assertFalse;
27
30
  import static org.junit.Assert.assertNull;
@@ -147,6 +150,51 @@ public class TestS3FileInputPlugin
147
150
  }
148
151
  }
149
152
 
153
+
154
+ @Test
155
+ public void configuredEndpoint()
156
+ {
157
+ S3PluginTask task = config.deepCopy()
158
+ .set("endpoint", "s3-ap-southeast-1.amazonaws.com")
159
+ .set("region", "ap-southeast-2")
160
+ .loadConfig(S3PluginTask.class);
161
+ S3FileInputPlugin plugin = runtime.getInstance(S3FileInputPlugin.class);
162
+ AmazonS3 s3Client = plugin.newS3Client(task);
163
+
164
+ // Should not crash, and should favor the endpoint over the region configuration (a warning is logged, though)
165
+ assertEquals(s3Client.getRegion(), Region.AP_Singapore);
166
+ }
167
+
168
+ @Test
169
+ public void configuredRegion()
170
+ {
171
+ S3PluginTask task = config.deepCopy()
172
+ .set("region", "ap-southeast-2")
173
+ .remove("endpoint")
174
+ .loadConfig(S3PluginTask.class);
175
+ S3FileInputPlugin plugin = runtime.getInstance(S3FileInputPlugin.class);
176
+ AmazonS3 s3Client = plugin.newS3Client(task);
177
+
178
+ // Should reflect the region configuration as is
179
+ assertEquals(s3Client.getRegion(), Region.AP_Sydney);
180
+ }
181
+
182
+ @Test
183
+ public void unconfiguredEndpointAndRegion()
184
+ {
185
+ S3PluginTask task = config.deepCopy()
186
+ .remove("endpoint")
187
+ .remove("region")
188
+ .loadConfig(S3PluginTask.class);
189
+ S3FileInputPlugin plugin = runtime.getInstance(S3FileInputPlugin.class);
190
+ AmazonS3 s3Client = plugin.newS3Client(task);
191
+
192
+ // The US Standard region is a 'generic' one (s3.amazonaws.com); the expectation here is that
193
+ // the S3 client should not eagerly resolve a specific region on the client side.
194
+ // Please refer to org.embulk.input.s3.S3FileInputPlugin#newS3Client for the details.
195
+ assertEquals(s3Client.getRegion(), Region.US_Standard);
196
+ }
197
+
150
198
  static class Control
151
199
  implements InputPlugin.Control
152
200
  {
@@ -1,6 +1,6 @@
1
1
  package org.embulk.input.s3;
2
2
 
3
- import com.amazonaws.services.s3.AmazonS3Client;
3
+ import com.amazonaws.services.s3.AmazonS3;
4
4
  import com.amazonaws.services.s3.model.GetObjectRequest;
5
5
  import com.amazonaws.services.s3.model.ObjectMetadata;
6
6
  import com.amazonaws.services.s3.model.S3Object;
@@ -27,12 +27,12 @@ public class TestS3InputStreamReopener
27
27
  @Rule
28
28
  public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
29
29
 
30
- private AmazonS3Client client;
30
+ private AmazonS3 client;
31
31
 
32
32
  @Before
33
33
  public void createResources()
34
34
  {
35
- client = mock(AmazonS3Client.class);
35
+ client = mock(AmazonS3.class);
36
36
  }
37
37
 
38
38
  @Test
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-s3
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.12
4
+ version: 0.2.13
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-01-05 00:00:00.000000000 Z
11
+ date: 2018-02-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -61,8 +61,8 @@ files:
61
61
  - classpath/aws-java-sdk-kms-1.11.253.jar
62
62
  - classpath/aws-java-sdk-s3-1.11.253.jar
63
63
  - classpath/commons-codec-1.9.jar
64
- - classpath/embulk-input-s3-0.2.12.jar
65
- - classpath/embulk-util-aws-credentials-0.2.12.jar
64
+ - classpath/embulk-input-s3-0.2.13.jar
65
+ - classpath/embulk-util-aws-credentials-0.2.13.jar
66
66
  - classpath/httpclient-4.5.2.jar
67
67
  - classpath/httpcore-4.4.4.jar
68
68
  - classpath/ion-java-1.0.2.jar
Binary file