embulk-input-s3 0.2.4 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: db7314fde9364e1d4ec9edbb67f90cfc5c93dbe7
4
- data.tar.gz: b3133c6d3ea81cef907d8cd974b5471186e086b6
3
+ metadata.gz: 857c5f811620e1273a6fc5ec69a4eeefb658f1b0
4
+ data.tar.gz: deb852435fb0cee1d2db0391cd3a8d0f30255638
5
5
  SHA512:
6
- metadata.gz: 0cd7e8ce269c322e7a03262267ae23370a9cff12877b9f33b8bebf1769ebdf093896babe8193324a3f4aa5b37ccae6a67b380e94367ab65fcc381b3d0e9a848e
7
- data.tar.gz: 0e934e0baf997bfb29b3e3ed337daa28133a38a513050931902dc5f3d2cd03f652f6b2cb31e7d8fecd5b7e3dfccab869ede056053892af9e3bab2dc8cbd1ae4b
6
+ metadata.gz: f5dd533e7e7d5fd366268496118c110689bde80d4f5c98469eb9ff1be77734a168080a24f52d3415dcda158ce5cd1ace35c9025fc4b654663c9726f3994b9bc3
7
+ data.tar.gz: 1d5cab1c9c166a6352dbab3a30e65d058740e3b1883bb0776fac7bc6b677b6fe2bc8e047dba6e006f75cae722e0416504e8d6f493c6466dd7d54be3e484c79a7
Binary file
@@ -22,6 +22,7 @@ import com.amazonaws.services.s3.model.ObjectListing;
22
22
  import com.amazonaws.services.s3.model.GetObjectRequest;
23
23
  import com.amazonaws.services.s3.model.S3Object;
24
24
  import com.amazonaws.ClientConfiguration;
25
+ import com.amazonaws.AmazonServiceException;
25
26
  import com.amazonaws.Protocol;
26
27
  import org.embulk.config.Config;
27
28
  import org.embulk.config.ConfigInject;
@@ -31,6 +32,7 @@ import org.embulk.config.TaskSource;
31
32
  import org.embulk.config.ConfigSource;
32
33
  import org.embulk.config.ConfigDiff;
33
34
  import org.embulk.config.TaskReport;
35
+ import org.embulk.config.ConfigException;
34
36
  import org.embulk.spi.BufferAllocator;
35
37
  import org.embulk.spi.Exec;
36
38
  import org.embulk.spi.FileInputPlugin;
@@ -139,17 +141,29 @@ public abstract class AbstractS3FileInputPlugin
139
141
 
140
142
  private FileList listFiles(PluginTask task)
141
143
  {
142
- AmazonS3Client client = newS3Client(task);
143
- String bucketName = task.getBucket();
144
+ try {
145
+ AmazonS3Client client = newS3Client(task);
146
+ String bucketName = task.getBucket();
144
147
 
145
- if (task.getPathPrefix().equals("/")) {
146
- log.info("Listing files with prefix \"/\". This doesn't mean all files in a bucket. If you intend to read all files, use \"path_prefix: ''\" (empty string) instead.");
147
- }
148
+ if (task.getPathPrefix().equals("/")) {
149
+ log.info("Listing files with prefix \"/\". This doesn't mean all files in a bucket. If you intend to read all files, use \"path_prefix: ''\" (empty string) instead.");
150
+ }
148
151
 
149
- FileList.Builder builder = new FileList.Builder(task);
150
- listS3FilesByPrefix(builder, client, bucketName,
151
- task.getPathPrefix(), task.getLastPath());
152
- return builder.build();
152
+ FileList.Builder builder = new FileList.Builder(task);
153
+ listS3FilesByPrefix(builder, client, bucketName,
154
+ task.getPathPrefix(), task.getLastPath());
155
+ return builder.build();
156
+ }
157
+ catch (AmazonServiceException ex) {
158
+ if (ex.getErrorType().equals(AmazonServiceException.ErrorType.Client)) {
159
+ // HTTP 40x errors. auth error, bucket doesn't exist, etc. See AWS document for the full list:
160
+ // http://docs.aws.amazon.com/AmazonS3/latest/API/ErrorResponses.html
161
+ if (ex.getStatusCode() != 400) { // 400 Bad Request is unexpected error
162
+ throw new ConfigException(ex);
163
+ }
164
+ }
165
+ throw ex;
166
+ }
153
167
  }
154
168
 
155
169
  /**
@@ -5,6 +5,8 @@ import java.util.AbstractList;
5
5
  import java.util.ArrayList;
6
6
  import java.util.zip.GZIPInputStream;
7
7
  import java.util.zip.GZIPOutputStream;
8
+ import java.util.regex.Pattern;
9
+ import java.util.regex.Matcher;
8
10
  import java.io.InputStream;
9
11
  import java.io.OutputStream;
10
12
  import java.io.BufferedOutputStream;
@@ -29,6 +31,10 @@ public class FileList
29
31
  {
30
32
  public interface Task
31
33
  {
34
+ @Config("path_match_pattern")
35
+ @ConfigDefault("\".*\"")
36
+ String getPathMatchPattern();
37
+
32
38
  @Config("total_file_count_limit")
33
39
  @ConfigDefault("2147483647")
34
40
  int getTotalFileCountLimit();
@@ -63,17 +69,21 @@ public class FileList
63
69
  private String last = null;
64
70
 
65
71
  private int limitCount = Integer.MAX_VALUE;
72
+ private Pattern pathMatchPattern;
73
+
66
74
  private final ByteBuffer castBuffer = ByteBuffer.allocate(4);
67
75
 
68
76
  public Builder(Task task)
69
77
  {
70
78
  this();
71
79
  this.limitCount = task.getTotalFileCountLimit();
80
+ this.pathMatchPattern = Pattern.compile(task.getPathMatchPattern());
72
81
  }
73
82
 
74
83
  public Builder(ConfigSource config)
75
84
  {
76
85
  this();
86
+ this.pathMatchPattern = Pattern.compile(config.get(String.class, "path_match_pattern", ".*"));
77
87
  this.limitCount = config.get(int.class, "total_file_count_limit", Integer.MAX_VALUE);
78
88
  }
79
89
 
@@ -94,6 +104,12 @@ public class FileList
94
104
  return this;
95
105
  }
96
106
 
107
+ public Builder pathMatchPattern(String pattern)
108
+ {
109
+ this.pathMatchPattern = Pattern.compile(pattern);
110
+ return this;
111
+ }
112
+
97
113
  public int size()
98
114
  {
99
115
  return entries.size();
@@ -104,6 +120,7 @@ public class FileList
104
120
  return size() < limitCount;
105
121
  }
106
122
 
123
+ // returns true if this file is used
107
124
  public synchronized boolean add(String path, long size)
108
125
  {
109
126
  // TODO throw IllegalStateException if stream is already closed
@@ -112,8 +129,9 @@ public class FileList
112
129
  return false;
113
130
  }
114
131
 
115
- // TODO in the future, support some other filtering parameters (file name suffix filter, regex filter, etc)
116
- // and return false if filtered out.
132
+ if (!pathMatchPattern.matcher(path).matches()) {
133
+ return false;
134
+ }
117
135
 
118
136
  int index = entries.size();
119
137
  entries.add(new Entry(index, size));
@@ -0,0 +1,173 @@
1
+ package org.embulk.input.s3;
2
+
3
+ import com.amazonaws.auth.BasicAWSCredentials;
4
+ import com.amazonaws.auth.BasicSessionCredentials;
5
+ import com.amazonaws.auth.policy.Policy;
6
+ import com.amazonaws.auth.policy.Resource;
7
+ import com.amazonaws.auth.policy.Statement;
8
+ import com.amazonaws.auth.policy.actions.S3Actions;
9
+ import com.amazonaws.internal.StaticCredentialsProvider;
10
+ import com.amazonaws.services.securitytoken.AWSSecurityTokenServiceClient;
11
+ import com.amazonaws.services.securitytoken.model.Credentials;
12
+ import com.amazonaws.services.securitytoken.model.GetFederationTokenRequest;
13
+ import com.amazonaws.services.securitytoken.model.GetFederationTokenResult;
14
+ import org.embulk.EmbulkTestRuntime;
15
+ import org.embulk.config.ConfigDiff;
16
+ import org.embulk.config.ConfigSource;
17
+ import org.embulk.input.s3.TestS3FileInputPlugin.Control;
18
+ import org.embulk.spi.FileInputRunner;
19
+ import org.embulk.spi.TestPageBuilderReader;
20
+ import org.junit.Before;
21
+ import org.junit.BeforeClass;
22
+ import org.junit.Rule;
23
+ import org.junit.Test;
24
+
25
+ import static org.embulk.input.s3.TestS3FileInputPlugin.assertRecords;
26
+ import static org.embulk.input.s3.TestS3FileInputPlugin.parserConfig;
27
+ import static org.embulk.input.s3.TestS3FileInputPlugin.schemaConfig;
28
+ import static org.junit.Assert.assertEquals;
29
+ import static org.junit.Assume.assumeNotNull;
30
+
31
+ public class TestAwsCredentials
32
+ {
33
+ private static String EMBULK_S3_TEST_BUCKET;
34
+ private static String EMBULK_S3_TEST_ACCESS_KEY_ID;
35
+ private static String EMBULK_S3_TEST_SECRET_ACCESS_KEY;
36
+ private static final String EMBULK_S3_TEST_PATH_PREFIX = "embulk_input_s3_test";
37
+
38
+ /*
39
+ * This test case requires environment variables:
40
+ * EMBULK_S3_TEST_BUCKET
41
+ * EMBULK_S3_TEST_ACCESS_KEY_ID
42
+ * EMBULK_S3_TEST_SECRET_ACCESS_KEY
43
+ * If the variables are not set, the test case is skipped.
44
+ */
45
+ @BeforeClass
46
+ public static void initializeConstantVariables()
47
+ {
48
+ EMBULK_S3_TEST_BUCKET = System.getenv("EMBULK_S3_TEST_BUCKET");
49
+ EMBULK_S3_TEST_ACCESS_KEY_ID = System.getenv("EMBULK_S3_TEST_ACCESS_KEY_ID");
50
+ EMBULK_S3_TEST_SECRET_ACCESS_KEY = System.getenv("EMBULK_S3_TEST_SECRET_ACCESS_KEY");
51
+ assumeNotNull(EMBULK_S3_TEST_BUCKET, EMBULK_S3_TEST_ACCESS_KEY_ID, EMBULK_S3_TEST_SECRET_ACCESS_KEY);
52
+ }
53
+
54
+ @Rule
55
+ public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
56
+
57
+ private ConfigSource config;
58
+ private FileInputRunner runner;
59
+ private TestPageBuilderReader.MockPageOutput output;
60
+
61
+ @Before
62
+ public void createResources()
63
+ {
64
+ config = runtime.getExec().newConfigSource()
65
+ .set("type", "s3")
66
+ .set("bucket", EMBULK_S3_TEST_BUCKET)
67
+ .set("path_prefix", EMBULK_S3_TEST_PATH_PREFIX)
68
+ .set("parser", parserConfig(schemaConfig()));
69
+ runner = new FileInputRunner(runtime.getInstance(S3FileInputPlugin.class));
70
+ output = new TestPageBuilderReader.MockPageOutput();
71
+ }
72
+
73
+ private void doTest(ConfigSource config)
74
+ {
75
+ ConfigDiff configDiff = runner.transaction(config, new Control(runner, output));
76
+
77
+ assertEquals(EMBULK_S3_TEST_PATH_PREFIX + "/sample_01.csv", configDiff.get(String.class, "last_path"));
78
+ assertRecords(config, output);
79
+ }
80
+
81
+ @Test
82
+ public void useBasic()
83
+ {
84
+ ConfigSource config = this.config.deepCopy()
85
+ .set("auth_method", "basic")
86
+ .set("access_key_id", EMBULK_S3_TEST_ACCESS_KEY_ID)
87
+ .set("secret_access_key", EMBULK_S3_TEST_SECRET_ACCESS_KEY);
88
+ doTest(config);
89
+ }
90
+
91
+ @Test
92
+ public void useEnv()
93
+ {
94
+ // TODO
95
+ }
96
+
97
+ @Test
98
+ public void useInstance()
99
+ {
100
+ // TODO
101
+ }
102
+
103
+ @Test
104
+ public void useProfile()
105
+ {
106
+ // TODO
107
+ }
108
+
109
+ @Test
110
+ public void useProperties()
111
+ {
112
+ String origAccessKeyId = System.getProperty("aws.accessKeyId"); // remember the current values so they can be restored afterwards
113
+ String origSecretKey = System.getProperty("aws.secretKey");
114
+ try {
115
+ // auth_method "properties": no keys are put into the config; they are set as aws.* system properties instead
116
+ ConfigSource config = this.config.deepCopy().set("auth_method", "properties");
117
+ System.setProperty("aws.accessKeyId", EMBULK_S3_TEST_ACCESS_KEY_ID);
118
+ System.setProperty("aws.secretKey", EMBULK_S3_TEST_SECRET_ACCESS_KEY);
119
+ doTest(config);
120
+ }
121
+ finally { // NOTE(review): a property that was unset before the test is not cleared here and keeps the test credential
122
+ if (origAccessKeyId != null) {
123
+ System.setProperty("aws.accessKeyId", origAccessKeyId);
124
+ }
125
+ if (origSecretKey != null) {
126
+ System.setProperty("aws.secretKey", origSecretKey); // restore the saved secret key (was wrongly restored from origAccessKeyId)
127
+ }
128
+ }
129
+ }
130
+
131
+ @Test
132
+ public void useAnonymous()
133
+ {
134
+ // TODO
135
+ }
136
+
137
+ @Test
138
+ public void useSession()
139
+ {
140
+ BasicSessionCredentials sessionCredentials = getSessionCredentials();
141
+ ConfigSource config = this.config.deepCopy()
142
+ .set("auth_method", "session")
143
+ .set("access_key_id", sessionCredentials.getAWSAccessKeyId())
144
+ .set("secret_access_key", sessionCredentials.getAWSSecretKey())
145
+ .set("session_token", sessionCredentials.getSessionToken());
146
+ doTest(config);
147
+ }
148
+
149
+ private static BasicSessionCredentials getSessionCredentials()
150
+ {
151
+ AWSSecurityTokenServiceClient stsClient = new AWSSecurityTokenServiceClient(
152
+ new StaticCredentialsProvider(new BasicAWSCredentials(EMBULK_S3_TEST_ACCESS_KEY_ID, EMBULK_S3_TEST_SECRET_ACCESS_KEY)));
153
+
154
+ GetFederationTokenRequest getFederationTokenRequest = new GetFederationTokenRequest();
155
+ getFederationTokenRequest.setDurationSeconds(7200);
156
+ getFederationTokenRequest.setName("dummy");
157
+
158
+ Policy policy = new Policy().withStatements(new Statement(Statement.Effect.Allow)
159
+ .withActions(S3Actions.ListObjects, S3Actions.GetObject)
160
+ .withResources(
161
+ new Resource("arn:aws:s3:::" + EMBULK_S3_TEST_BUCKET + "/" + EMBULK_S3_TEST_PATH_PREFIX + "/*"),
162
+ new Resource("arn:aws:s3:::" + EMBULK_S3_TEST_BUCKET)));
163
+ getFederationTokenRequest.setPolicy(policy.toJson());
164
+
165
+ GetFederationTokenResult federationTokenResult = stsClient.getFederationToken(getFederationTokenRequest);
166
+ Credentials sessionCredentials = federationTokenResult.getCredentials();
167
+
168
+ return new BasicSessionCredentials(
169
+ sessionCredentials.getAccessKeyId(),
170
+ sessionCredentials.getSecretAccessKey(),
171
+ sessionCredentials.getSessionToken());
172
+ }
173
+ }
@@ -1,236 +1,220 @@
1
1
  package org.embulk.input.s3;
2
2
 
3
- import com.amazonaws.services.s3.AmazonS3Client;
4
- import com.amazonaws.services.s3.model.GetObjectRequest;
5
- import com.amazonaws.services.s3.model.ListObjectsRequest;
6
- import com.amazonaws.services.s3.model.ObjectListing;
7
- import com.amazonaws.services.s3.model.ObjectMetadata;
8
- import com.amazonaws.services.s3.model.S3Object;
9
- import com.amazonaws.services.s3.model.S3ObjectInputStream;
10
- import com.amazonaws.services.s3.model.S3ObjectSummary;
11
- import com.google.common.base.Optional;
12
3
  import com.google.common.collect.ImmutableList;
4
+ import com.google.common.collect.ImmutableMap;
13
5
  import org.embulk.EmbulkTestRuntime;
14
6
  import org.embulk.config.ConfigDiff;
15
7
  import org.embulk.config.ConfigSource;
16
8
  import org.embulk.config.TaskReport;
17
9
  import org.embulk.config.TaskSource;
18
- import org.embulk.input.s3.AbstractS3FileInputPlugin.PluginTask;
19
- import org.embulk.input.s3.AbstractS3FileInputPlugin.S3FileInput;
20
- import org.embulk.input.s3.S3FileInputPlugin.S3PluginTask;
21
- import org.embulk.spi.Exec;
22
- import org.embulk.spi.FileInputPlugin;
23
- import org.embulk.spi.util.LineDecoder;
10
+ import org.embulk.spi.FileInputRunner;
11
+ import org.embulk.spi.InputPlugin;
12
+ import org.embulk.spi.PageOutput;
13
+ import org.embulk.spi.Schema;
14
+ import org.embulk.spi.TestPageBuilderReader.MockPageOutput;
15
+ import org.embulk.spi.util.Pages;
16
+ import org.embulk.standards.CsvParserPlugin;
24
17
  import org.junit.Before;
18
+ import org.junit.BeforeClass;
25
19
  import org.junit.Rule;
26
20
  import org.junit.Test;
27
21
 
28
- import java.io.ByteArrayInputStream;
29
- import java.util.Arrays;
22
+ import java.util.ArrayList;
30
23
  import java.util.List;
31
24
 
32
- import static org.junit.Assert.*;
33
- import static org.mockito.Matchers.any;
34
- import static org.mockito.Mockito.doReturn;
35
- import static org.mockito.Mockito.mock;
36
- import static org.mockito.Mockito.spy;
25
+ import static org.junit.Assert.assertEquals;
26
+ import static org.junit.Assert.assertNull;
27
+ import static org.junit.Assume.assumeNotNull;
37
28
 
38
29
  public class TestS3FileInputPlugin
39
30
  {
31
+ private static String EMBULK_S3_TEST_BUCKET;
32
+ private static String EMBULK_S3_TEST_ACCESS_KEY_ID;
33
+ private static String EMBULK_S3_TEST_SECRET_ACCESS_KEY;
34
+ private static final String EMBULK_S3_TEST_PATH_PREFIX = "embulk_input_s3_test";
35
+
36
+ /*
37
+ * This test case requires environment variables:
38
+ * EMBULK_S3_TEST_BUCKET
39
+ * EMBULK_S3_TEST_ACCESS_KEY_ID
40
+ * EMBULK_S3_TEST_SECRET_ACCESS_KEY
41
+ * If the variables are not set, the test case is skipped.
42
+ */
43
+ @BeforeClass
44
+ public static void initializeConstantVariables()
45
+ {
46
+ EMBULK_S3_TEST_BUCKET = System.getenv("EMBULK_S3_TEST_BUCKET");
47
+ EMBULK_S3_TEST_ACCESS_KEY_ID = System.getenv("EMBULK_S3_TEST_ACCESS_KEY_ID");
48
+ EMBULK_S3_TEST_SECRET_ACCESS_KEY = System.getenv("EMBULK_S3_TEST_SECRET_ACCESS_KEY");
49
+ assumeNotNull(EMBULK_S3_TEST_BUCKET, EMBULK_S3_TEST_ACCESS_KEY_ID, EMBULK_S3_TEST_SECRET_ACCESS_KEY);
50
+ }
51
+
40
52
  @Rule
41
53
  public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
42
54
 
43
55
  private ConfigSource config;
44
- private S3FileInputPlugin plugin;
45
- private AmazonS3Client client;
56
+ private FileInputRunner runner;
57
+ private MockPageOutput output;
46
58
 
47
59
  @Before
48
60
  public void createResources()
49
61
  {
50
- config = config();
51
- plugin = spy(runtime.getInstance(S3FileInputPlugin.class));
52
- client = mock(AmazonS3Client.class);
62
+ config = runtime.getExec().newConfigSource()
63
+ .set("type", "s3")
64
+ .set("bucket", EMBULK_S3_TEST_BUCKET)
65
+ .set("access_key_id", EMBULK_S3_TEST_ACCESS_KEY_ID)
66
+ .set("secret_access_key", EMBULK_S3_TEST_SECRET_ACCESS_KEY)
67
+ .set("path_prefix", EMBULK_S3_TEST_PATH_PREFIX)
68
+ .set("parser", parserConfig(schemaConfig()));
69
+ runner = new FileInputRunner(runtime.getInstance(S3FileInputPlugin.class));
70
+ output = new MockPageOutput();
53
71
  }
54
72
 
55
73
  @Test
56
- public void checkS3ClientCreatedSuccessfully()
74
+ public void simpleTest()
57
75
  {
58
- PluginTask task = config().loadConfig(plugin.getTaskClass());
59
- plugin.newS3Client(task);
76
+ ConfigSource config = this.config.deepCopy();
77
+ ConfigDiff configDiff = runner.transaction(config, new Control(runner, output));
78
+
79
+ assertEquals(EMBULK_S3_TEST_PATH_PREFIX + "/sample_01.csv", configDiff.get(String.class, "last_path"));
80
+ assertRecords(config, output);
60
81
  }
61
82
 
62
83
  @Test
63
- public void listS3FilesByPrefix()
84
+ public void useLastPath()
85
+ throws Exception
64
86
  {
65
- // AWSS3Client returns list1 for the first iteration and list2 next.
66
- List<S3ObjectSummary> list1 = s3objects("in/", 0L, "in/file/", 0L, "in/file/sample.csv.gz", 12345L);
67
- List<S3ObjectSummary> list2 = s3objects("sample2.csv.gz", 0L);
68
- ObjectListing ol = mock(ObjectListing.class);
69
-
70
- doReturn(list1).doReturn(list2).when(ol).getObjectSummaries();
71
- doReturn(ol).when(client).listObjects(any(ListObjectsRequest.class));
72
- doReturn("in/file/").doReturn(null).when(ol).getNextMarker();
73
-
74
- // It counts only size != 0 files.
75
- FileList.Builder builder = new FileList.Builder();
76
- S3FileInputPlugin.listS3FilesByPrefix(builder, client, "bucketName", "prefix", Optional.<String>absent());
77
- assertEquals(1, builder.size());
87
+ ConfigSource config = this.config.deepCopy().set("last_path", EMBULK_S3_TEST_PATH_PREFIX + "/sample_01.csv");
88
+ ConfigDiff configDiff = runner.transaction(config, new Control(runner, output));
89
+
90
+ assertEquals(EMBULK_S3_TEST_PATH_PREFIX + "/sample_01.csv", configDiff.get(String.class, "last_path"));
91
+ assertEquals(0, getRecords(config, output).size());
78
92
  }
79
93
 
80
94
  @Test
81
- public void checkLastPath()
95
+ public void emptyFilesWithLastPath()
96
+ throws Exception
82
97
  {
83
- doReturn(null).when(client).listObjects(any(ListObjectsRequest.class));
84
- doReturn(client).when(plugin).newS3Client(any(PluginTask.class));
85
-
86
- { // set a last file to last_path
87
- ObjectListing listing = listing("in/aa", 0L, "in/aa/a", 3L, "in/aa/b", 2L, "in/aa/c", 1L);
88
- doReturn(listing).when(client).listObjects(any(ListObjectsRequest.class));
89
-
90
- ConfigDiff configDiff = plugin.transaction(config, new FileInputPlugin.Control() {
91
- @Override
92
- public List<TaskReport> run(TaskSource taskSource, int taskCount)
93
- {
94
- assertEquals(3, taskCount);
95
- List<String> files = fileListToList(taskSource.loadTask(S3PluginTask.class).getFiles());
96
- assertArrayEquals(new String[]{"in/aa/a", "in/aa/b", "in/aa/c"}, files.toArray(new String[files.size()]));
97
- return emptyTaskReports(taskCount);
98
- }
99
- });
100
-
101
- assertEquals("in/aa/c", configDiff.get(String.class, "last_path"));
102
- }
98
+ ConfigSource config = this.config.deepCopy()
99
+ .set("path_prefix", "empty_files_prefix")
100
+ .set("last_path", EMBULK_S3_TEST_PATH_PREFIX + "/sample_01.csv");
101
+ ConfigDiff configDiff = runner.transaction(config, new Control(runner, output));
103
102
 
104
- { // if files are empty and last_path is not specified, last_path is not set.
105
- ObjectListing listing = listing("in/aa", 0L);
106
- doReturn(listing).when(client).listObjects(any(ListObjectsRequest.class));
107
-
108
- ConfigDiff configDiff = plugin.transaction(config, new FileInputPlugin.Control() {
109
- @Override
110
- public List<TaskReport> run(TaskSource taskSource, int taskCount)
111
- {
112
- assertEquals(0, taskCount);
113
- assertTrue(fileListToList(taskSource.loadTask(S3PluginTask.class).getFiles()).isEmpty());
114
- return emptyTaskReports(taskCount);
115
- }
116
- });
117
-
118
- assertEquals(null, configDiff.get(String.class, "last_path", null));
119
- }
120
-
121
- { // if files are empty, keep the previous last_path.
122
- config.set("last_path", "in/bb");
123
-
124
- ObjectListing listing = listing("in/aa", 0L);
125
- doReturn(listing).when(client).listObjects(any(ListObjectsRequest.class));
126
-
127
- ConfigDiff configDiff = plugin.transaction(config, new FileInputPlugin.Control() {
128
- @Override
129
- public List<TaskReport> run(TaskSource taskSource, int taskCount) {
130
- assertEquals(0, taskCount);
131
- assertTrue(fileListToList(taskSource.loadTask(S3PluginTask.class).getFiles()).isEmpty());
132
- return emptyTaskReports(taskCount);
133
- }
134
- });
135
-
136
- assertEquals("in/bb", configDiff.get(String.class, "last_path"));
137
- }
103
+ assertEquals(EMBULK_S3_TEST_PATH_PREFIX + "/sample_01.csv", configDiff.get(String.class, "last_path")); // keep the last_path
104
+ assertEquals(0, getRecords(config, output).size());
138
105
  }
139
106
 
140
107
  @Test
141
- public void checkS3FileInputByOpen()
108
+ public void useTotalFileCountLimit()
142
109
  throws Exception
143
110
  {
144
- doReturn(s3object("in/aa/a", "aa")).when(client).getObject(any(GetObjectRequest.class));
145
- doReturn(client).when(plugin).newS3Client(any(PluginTask.class));
146
-
147
- PluginTask task = config.loadConfig(plugin.getTaskClass());
148
- FileList.Builder builder = new FileList.Builder();
149
- builder.add("in/aa/a", 100);
150
- task.setFiles(builder.build());
151
-
152
- StringBuilder sbuf = new StringBuilder();
153
- try (S3FileInput input = (S3FileInput) plugin.open(task.dump(), 0)) {
154
- LineDecoder d = new LineDecoder(input, config.loadConfig(LineDecoder.DecoderTask.class));
155
- while (d.nextFile()) {
156
- sbuf.append(d.poll());
157
- }
158
- }
159
- assertEquals("aa", sbuf.toString());
160
- }
111
+ ConfigSource config = this.config.deepCopy().set("total_file_count_limit", 0);
112
+ ConfigDiff configDiff = runner.transaction(config, new Control(runner, output));
161
113
 
162
- public static ConfigSource config()
163
- {
164
- return Exec.newConfigSource()
165
- .set("bucket", "my_bucket")
166
- .set("path_prefix", "my_path_prefix")
167
- .set("access_key_id", "my_access_key_id")
168
- .set("secret_access_key", "my_secret_access_key");
114
+ assertNull(configDiff.get(String.class, "last_path"));
115
+ assertEquals(0, getRecords(config, output).size());
169
116
  }
170
117
 
171
- static ObjectListing listing(Object... keySizes)
118
+ @Test
119
+ public void usePathMatchPattern()
120
+ throws Exception
172
121
  {
173
- ObjectListing listing = mock(ObjectListing.class);
174
- if (keySizes == null) {
175
- return listing;
176
- }
122
+ ConfigSource config = this.config.deepCopy().set("path_match_pattern", "/match/");
123
+ ConfigDiff configDiff = runner.transaction(config, new Control(runner, output));
177
124
 
178
- List<S3ObjectSummary> s3objects = s3objects(keySizes);
179
- doReturn(s3objects).when(listing).getObjectSummaries();
180
- doReturn(null).when(listing).getNextMarker();
181
- return listing;
125
+ assertNull(configDiff.get(String.class, "last_path"));
126
+ assertEquals(0, getRecords(config, output).size());
182
127
  }
183
128
 
184
- static List<S3ObjectSummary> s3objects(Object... keySizes)
129
+ static class Control
130
+ implements InputPlugin.Control
185
131
  {
186
- ImmutableList.Builder<S3ObjectSummary> builder = new ImmutableList.Builder<>();
187
- if (keySizes == null) {
188
- return builder.build();
132
+ private FileInputRunner runner;
133
+ private PageOutput output;
134
+
135
+ Control(FileInputRunner runner, PageOutput output)
136
+ {
137
+ this.runner = runner;
138
+ this.output = output;
189
139
  }
190
140
 
191
- for (int i = 0; i < keySizes.length; i += 2) {
192
- String key = (String) keySizes[i];
193
- long size = (Long) keySizes[i + 1];
194
- builder.add(s3object(key, size));
141
+ @Override
142
+ public List<TaskReport> run(TaskSource taskSource, Schema schema, int taskCount)
143
+ {
144
+ List<TaskReport> reports = new ArrayList<>();
145
+ for (int i = 0; i < taskCount; i++) {
146
+ reports.add(runner.run(taskSource, schema, i, output));
147
+ }
148
+ return reports;
195
149
  }
196
- return builder.build();
197
150
  }
198
151
 
199
- static S3ObjectSummary s3object(String key, long size)
152
+ static ImmutableMap<String, Object> parserConfig(ImmutableList<Object> schemaConfig)
200
153
  {
201
- S3ObjectSummary o = new S3ObjectSummary();
202
- o.setKey(key);
203
- o.setSize(size);
204
- return o;
154
+ ImmutableMap.Builder<String, Object> builder = new ImmutableMap.Builder<>();
155
+ builder.put("type", "csv");
156
+ builder.put("newline", "CRLF");
157
+ builder.put("delimiter", ",");
158
+ builder.put("quote", "\"");
159
+ builder.put("escape", "\"");
160
+ builder.put("trim_if_not_quoted", false);
161
+ builder.put("skip_header_lines", 0);
162
+ builder.put("allow_extra_columns", false);
163
+ builder.put("allow_optional_columns", false);
164
+ builder.put("columns", schemaConfig);
165
+ return builder.build();
205
166
  }
206
167
 
207
- static S3Object s3object(String key, String value)
168
+ static ImmutableList<Object> schemaConfig()
208
169
  {
209
- S3Object o = new S3Object();
210
- o.setObjectContent(new S3ObjectInputStream(new ByteArrayInputStream(value.getBytes()), null));
211
- ObjectMetadata om = new ObjectMetadata();
212
- om.setContentLength(value.length());
213
- o.setObjectMetadata(om);
214
- return o;
170
+ ImmutableList.Builder<Object> builder = new ImmutableList.Builder<>();
171
+ builder.add(ImmutableMap.of("name", "timestamp", "type", "timestamp", "format", "%Y-%m-%d %H:%M:%S"));
172
+ builder.add(ImmutableMap.of("name", "host", "type", "string"));
173
+ builder.add(ImmutableMap.of("name", "path", "type", "string"));
174
+ builder.add(ImmutableMap.of("name", "method", "type", "string"));
175
+ builder.add(ImmutableMap.of("name", "referer", "type", "string"));
176
+ builder.add(ImmutableMap.of("name", "code", "type", "long"));
177
+ builder.add(ImmutableMap.of("name", "agent", "type", "string"));
178
+ builder.add(ImmutableMap.of("name", "user", "type", "string"));
179
+ builder.add(ImmutableMap.of("name", "size", "type", "long"));
180
+ return builder.build();
215
181
  }
216
182
 
217
- static List<TaskReport> emptyTaskReports(int taskCount)
183
+ static void assertRecords(ConfigSource config, MockPageOutput output)
218
184
  {
219
- ImmutableList.Builder<TaskReport> reports = new ImmutableList.Builder<>();
220
- for (int i = 0; i < taskCount; i++) {
221
- reports.add(Exec.newTaskReport());
185
+ List<Object[]> records = getRecords(config, output);
186
+
187
+ assertEquals(2, records.size());
188
+ {
189
+ Object[] record = records.get(0);
190
+ assertEquals("2014-10-02 22:15:39 UTC", record[0].toString());
191
+ assertEquals("84.186.29.187", record[1]);
192
+ assertEquals("/category/electronics", record[2]);
193
+ assertEquals("GET", record[3]);
194
+ assertEquals("/category/music", record[4]);
195
+ assertEquals(200L, record[5]);
196
+ assertEquals("Mozilla/5.0", record[6]);
197
+ assertEquals("-", record[7]);
198
+ assertEquals(136L, record[8]);
199
+ }
200
+
201
+ {
202
+ Object[] record = records.get(1);
203
+ assertEquals("2014-10-02 22:15:01 UTC", record[0].toString());
204
+ assertEquals("140.36.216.47", record[1]);
205
+ assertEquals("/category/music?from=10", record[2]);
206
+ assertEquals("GET", record[3]);
207
+ assertEquals("-", record[4]);
208
+ assertEquals(200L, record[5]);
209
+ assertEquals("Mozilla/5.0", record[6]);
210
+ assertEquals("-", record[7]);
211
+ assertEquals(70L, record[8]);
222
212
  }
223
- return reports.build();
224
213
  }
225
214
 
226
- private static List<String> fileListToList(FileList list)
215
+ static List<Object[]> getRecords(ConfigSource config, MockPageOutput output)
227
216
  {
228
- ImmutableList.Builder<String> builder = ImmutableList.builder();
229
- for (int i=0; i < list.getTaskCount(); i++) {
230
- for (String path : list.get(i)) {
231
- builder.add(path);
232
- }
233
- }
234
- return builder.build();
217
+ Schema schema = config.getNested("parser").loadConfig(CsvParserPlugin.PluginTask.class).getSchemaConfig().toSchema();
218
+ return Pages.toObjects(schema, output.pages);
235
219
  }
236
220
  }
@@ -2,6 +2,9 @@ package org.embulk.input.s3;
2
2
 
3
3
  import com.amazonaws.services.s3.AmazonS3Client;
4
4
  import com.amazonaws.services.s3.model.GetObjectRequest;
5
+ import com.amazonaws.services.s3.model.ObjectMetadata;
6
+ import com.amazonaws.services.s3.model.S3Object;
7
+ import com.amazonaws.services.s3.model.S3ObjectInputStream;
5
8
  import org.embulk.EmbulkTestRuntime;
6
9
  import org.embulk.input.s3.AbstractS3FileInputPlugin.S3InputStreamReopener;
7
10
  import org.junit.Before;
@@ -9,11 +12,11 @@ import org.junit.Rule;
9
12
  import org.junit.Test;
10
13
 
11
14
  import java.io.BufferedReader;
15
+ import java.io.ByteArrayInputStream;
12
16
  import java.io.InputStream;
13
17
  import java.io.InputStreamReader;
14
18
 
15
19
  import static org.junit.Assert.assertEquals;
16
- import static org.embulk.input.s3.TestS3FileInputPlugin.s3object;
17
20
  import static org.mockito.Matchers.any;
18
21
  import static org.mockito.Mockito.doReturn;
19
22
  import static org.mockito.Mockito.doThrow;
@@ -60,4 +63,14 @@ public class TestS3InputStreamReopener
60
63
  }
61
64
  }
62
65
  }
66
+
67
+ static S3Object s3object(String key, String value)
68
+ {
69
+ S3Object o = new S3Object();
70
+ o.setObjectContent(new S3ObjectInputStream(new ByteArrayInputStream(value.getBytes()), null));
71
+ ObjectMetadata om = new ObjectMetadata();
72
+ om.setContentLength(value.length());
73
+ o.setObjectMetadata(om);
74
+ return o;
75
+ }
63
76
  }
@@ -0,0 +1,3 @@
1
+ timestamp,host,path,method,referer,code,agent,user,size
2
+ 2014-10-02 22:15:39,84.186.29.187,/category/electronics,GET,/category/music,200,Mozilla/5.0,-,136
3
+ 2014-10-02 22:15:01,140.36.216.47,/category/music?from=10,GET,-,200,Mozilla/5.0,-,70
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-s3
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.4
4
+ version: 0.2.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-11-18 00:00:00.000000000 Z
11
+ date: 2015-12-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -52,13 +52,15 @@ files:
52
52
  - src/main/java/org/embulk/input/s3/AwsCredentialsTask.java
53
53
  - src/main/java/org/embulk/input/s3/FileList.java
54
54
  - src/main/java/org/embulk/input/s3/S3FileInputPlugin.java
55
+ - src/test/java/org/embulk/input/s3/TestAwsCredentials.java
55
56
  - src/test/java/org/embulk/input/s3/TestS3FileInputPlugin.java
56
57
  - src/test/java/org/embulk/input/s3/TestS3InputStreamReopener.java
58
+ - src/test/resources/sample_01.csv
57
59
  - classpath/aws-java-sdk-core-1.10.33.jar
58
60
  - classpath/aws-java-sdk-kms-1.10.33.jar
59
61
  - classpath/aws-java-sdk-s3-1.10.33.jar
60
62
  - classpath/commons-codec-1.6.jar
61
- - classpath/embulk-input-s3-0.2.4.jar
63
+ - classpath/embulk-input-s3-0.2.5.jar
62
64
  - classpath/httpclient-4.3.6.jar
63
65
  - classpath/httpcore-4.3.3.jar
64
66
  - classpath/jcl-over-slf4j-1.7.12.jar
Binary file