embulk-input-s3 0.2.4 → 0.2.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: db7314fde9364e1d4ec9edbb67f90cfc5c93dbe7
4
- data.tar.gz: b3133c6d3ea81cef907d8cd974b5471186e086b6
3
+ metadata.gz: 857c5f811620e1273a6fc5ec69a4eeefb658f1b0
4
+ data.tar.gz: deb852435fb0cee1d2db0391cd3a8d0f30255638
5
5
  SHA512:
6
- metadata.gz: 0cd7e8ce269c322e7a03262267ae23370a9cff12877b9f33b8bebf1769ebdf093896babe8193324a3f4aa5b37ccae6a67b380e94367ab65fcc381b3d0e9a848e
7
- data.tar.gz: 0e934e0baf997bfb29b3e3ed337daa28133a38a513050931902dc5f3d2cd03f652f6b2cb31e7d8fecd5b7e3dfccab869ede056053892af9e3bab2dc8cbd1ae4b
6
+ metadata.gz: f5dd533e7e7d5fd366268496118c110689bde80d4f5c98469eb9ff1be77734a168080a24f52d3415dcda158ce5cd1ace35c9025fc4b654663c9726f3994b9bc3
7
+ data.tar.gz: 1d5cab1c9c166a6352dbab3a30e65d058740e3b1883bb0776fac7bc6b677b6fe2bc8e047dba6e006f75cae722e0416504e8d6f493c6466dd7d54be3e484c79a7
Binary file
@@ -22,6 +22,7 @@ import com.amazonaws.services.s3.model.ObjectListing;
22
22
  import com.amazonaws.services.s3.model.GetObjectRequest;
23
23
  import com.amazonaws.services.s3.model.S3Object;
24
24
  import com.amazonaws.ClientConfiguration;
25
+ import com.amazonaws.AmazonServiceException;
25
26
  import com.amazonaws.Protocol;
26
27
  import org.embulk.config.Config;
27
28
  import org.embulk.config.ConfigInject;
@@ -31,6 +32,7 @@ import org.embulk.config.TaskSource;
31
32
  import org.embulk.config.ConfigSource;
32
33
  import org.embulk.config.ConfigDiff;
33
34
  import org.embulk.config.TaskReport;
35
+ import org.embulk.config.ConfigException;
34
36
  import org.embulk.spi.BufferAllocator;
35
37
  import org.embulk.spi.Exec;
36
38
  import org.embulk.spi.FileInputPlugin;
@@ -139,17 +141,29 @@ public abstract class AbstractS3FileInputPlugin
139
141
 
140
142
  private FileList listFiles(PluginTask task)
141
143
  {
142
- AmazonS3Client client = newS3Client(task);
143
- String bucketName = task.getBucket();
144
+ try {
145
+ AmazonS3Client client = newS3Client(task);
146
+ String bucketName = task.getBucket();
144
147
 
145
- if (task.getPathPrefix().equals("/")) {
146
- log.info("Listing files with prefix \"/\". This doesn't mean all files in a bucket. If you intend to read all files, use \"path_prefix: ''\" (empty string) instead.");
147
- }
148
+ if (task.getPathPrefix().equals("/")) {
149
+ log.info("Listing files with prefix \"/\". This doesn't mean all files in a bucket. If you intend to read all files, use \"path_prefix: ''\" (empty string) instead.");
150
+ }
148
151
 
149
- FileList.Builder builder = new FileList.Builder(task);
150
- listS3FilesByPrefix(builder, client, bucketName,
151
- task.getPathPrefix(), task.getLastPath());
152
- return builder.build();
152
+ FileList.Builder builder = new FileList.Builder(task);
153
+ listS3FilesByPrefix(builder, client, bucketName,
154
+ task.getPathPrefix(), task.getLastPath());
155
+ return builder.build();
156
+ }
157
+ catch (AmazonServiceException ex) {
158
+ if (ex.getErrorType().equals(AmazonServiceException.ErrorType.Client)) {
159
+ // HTTP 4xx errors: auth error, bucket doesn't exist, etc. See the AWS documentation for the full list:
160
+ // http://docs.aws.amazon.com/AmazonS3/latest/API/ErrorResponses.html
161
+ if (ex.getStatusCode() != 400) { // 400 Bad Request is an unexpected error
162
+ throw new ConfigException(ex);
163
+ }
164
+ }
165
+ throw ex;
166
+ }
153
167
  }
154
168
 
155
169
  /**
@@ -5,6 +5,8 @@ import java.util.AbstractList;
5
5
  import java.util.ArrayList;
6
6
  import java.util.zip.GZIPInputStream;
7
7
  import java.util.zip.GZIPOutputStream;
8
+ import java.util.regex.Pattern;
9
+ import java.util.regex.Matcher;
8
10
  import java.io.InputStream;
9
11
  import java.io.OutputStream;
10
12
  import java.io.BufferedOutputStream;
@@ -29,6 +31,10 @@ public class FileList
29
31
  {
30
32
  public interface Task
31
33
  {
34
+ @Config("path_match_pattern")
35
+ @ConfigDefault("\".*\"")
36
+ String getPathMatchPattern();
37
+
32
38
  @Config("total_file_count_limit")
33
39
  @ConfigDefault("2147483647")
34
40
  int getTotalFileCountLimit();
@@ -63,17 +69,21 @@ public class FileList
63
69
  private String last = null;
64
70
 
65
71
  private int limitCount = Integer.MAX_VALUE;
72
+ private Pattern pathMatchPattern;
73
+
66
74
  private final ByteBuffer castBuffer = ByteBuffer.allocate(4);
67
75
 
68
76
  public Builder(Task task)
69
77
  {
70
78
  this();
71
79
  this.limitCount = task.getTotalFileCountLimit();
80
+ this.pathMatchPattern = Pattern.compile(task.getPathMatchPattern());
72
81
  }
73
82
 
74
83
  public Builder(ConfigSource config)
75
84
  {
76
85
  this();
86
+ this.pathMatchPattern = Pattern.compile(config.get(String.class, "path_match_pattern", ".*"));
77
87
  this.limitCount = config.get(int.class, "total_file_count_limit", Integer.MAX_VALUE);
78
88
  }
79
89
 
@@ -94,6 +104,12 @@ public class FileList
94
104
  return this;
95
105
  }
96
106
 
107
+ public Builder pathMatchPattern(String pattern)
108
+ {
109
+ this.pathMatchPattern = Pattern.compile(pattern);
110
+ return this;
111
+ }
112
+
97
113
  public int size()
98
114
  {
99
115
  return entries.size();
@@ -104,6 +120,7 @@ public class FileList
104
120
  return size() < limitCount;
105
121
  }
106
122
 
123
+ // returns true if this file is used
107
124
  public synchronized boolean add(String path, long size)
108
125
  {
109
126
  // TODO throw IllegalStateException if stream is already closed
@@ -112,8 +129,9 @@ public class FileList
112
129
  return false;
113
130
  }
114
131
 
115
- // TODO in the future, support some other filtering parameters (file name suffix filter, regex filter, etc)
116
- // and return false if filtered out.
132
+ if (!pathMatchPattern.matcher(path).matches()) {
133
+ return false;
134
+ }
117
135
 
118
136
  int index = entries.size();
119
137
  entries.add(new Entry(index, size));
@@ -0,0 +1,173 @@
1
+ package org.embulk.input.s3;
2
+
3
+ import com.amazonaws.auth.BasicAWSCredentials;
4
+ import com.amazonaws.auth.BasicSessionCredentials;
5
+ import com.amazonaws.auth.policy.Policy;
6
+ import com.amazonaws.auth.policy.Resource;
7
+ import com.amazonaws.auth.policy.Statement;
8
+ import com.amazonaws.auth.policy.actions.S3Actions;
9
+ import com.amazonaws.internal.StaticCredentialsProvider;
10
+ import com.amazonaws.services.securitytoken.AWSSecurityTokenServiceClient;
11
+ import com.amazonaws.services.securitytoken.model.Credentials;
12
+ import com.amazonaws.services.securitytoken.model.GetFederationTokenRequest;
13
+ import com.amazonaws.services.securitytoken.model.GetFederationTokenResult;
14
+ import org.embulk.EmbulkTestRuntime;
15
+ import org.embulk.config.ConfigDiff;
16
+ import org.embulk.config.ConfigSource;
17
+ import org.embulk.input.s3.TestS3FileInputPlugin.Control;
18
+ import org.embulk.spi.FileInputRunner;
19
+ import org.embulk.spi.TestPageBuilderReader;
20
+ import org.junit.Before;
21
+ import org.junit.BeforeClass;
22
+ import org.junit.Rule;
23
+ import org.junit.Test;
24
+
25
+ import static org.embulk.input.s3.TestS3FileInputPlugin.assertRecords;
26
+ import static org.embulk.input.s3.TestS3FileInputPlugin.parserConfig;
27
+ import static org.embulk.input.s3.TestS3FileInputPlugin.schemaConfig;
28
+ import static org.junit.Assert.assertEquals;
29
+ import static org.junit.Assume.assumeNotNull;
30
+
31
+ public class TestAwsCredentials
32
+ {
33
+ private static String EMBULK_S3_TEST_BUCKET;
34
+ private static String EMBULK_S3_TEST_ACCESS_KEY_ID;
35
+ private static String EMBULK_S3_TEST_SECRET_ACCESS_KEY;
36
+ private static final String EMBULK_S3_TEST_PATH_PREFIX = "embulk_input_s3_test";
37
+
38
+ /*
39
+ * This test case requires environment variables:
40
+ * EMBULK_S3_TEST_BUCKET
41
+ * EMBULK_S3_TEST_ACCESS_KEY_ID
42
+ * EMBULK_S3_TEST_SECRET_ACCESS_KEY
43
+ * If the variables are not set, the test case is skipped.
44
+ */
45
+ @BeforeClass
46
+ public static void initializeConstantVariables()
47
+ {
48
+ EMBULK_S3_TEST_BUCKET = System.getenv("EMBULK_S3_TEST_BUCKET");
49
+ EMBULK_S3_TEST_ACCESS_KEY_ID = System.getenv("EMBULK_S3_TEST_ACCESS_KEY_ID");
50
+ EMBULK_S3_TEST_SECRET_ACCESS_KEY = System.getenv("EMBULK_S3_TEST_SECRET_ACCESS_KEY");
51
+ assumeNotNull(EMBULK_S3_TEST_BUCKET, EMBULK_S3_TEST_ACCESS_KEY_ID, EMBULK_S3_TEST_SECRET_ACCESS_KEY);
52
+ }
53
+
54
+ @Rule
55
+ public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
56
+
57
+ private ConfigSource config;
58
+ private FileInputRunner runner;
59
+ private TestPageBuilderReader.MockPageOutput output;
60
+
61
+ @Before
62
+ public void createResources()
63
+ {
64
+ config = runtime.getExec().newConfigSource()
65
+ .set("type", "s3")
66
+ .set("bucket", EMBULK_S3_TEST_BUCKET)
67
+ .set("path_prefix", EMBULK_S3_TEST_PATH_PREFIX)
68
+ .set("parser", parserConfig(schemaConfig()));
69
+ runner = new FileInputRunner(runtime.getInstance(S3FileInputPlugin.class));
70
+ output = new TestPageBuilderReader.MockPageOutput();
71
+ }
72
+
73
+ private void doTest(ConfigSource config)
74
+ {
75
+ ConfigDiff configDiff = runner.transaction(config, new Control(runner, output));
76
+
77
+ assertEquals(EMBULK_S3_TEST_PATH_PREFIX + "/sample_01.csv", configDiff.get(String.class, "last_path"));
78
+ assertRecords(config, output);
79
+ }
80
+
81
+ @Test
82
+ public void useBasic()
83
+ {
84
+ ConfigSource config = this.config.deepCopy()
85
+ .set("auth_method", "basic")
86
+ .set("access_key_id", EMBULK_S3_TEST_ACCESS_KEY_ID)
87
+ .set("secret_access_key", EMBULK_S3_TEST_SECRET_ACCESS_KEY);
88
+ doTest(config);
89
+ }
90
+
91
+ @Test
92
+ public void useEnv()
93
+ {
94
+ // TODO
95
+ }
96
+
97
+ @Test
98
+ public void useInstance()
99
+ {
100
+ // TODO
101
+ }
102
+
103
+ @Test
104
+ public void useProfile()
105
+ {
106
+ // TODO
107
+ }
108
+
109
+ @Test
110
+ public void useProperties()
111
+ {
112
+ String origAccessKeyId = System.getProperty("aws.accessKeyId");
113
+ String origSecretKey = System.getProperty("aws.secretKey");
114
+ try {
115
+
116
+ ConfigSource config = this.config.deepCopy().set("auth_method", "properties");
117
+ System.setProperty("aws.accessKeyId", EMBULK_S3_TEST_ACCESS_KEY_ID);
118
+ System.setProperty("aws.secretKey", EMBULK_S3_TEST_SECRET_ACCESS_KEY);
119
+ doTest(config);
120
+ }
121
+ finally {
122
+ if (origAccessKeyId != null) {
123
+ System.setProperty("aws.accessKeyId", origAccessKeyId);
124
+ }
125
+ if (origSecretKey != null) {
126
+ System.setProperty("aws.secretKey", origAccessKeyId);
127
+ }
128
+ }
129
+ }
130
+
131
+ @Test
132
+ public void useAnonymous()
133
+ {
134
+ // TODO
135
+ }
136
+
137
+ @Test
138
+ public void useSession()
139
+ {
140
+ BasicSessionCredentials sessionCredentials = getSessionCredentials();
141
+ ConfigSource config = this.config.deepCopy()
142
+ .set("auth_method", "session")
143
+ .set("access_key_id", sessionCredentials.getAWSAccessKeyId())
144
+ .set("secret_access_key", sessionCredentials.getAWSSecretKey())
145
+ .set("session_token", sessionCredentials.getSessionToken());
146
+ doTest(config);
147
+ }
148
+
149
+ private static BasicSessionCredentials getSessionCredentials()
150
+ {
151
+ AWSSecurityTokenServiceClient stsClient = new AWSSecurityTokenServiceClient(
152
+ new StaticCredentialsProvider(new BasicAWSCredentials(EMBULK_S3_TEST_ACCESS_KEY_ID, EMBULK_S3_TEST_SECRET_ACCESS_KEY)));
153
+
154
+ GetFederationTokenRequest getFederationTokenRequest = new GetFederationTokenRequest();
155
+ getFederationTokenRequest.setDurationSeconds(7200);
156
+ getFederationTokenRequest.setName("dummy");
157
+
158
+ Policy policy = new Policy().withStatements(new Statement(Statement.Effect.Allow)
159
+ .withActions(S3Actions.ListObjects, S3Actions.GetObject)
160
+ .withResources(
161
+ new Resource("arn:aws:s3:::" + EMBULK_S3_TEST_BUCKET + "/" + EMBULK_S3_TEST_PATH_PREFIX + "/*"),
162
+ new Resource("arn:aws:s3:::" + EMBULK_S3_TEST_BUCKET)));
163
+ getFederationTokenRequest.setPolicy(policy.toJson());
164
+
165
+ GetFederationTokenResult federationTokenResult = stsClient.getFederationToken(getFederationTokenRequest);
166
+ Credentials sessionCredentials = federationTokenResult.getCredentials();
167
+
168
+ return new BasicSessionCredentials(
169
+ sessionCredentials.getAccessKeyId(),
170
+ sessionCredentials.getSecretAccessKey(),
171
+ sessionCredentials.getSessionToken());
172
+ }
173
+ }
@@ -1,236 +1,220 @@
1
1
  package org.embulk.input.s3;
2
2
 
3
- import com.amazonaws.services.s3.AmazonS3Client;
4
- import com.amazonaws.services.s3.model.GetObjectRequest;
5
- import com.amazonaws.services.s3.model.ListObjectsRequest;
6
- import com.amazonaws.services.s3.model.ObjectListing;
7
- import com.amazonaws.services.s3.model.ObjectMetadata;
8
- import com.amazonaws.services.s3.model.S3Object;
9
- import com.amazonaws.services.s3.model.S3ObjectInputStream;
10
- import com.amazonaws.services.s3.model.S3ObjectSummary;
11
- import com.google.common.base.Optional;
12
3
  import com.google.common.collect.ImmutableList;
4
+ import com.google.common.collect.ImmutableMap;
13
5
  import org.embulk.EmbulkTestRuntime;
14
6
  import org.embulk.config.ConfigDiff;
15
7
  import org.embulk.config.ConfigSource;
16
8
  import org.embulk.config.TaskReport;
17
9
  import org.embulk.config.TaskSource;
18
- import org.embulk.input.s3.AbstractS3FileInputPlugin.PluginTask;
19
- import org.embulk.input.s3.AbstractS3FileInputPlugin.S3FileInput;
20
- import org.embulk.input.s3.S3FileInputPlugin.S3PluginTask;
21
- import org.embulk.spi.Exec;
22
- import org.embulk.spi.FileInputPlugin;
23
- import org.embulk.spi.util.LineDecoder;
10
+ import org.embulk.spi.FileInputRunner;
11
+ import org.embulk.spi.InputPlugin;
12
+ import org.embulk.spi.PageOutput;
13
+ import org.embulk.spi.Schema;
14
+ import org.embulk.spi.TestPageBuilderReader.MockPageOutput;
15
+ import org.embulk.spi.util.Pages;
16
+ import org.embulk.standards.CsvParserPlugin;
24
17
  import org.junit.Before;
18
+ import org.junit.BeforeClass;
25
19
  import org.junit.Rule;
26
20
  import org.junit.Test;
27
21
 
28
- import java.io.ByteArrayInputStream;
29
- import java.util.Arrays;
22
+ import java.util.ArrayList;
30
23
  import java.util.List;
31
24
 
32
- import static org.junit.Assert.*;
33
- import static org.mockito.Matchers.any;
34
- import static org.mockito.Mockito.doReturn;
35
- import static org.mockito.Mockito.mock;
36
- import static org.mockito.Mockito.spy;
25
+ import static org.junit.Assert.assertEquals;
26
+ import static org.junit.Assert.assertNull;
27
+ import static org.junit.Assume.assumeNotNull;
37
28
 
38
29
  public class TestS3FileInputPlugin
39
30
  {
31
+ private static String EMBULK_S3_TEST_BUCKET;
32
+ private static String EMBULK_S3_TEST_ACCESS_KEY_ID;
33
+ private static String EMBULK_S3_TEST_SECRET_ACCESS_KEY;
34
+ private static final String EMBULK_S3_TEST_PATH_PREFIX = "embulk_input_s3_test";
35
+
36
+ /*
37
+ * This test case requires environment variables:
38
+ * EMBULK_S3_TEST_BUCKET
39
+ * EMBULK_S3_TEST_ACCESS_KEY_ID
40
+ * EMBULK_S3_TEST_SECRET_ACCESS_KEY
41
+ * If the variables are not set, the test case is skipped.
42
+ */
43
+ @BeforeClass
44
+ public static void initializeConstantVariables()
45
+ {
46
+ EMBULK_S3_TEST_BUCKET = System.getenv("EMBULK_S3_TEST_BUCKET");
47
+ EMBULK_S3_TEST_ACCESS_KEY_ID = System.getenv("EMBULK_S3_TEST_ACCESS_KEY_ID");
48
+ EMBULK_S3_TEST_SECRET_ACCESS_KEY = System.getenv("EMBULK_S3_TEST_SECRET_ACCESS_KEY");
49
+ assumeNotNull(EMBULK_S3_TEST_BUCKET, EMBULK_S3_TEST_ACCESS_KEY_ID, EMBULK_S3_TEST_SECRET_ACCESS_KEY);
50
+ }
51
+
40
52
  @Rule
41
53
  public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
42
54
 
43
55
  private ConfigSource config;
44
- private S3FileInputPlugin plugin;
45
- private AmazonS3Client client;
56
+ private FileInputRunner runner;
57
+ private MockPageOutput output;
46
58
 
47
59
  @Before
48
60
  public void createResources()
49
61
  {
50
- config = config();
51
- plugin = spy(runtime.getInstance(S3FileInputPlugin.class));
52
- client = mock(AmazonS3Client.class);
62
+ config = runtime.getExec().newConfigSource()
63
+ .set("type", "s3")
64
+ .set("bucket", EMBULK_S3_TEST_BUCKET)
65
+ .set("access_key_id", EMBULK_S3_TEST_ACCESS_KEY_ID)
66
+ .set("secret_access_key", EMBULK_S3_TEST_SECRET_ACCESS_KEY)
67
+ .set("path_prefix", EMBULK_S3_TEST_PATH_PREFIX)
68
+ .set("parser", parserConfig(schemaConfig()));
69
+ runner = new FileInputRunner(runtime.getInstance(S3FileInputPlugin.class));
70
+ output = new MockPageOutput();
53
71
  }
54
72
 
55
73
  @Test
56
- public void checkS3ClientCreatedSuccessfully()
74
+ public void simpleTest()
57
75
  {
58
- PluginTask task = config().loadConfig(plugin.getTaskClass());
59
- plugin.newS3Client(task);
76
+ ConfigSource config = this.config.deepCopy();
77
+ ConfigDiff configDiff = runner.transaction(config, new Control(runner, output));
78
+
79
+ assertEquals(EMBULK_S3_TEST_PATH_PREFIX + "/sample_01.csv", configDiff.get(String.class, "last_path"));
80
+ assertRecords(config, output);
60
81
  }
61
82
 
62
83
  @Test
63
- public void listS3FilesByPrefix()
84
+ public void useLastPath()
85
+ throws Exception
64
86
  {
65
- // AWSS3Client returns list1 for the first iteration and list2 next.
66
- List<S3ObjectSummary> list1 = s3objects("in/", 0L, "in/file/", 0L, "in/file/sample.csv.gz", 12345L);
67
- List<S3ObjectSummary> list2 = s3objects("sample2.csv.gz", 0L);
68
- ObjectListing ol = mock(ObjectListing.class);
69
-
70
- doReturn(list1).doReturn(list2).when(ol).getObjectSummaries();
71
- doReturn(ol).when(client).listObjects(any(ListObjectsRequest.class));
72
- doReturn("in/file/").doReturn(null).when(ol).getNextMarker();
73
-
74
- // It counts only size != 0 files.
75
- FileList.Builder builder = new FileList.Builder();
76
- S3FileInputPlugin.listS3FilesByPrefix(builder, client, "bucketName", "prefix", Optional.<String>absent());
77
- assertEquals(1, builder.size());
87
+ ConfigSource config = this.config.deepCopy().set("last_path", EMBULK_S3_TEST_PATH_PREFIX + "/sample_01.csv");
88
+ ConfigDiff configDiff = runner.transaction(config, new Control(runner, output));
89
+
90
+ assertEquals(EMBULK_S3_TEST_PATH_PREFIX + "/sample_01.csv", configDiff.get(String.class, "last_path"));
91
+ assertEquals(0, getRecords(config, output).size());
78
92
  }
79
93
 
80
94
  @Test
81
- public void checkLastPath()
95
+ public void emptyFilesWithLastPath()
96
+ throws Exception
82
97
  {
83
- doReturn(null).when(client).listObjects(any(ListObjectsRequest.class));
84
- doReturn(client).when(plugin).newS3Client(any(PluginTask.class));
85
-
86
- { // set a last file to last_path
87
- ObjectListing listing = listing("in/aa", 0L, "in/aa/a", 3L, "in/aa/b", 2L, "in/aa/c", 1L);
88
- doReturn(listing).when(client).listObjects(any(ListObjectsRequest.class));
89
-
90
- ConfigDiff configDiff = plugin.transaction(config, new FileInputPlugin.Control() {
91
- @Override
92
- public List<TaskReport> run(TaskSource taskSource, int taskCount)
93
- {
94
- assertEquals(3, taskCount);
95
- List<String> files = fileListToList(taskSource.loadTask(S3PluginTask.class).getFiles());
96
- assertArrayEquals(new String[]{"in/aa/a", "in/aa/b", "in/aa/c"}, files.toArray(new String[files.size()]));
97
- return emptyTaskReports(taskCount);
98
- }
99
- });
100
-
101
- assertEquals("in/aa/c", configDiff.get(String.class, "last_path"));
102
- }
98
+ ConfigSource config = this.config.deepCopy()
99
+ .set("path_prefix", "empty_files_prefix")
100
+ .set("last_path", EMBULK_S3_TEST_PATH_PREFIX + "/sample_01.csv");
101
+ ConfigDiff configDiff = runner.transaction(config, new Control(runner, output));
103
102
 
104
- { // if files are empty and last_path is not specified, last_path is not set.
105
- ObjectListing listing = listing("in/aa", 0L);
106
- doReturn(listing).when(client).listObjects(any(ListObjectsRequest.class));
107
-
108
- ConfigDiff configDiff = plugin.transaction(config, new FileInputPlugin.Control() {
109
- @Override
110
- public List<TaskReport> run(TaskSource taskSource, int taskCount)
111
- {
112
- assertEquals(0, taskCount);
113
- assertTrue(fileListToList(taskSource.loadTask(S3PluginTask.class).getFiles()).isEmpty());
114
- return emptyTaskReports(taskCount);
115
- }
116
- });
117
-
118
- assertEquals(null, configDiff.get(String.class, "last_path", null));
119
- }
120
-
121
- { // if files are empty, keep the previous last_path.
122
- config.set("last_path", "in/bb");
123
-
124
- ObjectListing listing = listing("in/aa", 0L);
125
- doReturn(listing).when(client).listObjects(any(ListObjectsRequest.class));
126
-
127
- ConfigDiff configDiff = plugin.transaction(config, new FileInputPlugin.Control() {
128
- @Override
129
- public List<TaskReport> run(TaskSource taskSource, int taskCount) {
130
- assertEquals(0, taskCount);
131
- assertTrue(fileListToList(taskSource.loadTask(S3PluginTask.class).getFiles()).isEmpty());
132
- return emptyTaskReports(taskCount);
133
- }
134
- });
135
-
136
- assertEquals("in/bb", configDiff.get(String.class, "last_path"));
137
- }
103
+ assertEquals(EMBULK_S3_TEST_PATH_PREFIX + "/sample_01.csv", configDiff.get(String.class, "last_path")); // keep the last_path
104
+ assertEquals(0, getRecords(config, output).size());
138
105
  }
139
106
 
140
107
  @Test
141
- public void checkS3FileInputByOpen()
108
+ public void useTotalFileCountLimit()
142
109
  throws Exception
143
110
  {
144
- doReturn(s3object("in/aa/a", "aa")).when(client).getObject(any(GetObjectRequest.class));
145
- doReturn(client).when(plugin).newS3Client(any(PluginTask.class));
146
-
147
- PluginTask task = config.loadConfig(plugin.getTaskClass());
148
- FileList.Builder builder = new FileList.Builder();
149
- builder.add("in/aa/a", 100);
150
- task.setFiles(builder.build());
151
-
152
- StringBuilder sbuf = new StringBuilder();
153
- try (S3FileInput input = (S3FileInput) plugin.open(task.dump(), 0)) {
154
- LineDecoder d = new LineDecoder(input, config.loadConfig(LineDecoder.DecoderTask.class));
155
- while (d.nextFile()) {
156
- sbuf.append(d.poll());
157
- }
158
- }
159
- assertEquals("aa", sbuf.toString());
160
- }
111
+ ConfigSource config = this.config.deepCopy().set("total_file_count_limit", 0);
112
+ ConfigDiff configDiff = runner.transaction(config, new Control(runner, output));
161
113
 
162
- public static ConfigSource config()
163
- {
164
- return Exec.newConfigSource()
165
- .set("bucket", "my_bucket")
166
- .set("path_prefix", "my_path_prefix")
167
- .set("access_key_id", "my_access_key_id")
168
- .set("secret_access_key", "my_secret_access_key");
114
+ assertNull(configDiff.get(String.class, "last_path"));
115
+ assertEquals(0, getRecords(config, output).size());
169
116
  }
170
117
 
171
- static ObjectListing listing(Object... keySizes)
118
+ @Test
119
+ public void usePathMatchPattern()
120
+ throws Exception
172
121
  {
173
- ObjectListing listing = mock(ObjectListing.class);
174
- if (keySizes == null) {
175
- return listing;
176
- }
122
+ ConfigSource config = this.config.deepCopy().set("path_match_pattern", "/match/");
123
+ ConfigDiff configDiff = runner.transaction(config, new Control(runner, output));
177
124
 
178
- List<S3ObjectSummary> s3objects = s3objects(keySizes);
179
- doReturn(s3objects).when(listing).getObjectSummaries();
180
- doReturn(null).when(listing).getNextMarker();
181
- return listing;
125
+ assertNull(configDiff.get(String.class, "last_path"));
126
+ assertEquals(0, getRecords(config, output).size());
182
127
  }
183
128
 
184
- static List<S3ObjectSummary> s3objects(Object... keySizes)
129
+ static class Control
130
+ implements InputPlugin.Control
185
131
  {
186
- ImmutableList.Builder<S3ObjectSummary> builder = new ImmutableList.Builder<>();
187
- if (keySizes == null) {
188
- return builder.build();
132
+ private FileInputRunner runner;
133
+ private PageOutput output;
134
+
135
+ Control(FileInputRunner runner, PageOutput output)
136
+ {
137
+ this.runner = runner;
138
+ this.output = output;
189
139
  }
190
140
 
191
- for (int i = 0; i < keySizes.length; i += 2) {
192
- String key = (String) keySizes[i];
193
- long size = (Long) keySizes[i + 1];
194
- builder.add(s3object(key, size));
141
+ @Override
142
+ public List<TaskReport> run(TaskSource taskSource, Schema schema, int taskCount)
143
+ {
144
+ List<TaskReport> reports = new ArrayList<>();
145
+ for (int i = 0; i < taskCount; i++) {
146
+ reports.add(runner.run(taskSource, schema, i, output));
147
+ }
148
+ return reports;
195
149
  }
196
- return builder.build();
197
150
  }
198
151
 
199
- static S3ObjectSummary s3object(String key, long size)
152
+ static ImmutableMap<String, Object> parserConfig(ImmutableList<Object> schemaConfig)
200
153
  {
201
- S3ObjectSummary o = new S3ObjectSummary();
202
- o.setKey(key);
203
- o.setSize(size);
204
- return o;
154
+ ImmutableMap.Builder<String, Object> builder = new ImmutableMap.Builder<>();
155
+ builder.put("type", "csv");
156
+ builder.put("newline", "CRLF");
157
+ builder.put("delimiter", ",");
158
+ builder.put("quote", "\"");
159
+ builder.put("escape", "\"");
160
+ builder.put("trim_if_not_quoted", false);
161
+ builder.put("skip_header_lines", 0);
162
+ builder.put("allow_extra_columns", false);
163
+ builder.put("allow_optional_columns", false);
164
+ builder.put("columns", schemaConfig);
165
+ return builder.build();
205
166
  }
206
167
 
207
- static S3Object s3object(String key, String value)
168
+ static ImmutableList<Object> schemaConfig()
208
169
  {
209
- S3Object o = new S3Object();
210
- o.setObjectContent(new S3ObjectInputStream(new ByteArrayInputStream(value.getBytes()), null));
211
- ObjectMetadata om = new ObjectMetadata();
212
- om.setContentLength(value.length());
213
- o.setObjectMetadata(om);
214
- return o;
170
+ ImmutableList.Builder<Object> builder = new ImmutableList.Builder<>();
171
+ builder.add(ImmutableMap.of("name", "timestamp", "type", "timestamp", "format", "%Y-%m-%d %H:%M:%S"));
172
+ builder.add(ImmutableMap.of("name", "host", "type", "string"));
173
+ builder.add(ImmutableMap.of("name", "path", "type", "string"));
174
+ builder.add(ImmutableMap.of("name", "method", "type", "string"));
175
+ builder.add(ImmutableMap.of("name", "referer", "type", "string"));
176
+ builder.add(ImmutableMap.of("name", "code", "type", "long"));
177
+ builder.add(ImmutableMap.of("name", "agent", "type", "string"));
178
+ builder.add(ImmutableMap.of("name", "user", "type", "string"));
179
+ builder.add(ImmutableMap.of("name", "size", "type", "long"));
180
+ return builder.build();
215
181
  }
216
182
 
217
- static List<TaskReport> emptyTaskReports(int taskCount)
183
+ static void assertRecords(ConfigSource config, MockPageOutput output)
218
184
  {
219
- ImmutableList.Builder<TaskReport> reports = new ImmutableList.Builder<>();
220
- for (int i = 0; i < taskCount; i++) {
221
- reports.add(Exec.newTaskReport());
185
+ List<Object[]> records = getRecords(config, output);
186
+
187
+ assertEquals(2, records.size());
188
+ {
189
+ Object[] record = records.get(0);
190
+ assertEquals("2014-10-02 22:15:39 UTC", record[0].toString());
191
+ assertEquals("84.186.29.187", record[1]);
192
+ assertEquals("/category/electronics", record[2]);
193
+ assertEquals("GET", record[3]);
194
+ assertEquals("/category/music", record[4]);
195
+ assertEquals(200L, record[5]);
196
+ assertEquals("Mozilla/5.0", record[6]);
197
+ assertEquals("-", record[7]);
198
+ assertEquals(136L, record[8]);
199
+ }
200
+
201
+ {
202
+ Object[] record = records.get(1);
203
+ assertEquals("2014-10-02 22:15:01 UTC", record[0].toString());
204
+ assertEquals("140.36.216.47", record[1]);
205
+ assertEquals("/category/music?from=10", record[2]);
206
+ assertEquals("GET", record[3]);
207
+ assertEquals("-", record[4]);
208
+ assertEquals(200L, record[5]);
209
+ assertEquals("Mozilla/5.0", record[6]);
210
+ assertEquals("-", record[7]);
211
+ assertEquals(70L, record[8]);
222
212
  }
223
- return reports.build();
224
213
  }
225
214
 
226
- private static List<String> fileListToList(FileList list)
215
+ static List<Object[]> getRecords(ConfigSource config, MockPageOutput output)
227
216
  {
228
- ImmutableList.Builder<String> builder = ImmutableList.builder();
229
- for (int i=0; i < list.getTaskCount(); i++) {
230
- for (String path : list.get(i)) {
231
- builder.add(path);
232
- }
233
- }
234
- return builder.build();
217
+ Schema schema = config.getNested("parser").loadConfig(CsvParserPlugin.PluginTask.class).getSchemaConfig().toSchema();
218
+ return Pages.toObjects(schema, output.pages);
235
219
  }
236
220
  }
@@ -2,6 +2,9 @@ package org.embulk.input.s3;
2
2
 
3
3
  import com.amazonaws.services.s3.AmazonS3Client;
4
4
  import com.amazonaws.services.s3.model.GetObjectRequest;
5
+ import com.amazonaws.services.s3.model.ObjectMetadata;
6
+ import com.amazonaws.services.s3.model.S3Object;
7
+ import com.amazonaws.services.s3.model.S3ObjectInputStream;
5
8
  import org.embulk.EmbulkTestRuntime;
6
9
  import org.embulk.input.s3.AbstractS3FileInputPlugin.S3InputStreamReopener;
7
10
  import org.junit.Before;
@@ -9,11 +12,11 @@ import org.junit.Rule;
9
12
  import org.junit.Test;
10
13
 
11
14
  import java.io.BufferedReader;
15
+ import java.io.ByteArrayInputStream;
12
16
  import java.io.InputStream;
13
17
  import java.io.InputStreamReader;
14
18
 
15
19
  import static org.junit.Assert.assertEquals;
16
- import static org.embulk.input.s3.TestS3FileInputPlugin.s3object;
17
20
  import static org.mockito.Matchers.any;
18
21
  import static org.mockito.Mockito.doReturn;
19
22
  import static org.mockito.Mockito.doThrow;
@@ -60,4 +63,14 @@ public class TestS3InputStreamReopener
60
63
  }
61
64
  }
62
65
  }
66
+
67
+ static S3Object s3object(String key, String value)
68
+ {
69
+ S3Object o = new S3Object();
70
+ o.setObjectContent(new S3ObjectInputStream(new ByteArrayInputStream(value.getBytes()), null));
71
+ ObjectMetadata om = new ObjectMetadata();
72
+ om.setContentLength(value.length());
73
+ o.setObjectMetadata(om);
74
+ return o;
75
+ }
63
76
  }
@@ -0,0 +1,3 @@
1
+ timestamp,host,path,method,referer,code,agent,user,size
2
+ 2014-10-02 22:15:39,84.186.29.187,/category/electronics,GET,/category/music,200,Mozilla/5.0,-,136
3
+ 2014-10-02 22:15:01,140.36.216.47,/category/music?from=10,GET,-,200,Mozilla/5.0,-,70
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-s3
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.4
4
+ version: 0.2.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-11-18 00:00:00.000000000 Z
11
+ date: 2015-12-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -52,13 +52,15 @@ files:
52
52
  - src/main/java/org/embulk/input/s3/AwsCredentialsTask.java
53
53
  - src/main/java/org/embulk/input/s3/FileList.java
54
54
  - src/main/java/org/embulk/input/s3/S3FileInputPlugin.java
55
+ - src/test/java/org/embulk/input/s3/TestAwsCredentials.java
55
56
  - src/test/java/org/embulk/input/s3/TestS3FileInputPlugin.java
56
57
  - src/test/java/org/embulk/input/s3/TestS3InputStreamReopener.java
58
+ - src/test/resources/sample_01.csv
57
59
  - classpath/aws-java-sdk-core-1.10.33.jar
58
60
  - classpath/aws-java-sdk-kms-1.10.33.jar
59
61
  - classpath/aws-java-sdk-s3-1.10.33.jar
60
62
  - classpath/commons-codec-1.6.jar
61
- - classpath/embulk-input-s3-0.2.4.jar
63
+ - classpath/embulk-input-s3-0.2.5.jar
62
64
  - classpath/httpclient-4.3.6.jar
63
65
  - classpath/httpcore-4.3.3.jar
64
66
  - classpath/jcl-over-slf4j-1.7.12.jar
Binary file