embulk-input-s3 0.2.3 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f03ab171561f7cca3ff6fe8e17c63b73f31e2db3
4
- data.tar.gz: a636f66a873c449741f09c51a7b415f550517d26
3
+ metadata.gz: db7314fde9364e1d4ec9edbb67f90cfc5c93dbe7
4
+ data.tar.gz: b3133c6d3ea81cef907d8cd974b5471186e086b6
5
5
  SHA512:
6
- metadata.gz: 24baf759d3956a23317ed73f5878d9ec8d66bc493ba344131a16b325fb3558dd5fbab332dacfcac2f7abc89ecd3518cec89f4c232c7d17135159fa95fb444fac
7
- data.tar.gz: c83e6f98d5d03e45f83641d879a80c41be41d292d999a1b90ab783ab02f296faa77a3a2276b447804e78dd2908a8b1f7cc026105688eff34fea0fdbbde92130b
6
+ metadata.gz: 0cd7e8ce269c322e7a03262267ae23370a9cff12877b9f33b8bebf1769ebdf093896babe8193324a3f4aa5b37ccae6a67b380e94367ab65fcc381b3d0e9a848e
7
+ data.tar.gz: 0e934e0baf997bfb29b3e3ed337daa28133a38a513050931902dc5f3d2cd03f652f6b2cb31e7d8fecd5b7e3dfccab869ede056053892af9e3bab2dc8cbd1ae4b
@@ -3,6 +3,7 @@ package org.embulk.input.s3;
3
3
  import java.util.List;
4
4
  import java.util.ArrayList;
5
5
  import java.util.Collections;
6
+ import java.util.Iterator;
6
7
  import java.io.IOException;
7
8
  import java.io.InterruptedIOException;
8
9
  import java.io.InputStream;
@@ -14,8 +15,6 @@ import com.google.common.base.Throwables;
14
15
  import org.slf4j.Logger;
15
16
  import com.amazonaws.auth.AWSCredentials;
16
17
  import com.amazonaws.auth.AWSCredentialsProvider;
17
- import com.amazonaws.auth.BasicAWSCredentials;
18
- import com.amazonaws.auth.AnonymousAWSCredentials;
19
18
  import com.amazonaws.services.s3.AmazonS3Client;
20
19
  import com.amazonaws.services.s3.model.ListObjectsRequest;
21
20
  import com.amazonaws.services.s3.model.S3ObjectSummary;
@@ -48,7 +47,7 @@ public abstract class AbstractS3FileInputPlugin
48
47
  private final Logger log = Exec.getLogger(S3FileInputPlugin.class);
49
48
 
50
49
  public interface PluginTask
51
- extends Task
50
+ extends AwsCredentialsTask, FileList.Task, Task
52
51
  {
53
52
  @Config("bucket")
54
53
  public String getBucket();
@@ -64,16 +63,10 @@ public abstract class AbstractS3FileInputPlugin
64
63
  @ConfigDefault("null")
65
64
  public Optional<String> getAccessKeyId();
66
65
 
67
- @Config("secret_access_key")
68
- @ConfigDefault("null")
69
- public Optional<String> getSecretAccessKey();
70
-
71
66
  // TODO timeout, ssl, etc
72
67
 
73
- // TODO support more options such as STS
74
-
75
- public List<String> getFiles();
76
- public void setFiles(List<String> files);
68
+ public FileList getFiles();
69
+ public void setFiles(FileList files);
77
70
 
78
71
  @ConfigInject
79
72
  public BufferAllocator getBufferAllocator();
@@ -90,7 +83,7 @@ public abstract class AbstractS3FileInputPlugin
90
83
  task.setFiles(listFiles(task));
91
84
 
92
85
  // number of processors is same with number of files
93
- return resume(task.dump(), task.getFiles().size(), control);
86
+ return resume(task.dump(), task.getFiles().getTaskCount(), control);
94
87
  }
95
88
 
96
89
  @Override
@@ -109,16 +102,7 @@ public abstract class AbstractS3FileInputPlugin
109
102
  ConfigDiff configDiff = Exec.newConfigDiff();
110
103
 
111
104
  // last_path
112
- if (task.getFiles().isEmpty()) {
113
- // keep the last value
114
- if (task.getLastPath().isPresent()) {
115
- configDiff.set("last_path", task.getLastPath().get());
116
- }
117
- } else {
118
- List<String> files = new ArrayList<String>(task.getFiles());
119
- Collections.sort(files);
120
- configDiff.set("last_path", files.get(files.size() - 1));
121
- }
105
+ configDiff.set("last_path", task.getFiles().getLastPath(task.getLastPath()));
122
106
 
123
107
  return configDiff;
124
108
  }
@@ -138,24 +122,7 @@ public abstract class AbstractS3FileInputPlugin
138
122
 
139
123
  protected AWSCredentialsProvider getCredentialsProvider(PluginTask task)
140
124
  {
141
- final AWSCredentials cred;
142
- if (task.getAccessKeyId().isPresent()) {
143
- cred = new BasicAWSCredentials(
144
- task.getAccessKeyId().get(),
145
- task.getSecretAccessKey().get());
146
- } else {
147
- cred = new AnonymousAWSCredentials();
148
- }
149
- return new AWSCredentialsProvider() {
150
- public AWSCredentials getCredentials()
151
- {
152
- return cred;
153
- }
154
-
155
- public void refresh()
156
- {
157
- }
158
- };
125
+ return AwsCredentials.getAWSCredentialsProvider(task);
159
126
  }
160
127
 
161
128
  protected ClientConfiguration getClientConfiguration(PluginTask task)
@@ -170,7 +137,7 @@ public abstract class AbstractS3FileInputPlugin
170
137
  return clientConfig;
171
138
  }
172
139
 
173
- private List<String> listFiles(PluginTask task)
140
+ private FileList listFiles(PluginTask task)
174
141
  {
175
142
  AmazonS3Client client = newS3Client(task);
176
143
  String bucketName = task.getBucket();
@@ -179,7 +146,10 @@ public abstract class AbstractS3FileInputPlugin
179
146
  log.info("Listing files with prefix \"/\". This doesn't mean all files in a bucket. If you intend to read all files, use \"path_prefix: ''\" (empty string) instead.");
180
147
  }
181
148
 
182
- return listS3FilesByPrefix(client, bucketName, task.getPathPrefix(), task.getLastPath());
149
+ FileList.Builder builder = new FileList.Builder(task);
150
+ listS3FilesByPrefix(builder, client, bucketName,
151
+ task.getPathPrefix(), task.getLastPath());
152
+ return builder.build();
183
153
  }
184
154
 
185
155
  /**
@@ -187,24 +157,24 @@ public abstract class AbstractS3FileInputPlugin
187
157
  *
188
158
  * The resulting list does not include the file that's size == 0.
189
159
  */
190
- public static List<String> listS3FilesByPrefix(AmazonS3Client client, String bucketName,
160
+ public static void listS3FilesByPrefix(FileList.Builder builder,
161
+ AmazonS3Client client, String bucketName,
191
162
  String prefix, Optional<String> lastPath)
192
163
  {
193
- ImmutableList.Builder<String> builder = ImmutableList.builder();
194
-
195
164
  String lastKey = lastPath.orNull();
196
165
  do {
197
166
  ListObjectsRequest req = new ListObjectsRequest(bucketName, prefix, lastKey, null, 1024);
198
167
  ObjectListing ol = client.listObjects(req);
199
- for(S3ObjectSummary s : ol.getObjectSummaries()) {
168
+ for (S3ObjectSummary s : ol.getObjectSummaries()) {
200
169
  if (s.getSize() > 0) {
201
- builder.add(s.getKey());
170
+ builder.add(s.getKey(), s.getSize());
171
+ if (!builder.needsMore()) {
172
+ return;
173
+ }
202
174
  }
203
175
  }
204
176
  lastKey = ol.getNextMarker();
205
177
  } while(lastKey != null);
206
-
207
- return builder.build();
208
178
  }
209
179
 
210
180
  @Override
@@ -308,24 +278,22 @@ public abstract class AbstractS3FileInputPlugin
308
278
  {
309
279
  private AmazonS3Client client;
310
280
  private final String bucket;
311
- private final String key;
312
- private boolean opened = false;
281
+ private final Iterator<String> iterator;
313
282
 
314
283
  public SingleFileProvider(PluginTask task, int taskIndex)
315
284
  {
316
285
  this.client = newS3Client(task);
317
286
  this.bucket = task.getBucket();
318
- this.key = task.getFiles().get(taskIndex);
287
+ this.iterator = task.getFiles().get(taskIndex).iterator();
319
288
  }
320
289
 
321
290
  @Override
322
291
  public InputStream openNext() throws IOException
323
292
  {
324
- if (opened) {
293
+ if (!iterator.hasNext()) {
325
294
  return null;
326
295
  }
327
- opened = true;
328
- GetObjectRequest request = new GetObjectRequest(bucket, key);
296
+ GetObjectRequest request = new GetObjectRequest(bucket, iterator.next());
329
297
  S3Object obj = client.getObject(request);
330
298
  return new ResumableInputStream(obj.getObjectContent(), new S3InputStreamReopener(client, request, obj.getObjectMetadata().getContentLength()));
331
299
  }
@@ -0,0 +1,179 @@
1
+ package org.embulk.input.s3;
2
+
3
+ import com.google.common.base.Optional;
4
+ import com.amazonaws.auth.AWSCredentials;
5
+ import com.amazonaws.auth.AWSCredentialsProvider;
6
+ import com.amazonaws.auth.AWSSessionCredentials;
7
+ import com.amazonaws.auth.AWSSessionCredentialsProvider;
8
+ import com.amazonaws.auth.AnonymousAWSCredentials;
9
+ import com.amazonaws.auth.BasicAWSCredentials;
10
+ import com.amazonaws.auth.BasicSessionCredentials;
11
+ import com.amazonaws.auth.EnvironmentVariableCredentialsProvider;
12
+ import com.amazonaws.auth.InstanceProfileCredentialsProvider;
13
+ import com.amazonaws.auth.SystemPropertiesCredentialsProvider;
14
+ import com.amazonaws.auth.profile.ProfileCredentialsProvider;
15
+ import com.amazonaws.auth.profile.ProfilesConfigFile;
16
+ import org.embulk.config.ConfigException;
17
+ import org.embulk.spi.Exec;
18
+ import org.embulk.spi.unit.LocalFile;
19
+ import org.slf4j.Logger;
20
+
21
+ public abstract class AwsCredentials
22
+ {
23
+ private AwsCredentials() { }
24
+
25
+ public static AWSCredentialsProvider getAWSCredentialsProvider(AwsCredentialsTask task) // maps task.getAuthMethod() to a provider; throws ConfigException on invalid option combinations
26
+ {
27
+ switch (task.getAuthMethod()) {
28
+ case "basic":
29
+ // for backward compatibility: when neither key is set, fall back to anonymous access (with warnings)
30
+ if (!task.getAccessKeyId().isPresent() && !task.getSecretAccessKey().isPresent()) {
31
+ final Logger log = Exec.getLogger(AwsCredentials.class);
32
+ log.warn("Both access_key_id and secret_access_key are not set. Assuming that 'auth_method: anonymous' option is set.");
33
+ log.warn("If you intentionally use anonymous authentication, please set 'auth_method: anonymous' option.");
34
+ log.warn("This behavior will be removed in a future release.");
35
+ reject(task.getSessionToken(), "session_token");
36
+ reject(task.getProfileFile(), "profile_file");
37
+ reject(task.getProfileName(), "profile_name");
38
+ return new AWSCredentialsProvider() {
39
+ public AWSCredentials getCredentials()
40
+ {
41
+ return new AnonymousAWSCredentials();
42
+ }
43
+
44
+ public void refresh() { }
45
+ };
46
+ }
47
+ else {
48
+ String accessKeyId = require(task.getAccessKeyId(), "'access_key_id', 'secret_access_key'");
49
+ String secretAccessKey = require(task.getSecretAccessKey(), "'secret_access_key'");
50
+ reject(task.getSessionToken(), "session_token");
51
+ reject(task.getProfileFile(), "profile_file");
52
+ reject(task.getProfileName(), "profile_name");
53
+ final BasicAWSCredentials creds = new BasicAWSCredentials(accessKeyId, secretAccessKey);
54
+ return new AWSCredentialsProvider() {
55
+ public AWSCredentials getCredentials()
56
+ {
57
+ return creds;
58
+ }
59
+
60
+ public void refresh() { }
61
+ };
62
+ }
63
+
64
+ case "env":
65
+ reject(task.getAccessKeyId(), "access_key_id");
66
+ reject(task.getSecretAccessKey(), "secret_access_key");
67
+ reject(task.getSessionToken(), "session_token");
68
+ reject(task.getProfileFile(), "profile_file");
69
+ reject(task.getProfileName(), "profile_name");
70
+ return overwriteBasicCredentials(task, new EnvironmentVariableCredentialsProvider().getCredentials()); // resolved now; the task is rewritten to 'basic' with the resolved keys
71
+
72
+ case "instance":
73
+ reject(task.getAccessKeyId(), "access_key_id");
74
+ reject(task.getSecretAccessKey(), "secret_access_key");
75
+ reject(task.getSessionToken(), "session_token");
76
+ reject(task.getProfileFile(), "profile_file");
77
+ reject(task.getProfileName(), "profile_name");
78
+ return new InstanceProfileCredentialsProvider();
79
+
80
+ case "profile":
81
+ {
82
+ reject(task.getAccessKeyId(), "access_key_id");
83
+ reject(task.getSecretAccessKey(), "secret_access_key");
84
+ reject(task.getSessionToken(), "session_token");
85
+
86
+ String profileName = task.getProfileName().or("default");
87
+ ProfileCredentialsProvider provider;
88
+ if (task.getProfileFile().isPresent()) {
89
+ ProfilesConfigFile file = new ProfilesConfigFile(task.getProfileFile().get().getFile());
90
+ provider = new ProfileCredentialsProvider(file, profileName);
91
+ }
92
+ else {
93
+ provider = new ProfileCredentialsProvider(profileName);
94
+ }
95
+ task.setProfileName(Optional.<String>absent()); // NOTE(review): presumably cleared so the rewritten 'basic' task passes reject() later - confirm
96
+ task.setProfileFile(Optional.<LocalFile>absent());
97
+
98
+ return overwriteBasicCredentials(task, provider.getCredentials());
99
+ }
100
+
101
+ case "properties":
102
+ reject(task.getAccessKeyId(), "access_key_id");
103
+ reject(task.getSecretAccessKey(), "secret_access_key");
104
+ reject(task.getSessionToken(), "session_token");
105
+ reject(task.getProfileFile(), "profile_file");
106
+ reject(task.getProfileName(), "profile_name");
107
+ return overwriteBasicCredentials(task, new SystemPropertiesCredentialsProvider().getCredentials());
108
+
109
+ case "anonymous":
110
+ reject(task.getAccessKeyId(), "access_key_id");
111
+ reject(task.getSecretAccessKey(), "secret_access_key");
112
+ reject(task.getSessionToken(), "session_token");
113
+ reject(task.getProfileFile(), "profile_file");
114
+ reject(task.getProfileName(), "profile_name");
115
+ return new AWSCredentialsProvider() {
116
+ public AWSCredentials getCredentials()
117
+ {
118
+ return new AnonymousAWSCredentials();
119
+ }
120
+
121
+ public void refresh() { }
122
+ };
123
+
124
+ case "session":
125
+ {
126
+ String accessKeyId = require(task.getAccessKeyId(), "'access_key_id', 'secret_access_key', 'session_token'");
127
+ String secretAccessKey = require(task.getSecretAccessKey(), "'secret_access_key', 'session_token'");
128
+ String sessionToken = require(task.getSessionToken(), "'session_token'");
129
+ reject(task.getProfileFile(), "profile_file");
130
+ reject(task.getProfileName(), "profile_name");
131
+ final AWSSessionCredentials creds = new BasicSessionCredentials(accessKeyId, secretAccessKey, sessionToken);
132
+ return new AWSSessionCredentialsProvider() {
133
+ public AWSSessionCredentials getCredentials()
134
+ {
135
+ return creds;
136
+ }
137
+
138
+ public void refresh() { }
139
+ };
140
+ }
141
+
142
+ default:
143
+ throw new ConfigException(String.format("Unknown auth_method '%s'. Supported methods are basic, env, instance, profile, properties, anonymous, and session.",
144
+ task.getAuthMethod()));
145
+ }
146
+ }
147
+
148
+ private static AWSCredentialsProvider overwriteBasicCredentials(AwsCredentialsTask task, final AWSCredentials creds)
149
+ {
150
+ task.setAuthMethod("basic");
151
+ task.setAccessKeyId(Optional.of(creds.getAWSAccessKeyId()));
152
+ task.setSecretAccessKey(Optional.of(creds.getAWSSecretKey()));
153
+ return new AWSCredentialsProvider() {
154
+ public AWSCredentials getCredentials()
155
+ {
156
+ return creds;
157
+ }
158
+
159
+ public void refresh() { }
160
+ };
161
+ }
162
+
163
+ private static <T> T require(Optional<T> value, String message)
164
+ {
165
+ if (value.isPresent()) {
166
+ return value.get();
167
+ }
168
+ else {
169
+ throw new ConfigException("Required option is not set: " + message);
170
+ }
171
+ }
172
+
173
+ private static <T> void reject(Optional<T> value, String message)
174
+ {
175
+ if (value.isPresent()) {
176
+ throw new ConfigException("Invalid option is set: " + message);
177
+ }
178
+ }
179
+ }
@@ -0,0 +1,39 @@
1
+ package org.embulk.input.s3;
2
+
3
+ import com.google.common.base.Optional;
4
+ import org.embulk.config.Config;
5
+ import org.embulk.config.ConfigDefault;
6
+ import org.embulk.spi.unit.LocalFile;
7
+
8
+ public interface AwsCredentialsTask
9
+ {
10
+ @Config("auth_method")
11
+ @ConfigDefault("\"basic\"")
12
+ String getAuthMethod();
13
+ void setAuthMethod(String method);
14
+
15
+ @Config("access_key_id")
16
+ @ConfigDefault("null")
17
+ Optional<String> getAccessKeyId();
18
+ void setAccessKeyId(Optional<String> value);
19
+
20
+ @Config("secret_access_key")
21
+ @ConfigDefault("null")
22
+ Optional<String> getSecretAccessKey();
23
+ void setSecretAccessKey(Optional<String> value);
24
+
25
+ @Config("session_token")
26
+ @ConfigDefault("null")
27
+ Optional<String> getSessionToken();
28
+ void setSessionToken(Optional<String> value);
29
+
30
+ @Config("profile_file")
31
+ @ConfigDefault("null")
32
+ Optional<LocalFile> getProfileFile();
33
+ void setProfileFile(Optional<LocalFile> value);
34
+
35
+ @Config("profile_name")
36
+ @ConfigDefault("null")
37
+ Optional<String> getProfileName();
38
+ void setProfileName(Optional<String> value);
39
+ }
@@ -0,0 +1,289 @@
1
+ package org.embulk.input.s3;
2
+
3
+ import java.util.List;
4
+ import java.util.AbstractList;
5
+ import java.util.ArrayList;
6
+ import java.util.zip.GZIPInputStream;
7
+ import java.util.zip.GZIPOutputStream;
8
+ import java.io.InputStream;
9
+ import java.io.OutputStream;
10
+ import java.io.BufferedOutputStream;
11
+ import java.io.BufferedInputStream;
12
+ import java.io.ByteArrayInputStream;
13
+ import java.io.ByteArrayOutputStream;
14
+ import java.io.IOException;
15
+ import java.nio.ByteBuffer;
16
+ import java.nio.charset.StandardCharsets;
17
+ import org.embulk.config.Config;
18
+ import org.embulk.config.ConfigDefault;
19
+ import org.embulk.config.ConfigSource;
20
+ import com.google.common.base.Throwables;
21
+ import com.google.common.base.Optional;
22
+ import com.google.common.collect.ImmutableList;
23
+ import com.fasterxml.jackson.annotation.JsonProperty;
24
+ import com.fasterxml.jackson.annotation.JsonIgnore;
25
+ import com.fasterxml.jackson.annotation.JsonCreator;
26
+
27
+ // this class should be moved to embulk-core
28
+ public class FileList
29
+ {
30
+ public interface Task
31
+ {
32
+ @Config("total_file_count_limit")
33
+ @ConfigDefault("2147483647")
34
+ int getTotalFileCountLimit();
35
+ }
36
+
37
+ public static class Entry
38
+ {
39
+ private int index;
40
+ private long size;
41
+
42
+ @JsonCreator
43
+ public Entry(
44
+ @JsonProperty("index") int index,
45
+ @JsonProperty("size") long size)
46
+ {
47
+ this.index = index;
48
+ this.size = size;
49
+ }
50
+
51
+ @JsonProperty("index")
52
+ public int getIndex() { return index; }
53
+
54
+ @JsonProperty("size")
55
+ public long getSize() { return size; }
56
+ }
57
+
58
+ public static class Builder
59
+ {
60
+ private final ByteArrayOutputStream binary;
61
+ private final OutputStream stream;
62
+ private final List<Entry> entries = new ArrayList<>();
63
+ private String last = null;
64
+
65
+ private int limitCount = Integer.MAX_VALUE;
66
+ private final ByteBuffer castBuffer = ByteBuffer.allocate(4);
67
+
68
+ public Builder(Task task)
69
+ {
70
+ this();
71
+ this.limitCount = task.getTotalFileCountLimit();
72
+ }
73
+
74
+ public Builder(ConfigSource config)
75
+ {
76
+ this();
77
+ this.limitCount = config.get(int.class, "total_file_count_limit", Integer.MAX_VALUE);
78
+ }
79
+
80
+ public Builder()
81
+ {
82
+ binary = new ByteArrayOutputStream();
83
+ try {
84
+ stream = new BufferedOutputStream(new GZIPOutputStream(binary));
85
+ }
86
+ catch (IOException ex) {
87
+ throw Throwables.propagate(ex);
88
+ }
89
+ }
90
+
91
+ public Builder limitTotalFileCount(int limitCount)
92
+ {
93
+ this.limitCount = limitCount;
94
+ return this;
95
+ }
96
+
97
+ public int size()
98
+ {
99
+ return entries.size();
100
+ }
101
+
102
+ public boolean needsMore()
103
+ {
104
+ return size() < limitCount;
105
+ }
106
+
107
+ public synchronized boolean add(String path, long size)
108
+ {
109
+ // TODO throw IllegalStateException if stream is already closed
110
+
111
+ if (!needsMore()) {
112
+ return false;
113
+ }
114
+
115
+ // TODO in the future, support some other filtering parameters (file name suffix filter, regex filter, etc)
116
+ // and return false if filtered out.
117
+
118
+ int index = entries.size();
119
+ entries.add(new Entry(index, size));
120
+
121
+ byte[] data = path.getBytes(StandardCharsets.UTF_8);
122
+ castBuffer.putInt(0, data.length);
123
+ try {
124
+ stream.write(castBuffer.array());
125
+ stream.write(data);
126
+ }
127
+ catch (IOException ex) {
128
+ throw Throwables.propagate(ex);
129
+ }
130
+
131
+ last = path;
132
+ return true;
133
+ }
134
+
135
+ public FileList build()
136
+ {
137
+ try {
138
+ stream.close();
139
+ }
140
+ catch (IOException ex) {
141
+ throw Throwables.propagate(ex);
142
+ }
143
+ return new FileList(binary.toByteArray(), getSplits(entries), Optional.fromNullable(last));
144
+ }
145
+
146
+ private List<List<Entry>> getSplits(List<Entry> all)
147
+ {
148
+ // TODO combine multiple entries into one task using some configuration parameters
149
+ List<List<Entry>> tasks = new ArrayList<>();
150
+ for (Entry entry : all) {
151
+ tasks.add(ImmutableList.of(entry));
152
+ }
153
+ return tasks;
154
+ }
155
+ }
156
+
157
+ private final byte[] data;
158
+ private final List<List<Entry>> tasks;
159
+ private final Optional<String> last;
160
+
161
+ @JsonCreator
162
+ @Deprecated
163
+ public FileList(
164
+ @JsonProperty("data") byte[] data,
165
+ @JsonProperty("tasks") List<List<Entry>> tasks,
166
+ @JsonProperty("last") Optional<String> last)
167
+ {
168
+ this.data = data;
169
+ this.tasks = tasks;
170
+ this.last = last;
171
+ }
172
+
173
+ @JsonIgnore
174
+ public Optional<String> getLastPath(Optional<String> lastLastPath)
175
+ {
176
+ if (last.isPresent()) {
177
+ return last;
178
+ }
179
+ return lastLastPath;
180
+ }
181
+
182
+ @JsonIgnore
183
+ public int getTaskCount()
184
+ {
185
+ return tasks.size();
186
+ }
187
+
188
+ @JsonIgnore
189
+ public List<String> get(int i)
190
+ {
191
+ return new EntryList(data, tasks.get(i));
192
+ }
193
+
194
+ @JsonProperty("data")
195
+ @Deprecated
196
+ public byte[] getData()
197
+ {
198
+ return data;
199
+ }
200
+
201
+ @JsonProperty("tasks")
202
+ @Deprecated
203
+ public List<List<Entry>> getTasks()
204
+ {
205
+ return tasks;
206
+ }
207
+
208
+ @JsonProperty("last")
209
+ @Deprecated
210
+ public Optional<String> getLast()
211
+ {
212
+ return last;
213
+ }
214
+
215
+ private class EntryList
216
+ extends AbstractList<String>
217
+ {
218
+ private final byte[] data;
219
+ private final List<Entry> entries;
220
+ private InputStream stream;
221
+ private int current;
222
+
223
+ private final ByteBuffer castBuffer = ByteBuffer.allocate(4);
224
+
225
+ public EntryList(byte[] data, List<Entry> entries)
226
+ {
227
+ this.data = data;
228
+ this.entries = entries;
229
+ try {
230
+ this.stream = new BufferedInputStream(new GZIPInputStream(new ByteArrayInputStream(data)));
231
+ }
232
+ catch (IOException ex) {
233
+ throw Throwables.propagate(ex);
234
+ }
235
+ this.current = 0;
236
+ }
237
+
238
+ @Override
239
+ public synchronized String get(int i)
240
+ {
241
+ Entry e = entries.get(i);
242
+ if (e.getIndex() < current) {
243
+ // rewind to the head
244
+ try {
245
+ stream.close();
246
+ stream = new BufferedInputStream(new GZIPInputStream(new ByteArrayInputStream(data)));
247
+ }
248
+ catch (IOException ex) {
249
+ throw Throwables.propagate(ex);
250
+ }
251
+ current = 0;
252
+ }
253
+
254
+ while (current < e.getIndex()) {
255
+ readNext();
256
+ }
257
+ // now current == e.getIndex()
258
+ return readNextString();
259
+ }
260
+
261
+ @Override
262
+ public int size()
263
+ {
264
+ return entries.size();
265
+ }
266
+
267
+ private byte[] readNext() // reads one length-prefixed record (4-byte int length, then that many bytes) and advances 'current'
268
+ {
269
+ try {
270
+ new java.io.DataInputStream(stream).readFully(castBuffer.array()); // readFully: a bare read() may return fewer bytes than requested
271
+ int n = castBuffer.getInt(0);
272
+ byte[] b = new byte[n]; // here should be able to use a pooled buffer because read data is ignored if readNextString doesn't call this method
273
+ new java.io.DataInputStream(stream).readFully(b); // throws EOFException (an IOException) if the data ends early
274
+
275
+ current++;
276
+
277
+ return b;
278
+ }
279
+ catch (IOException ex) {
280
+ throw Throwables.propagate(ex);
281
+ }
282
+ }
283
+
284
+ private String readNextString()
285
+ {
286
+ return new String(readNext(), StandardCharsets.UTF_8);
287
+ }
288
+ }
289
+ }
@@ -72,7 +72,9 @@ public class TestS3FileInputPlugin
72
72
  doReturn("in/file/").doReturn(null).when(ol).getNextMarker();
73
73
 
74
74
  // It counts only size != 0 files.
75
- assertEquals(1, S3FileInputPlugin.listS3FilesByPrefix(client, "bucketName", "prefix", Optional.<String>absent()).size());
75
+ FileList.Builder builder = new FileList.Builder();
76
+ S3FileInputPlugin.listS3FilesByPrefix(builder, client, "bucketName", "prefix", Optional.<String>absent());
77
+ assertEquals(1, builder.size());
76
78
  }
77
79
 
78
80
  @Test
@@ -90,7 +92,7 @@ public class TestS3FileInputPlugin
90
92
  public List<TaskReport> run(TaskSource taskSource, int taskCount)
91
93
  {
92
94
  assertEquals(3, taskCount);
93
- List<String> files = taskSource.loadTask(S3PluginTask.class).getFiles();
95
+ List<String> files = fileListToList(taskSource.loadTask(S3PluginTask.class).getFiles());
94
96
  assertArrayEquals(new String[]{"in/aa/a", "in/aa/b", "in/aa/c"}, files.toArray(new String[files.size()]));
95
97
  return emptyTaskReports(taskCount);
96
98
  }
@@ -108,12 +110,12 @@ public class TestS3FileInputPlugin
108
110
  public List<TaskReport> run(TaskSource taskSource, int taskCount)
109
111
  {
110
112
  assertEquals(0, taskCount);
111
- assertTrue(taskSource.loadTask(S3PluginTask.class).getFiles().isEmpty());
113
+ assertTrue(fileListToList(taskSource.loadTask(S3PluginTask.class).getFiles()).isEmpty());
112
114
  return emptyTaskReports(taskCount);
113
115
  }
114
116
  });
115
117
 
116
- assertFalse(configDiff.has("last_path"));
118
+ assertEquals(null, configDiff.get(String.class, "last_path", null));
117
119
  }
118
120
 
119
121
  { // if files are empty, keep the previous last_path.
@@ -126,7 +128,7 @@ public class TestS3FileInputPlugin
126
128
  @Override
127
129
  public List<TaskReport> run(TaskSource taskSource, int taskCount) {
128
130
  assertEquals(0, taskCount);
129
- assertTrue(taskSource.loadTask(S3PluginTask.class).getFiles().isEmpty());
131
+ assertTrue(fileListToList(taskSource.loadTask(S3PluginTask.class).getFiles()).isEmpty());
130
132
  return emptyTaskReports(taskCount);
131
133
  }
132
134
  });
@@ -143,7 +145,9 @@ public class TestS3FileInputPlugin
143
145
  doReturn(client).when(plugin).newS3Client(any(PluginTask.class));
144
146
 
145
147
  PluginTask task = config.loadConfig(plugin.getTaskClass());
146
- task.setFiles(Arrays.asList(new String[]{"in/aa/a"}));
148
+ FileList.Builder builder = new FileList.Builder();
149
+ builder.add("in/aa/a", 100);
150
+ task.setFiles(builder.build());
147
151
 
148
152
  StringBuilder sbuf = new StringBuilder();
149
153
  try (S3FileInput input = (S3FileInput) plugin.open(task.dump(), 0)) {
@@ -218,4 +222,15 @@ public class TestS3FileInputPlugin
218
222
  }
219
223
  return reports.build();
220
224
  }
225
+
226
+ private static List<String> fileListToList(FileList list)
227
+ {
228
+ ImmutableList.Builder<String> builder = ImmutableList.builder();
229
+ for (int i=0; i < list.getTaskCount(); i++) {
230
+ for (String path : list.get(i)) {
231
+ builder.add(path);
232
+ }
233
+ }
234
+ return builder.build();
235
+ }
221
236
  }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-s3
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.2.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-10-15 00:00:00.000000000 Z
11
+ date: 2015-11-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -48,18 +48,20 @@ files:
48
48
  - build.gradle
49
49
  - lib/embulk/input/s3.rb
50
50
  - src/main/java/org/embulk/input/s3/AbstractS3FileInputPlugin.java
51
+ - src/main/java/org/embulk/input/s3/AwsCredentials.java
52
+ - src/main/java/org/embulk/input/s3/AwsCredentialsTask.java
53
+ - src/main/java/org/embulk/input/s3/FileList.java
51
54
  - src/main/java/org/embulk/input/s3/S3FileInputPlugin.java
52
55
  - src/test/java/org/embulk/input/s3/TestS3FileInputPlugin.java
53
56
  - src/test/java/org/embulk/input/s3/TestS3InputStreamReopener.java
54
- - classpath/aws-java-sdk-core-1.9.22.jar
55
- - classpath/aws-java-sdk-kms-1.9.22.jar
56
- - classpath/aws-java-sdk-s3-1.9.22.jar
57
+ - classpath/aws-java-sdk-core-1.10.33.jar
58
+ - classpath/aws-java-sdk-kms-1.10.33.jar
59
+ - classpath/aws-java-sdk-s3-1.10.33.jar
57
60
  - classpath/commons-codec-1.6.jar
58
- - classpath/embulk-input-s3-0.2.3.jar
59
- - classpath/httpclient-4.3.4.jar
60
- - classpath/httpcore-4.3.2.jar
61
+ - classpath/embulk-input-s3-0.2.4.jar
62
+ - classpath/httpclient-4.3.6.jar
63
+ - classpath/httpcore-4.3.3.jar
61
64
  - classpath/jcl-over-slf4j-1.7.12.jar
62
- - classpath/joda-time-2.8.2.jar
63
65
  homepage: https://github.com/embulk/embulk-input-s3
64
66
  licenses:
65
67
  - Apache 2.0