embulk-input-s3 0.2.3 → 0.2.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f03ab171561f7cca3ff6fe8e17c63b73f31e2db3
4
- data.tar.gz: a636f66a873c449741f09c51a7b415f550517d26
3
+ metadata.gz: db7314fde9364e1d4ec9edbb67f90cfc5c93dbe7
4
+ data.tar.gz: b3133c6d3ea81cef907d8cd974b5471186e086b6
5
5
  SHA512:
6
- metadata.gz: 24baf759d3956a23317ed73f5878d9ec8d66bc493ba344131a16b325fb3558dd5fbab332dacfcac2f7abc89ecd3518cec89f4c232c7d17135159fa95fb444fac
7
- data.tar.gz: c83e6f98d5d03e45f83641d879a80c41be41d292d999a1b90ab783ab02f296faa77a3a2276b447804e78dd2908a8b1f7cc026105688eff34fea0fdbbde92130b
6
+ metadata.gz: 0cd7e8ce269c322e7a03262267ae23370a9cff12877b9f33b8bebf1769ebdf093896babe8193324a3f4aa5b37ccae6a67b380e94367ab65fcc381b3d0e9a848e
7
+ data.tar.gz: 0e934e0baf997bfb29b3e3ed337daa28133a38a513050931902dc5f3d2cd03f652f6b2cb31e7d8fecd5b7e3dfccab869ede056053892af9e3bab2dc8cbd1ae4b
@@ -3,6 +3,7 @@ package org.embulk.input.s3;
3
3
  import java.util.List;
4
4
  import java.util.ArrayList;
5
5
  import java.util.Collections;
6
+ import java.util.Iterator;
6
7
  import java.io.IOException;
7
8
  import java.io.InterruptedIOException;
8
9
  import java.io.InputStream;
@@ -14,8 +15,6 @@ import com.google.common.base.Throwables;
14
15
  import org.slf4j.Logger;
15
16
  import com.amazonaws.auth.AWSCredentials;
16
17
  import com.amazonaws.auth.AWSCredentialsProvider;
17
- import com.amazonaws.auth.BasicAWSCredentials;
18
- import com.amazonaws.auth.AnonymousAWSCredentials;
19
18
  import com.amazonaws.services.s3.AmazonS3Client;
20
19
  import com.amazonaws.services.s3.model.ListObjectsRequest;
21
20
  import com.amazonaws.services.s3.model.S3ObjectSummary;
@@ -48,7 +47,7 @@ public abstract class AbstractS3FileInputPlugin
48
47
  private final Logger log = Exec.getLogger(S3FileInputPlugin.class);
49
48
 
50
49
  public interface PluginTask
51
- extends Task
50
+ extends AwsCredentialsTask, FileList.Task, Task
52
51
  {
53
52
  @Config("bucket")
54
53
  public String getBucket();
@@ -64,16 +63,10 @@ public abstract class AbstractS3FileInputPlugin
64
63
  @ConfigDefault("null")
65
64
  public Optional<String> getAccessKeyId();
66
65
 
67
- @Config("secret_access_key")
68
- @ConfigDefault("null")
69
- public Optional<String> getSecretAccessKey();
70
-
71
66
  // TODO timeout, ssl, etc
72
67
 
73
- // TODO support more options such as STS
74
-
75
- public List<String> getFiles();
76
- public void setFiles(List<String> files);
68
+ public FileList getFiles();
69
+ public void setFiles(FileList files);
77
70
 
78
71
  @ConfigInject
79
72
  public BufferAllocator getBufferAllocator();
@@ -90,7 +83,7 @@ public abstract class AbstractS3FileInputPlugin
90
83
  task.setFiles(listFiles(task));
91
84
 
92
85
  // number of processors is same with number of files
93
- return resume(task.dump(), task.getFiles().size(), control);
86
+ return resume(task.dump(), task.getFiles().getTaskCount(), control);
94
87
  }
95
88
 
96
89
  @Override
@@ -109,16 +102,7 @@ public abstract class AbstractS3FileInputPlugin
109
102
  ConfigDiff configDiff = Exec.newConfigDiff();
110
103
 
111
104
  // last_path
112
- if (task.getFiles().isEmpty()) {
113
- // keep the last value
114
- if (task.getLastPath().isPresent()) {
115
- configDiff.set("last_path", task.getLastPath().get());
116
- }
117
- } else {
118
- List<String> files = new ArrayList<String>(task.getFiles());
119
- Collections.sort(files);
120
- configDiff.set("last_path", files.get(files.size() - 1));
121
- }
105
+ configDiff.set("last_path", task.getFiles().getLastPath(task.getLastPath()));
122
106
 
123
107
  return configDiff;
124
108
  }
@@ -138,24 +122,7 @@ public abstract class AbstractS3FileInputPlugin
138
122
 
139
123
  protected AWSCredentialsProvider getCredentialsProvider(PluginTask task)
140
124
  {
141
- final AWSCredentials cred;
142
- if (task.getAccessKeyId().isPresent()) {
143
- cred = new BasicAWSCredentials(
144
- task.getAccessKeyId().get(),
145
- task.getSecretAccessKey().get());
146
- } else {
147
- cred = new AnonymousAWSCredentials();
148
- }
149
- return new AWSCredentialsProvider() {
150
- public AWSCredentials getCredentials()
151
- {
152
- return cred;
153
- }
154
-
155
- public void refresh()
156
- {
157
- }
158
- };
125
+ return AwsCredentials.getAWSCredentialsProvider(task);
159
126
  }
160
127
 
161
128
  protected ClientConfiguration getClientConfiguration(PluginTask task)
@@ -170,7 +137,7 @@ public abstract class AbstractS3FileInputPlugin
170
137
  return clientConfig;
171
138
  }
172
139
 
173
- private List<String> listFiles(PluginTask task)
140
+ private FileList listFiles(PluginTask task)
174
141
  {
175
142
  AmazonS3Client client = newS3Client(task);
176
143
  String bucketName = task.getBucket();
@@ -179,7 +146,10 @@ public abstract class AbstractS3FileInputPlugin
179
146
  log.info("Listing files with prefix \"/\". This doesn't mean all files in a bucket. If you intend to read all files, use \"path_prefix: ''\" (empty string) instead.");
180
147
  }
181
148
 
182
- return listS3FilesByPrefix(client, bucketName, task.getPathPrefix(), task.getLastPath());
149
+ FileList.Builder builder = new FileList.Builder(task);
150
+ listS3FilesByPrefix(builder, client, bucketName,
151
+ task.getPathPrefix(), task.getLastPath());
152
+ return builder.build();
183
153
  }
184
154
 
185
155
  /**
@@ -187,24 +157,24 @@ public abstract class AbstractS3FileInputPlugin
187
157
  *
188
158
  * The resulting list does not include the file that's size == 0.
189
159
  */
190
- public static List<String> listS3FilesByPrefix(AmazonS3Client client, String bucketName,
160
+ public static void listS3FilesByPrefix(FileList.Builder builder,
161
+ AmazonS3Client client, String bucketName,
191
162
  String prefix, Optional<String> lastPath)
192
163
  {
193
- ImmutableList.Builder<String> builder = ImmutableList.builder();
194
-
195
164
  String lastKey = lastPath.orNull();
196
165
  do {
197
166
  ListObjectsRequest req = new ListObjectsRequest(bucketName, prefix, lastKey, null, 1024);
198
167
  ObjectListing ol = client.listObjects(req);
199
- for(S3ObjectSummary s : ol.getObjectSummaries()) {
168
+ for (S3ObjectSummary s : ol.getObjectSummaries()) {
200
169
  if (s.getSize() > 0) {
201
- builder.add(s.getKey());
170
+ builder.add(s.getKey(), s.getSize());
171
+ if (!builder.needsMore()) {
172
+ return;
173
+ }
202
174
  }
203
175
  }
204
176
  lastKey = ol.getNextMarker();
205
177
  } while(lastKey != null);
206
-
207
- return builder.build();
208
178
  }
209
179
 
210
180
  @Override
@@ -308,24 +278,22 @@ public abstract class AbstractS3FileInputPlugin
308
278
  {
309
279
  private AmazonS3Client client;
310
280
  private final String bucket;
311
- private final String key;
312
- private boolean opened = false;
281
+ private final Iterator<String> iterator;
313
282
 
314
283
  public SingleFileProvider(PluginTask task, int taskIndex)
315
284
  {
316
285
  this.client = newS3Client(task);
317
286
  this.bucket = task.getBucket();
318
- this.key = task.getFiles().get(taskIndex);
287
+ this.iterator = task.getFiles().get(taskIndex).iterator();
319
288
  }
320
289
 
321
290
  @Override
322
291
  public InputStream openNext() throws IOException
323
292
  {
324
- if (opened) {
293
+ if (!iterator.hasNext()) {
325
294
  return null;
326
295
  }
327
- opened = true;
328
- GetObjectRequest request = new GetObjectRequest(bucket, key);
296
+ GetObjectRequest request = new GetObjectRequest(bucket, iterator.next());
329
297
  S3Object obj = client.getObject(request);
330
298
  return new ResumableInputStream(obj.getObjectContent(), new S3InputStreamReopener(client, request, obj.getObjectMetadata().getContentLength()));
331
299
  }
@@ -0,0 +1,179 @@
1
+ package org.embulk.input.s3;
2
+
3
+ import com.google.common.base.Optional;
4
+ import com.amazonaws.auth.AWSCredentials;
5
+ import com.amazonaws.auth.AWSCredentialsProvider;
6
+ import com.amazonaws.auth.AWSSessionCredentials;
7
+ import com.amazonaws.auth.AWSSessionCredentialsProvider;
8
+ import com.amazonaws.auth.AnonymousAWSCredentials;
9
+ import com.amazonaws.auth.BasicAWSCredentials;
10
+ import com.amazonaws.auth.BasicSessionCredentials;
11
+ import com.amazonaws.auth.EnvironmentVariableCredentialsProvider;
12
+ import com.amazonaws.auth.InstanceProfileCredentialsProvider;
13
+ import com.amazonaws.auth.SystemPropertiesCredentialsProvider;
14
+ import com.amazonaws.auth.profile.ProfileCredentialsProvider;
15
+ import com.amazonaws.auth.profile.ProfilesConfigFile;
16
+ import org.embulk.config.ConfigException;
17
+ import org.embulk.spi.Exec;
18
+ import org.embulk.spi.unit.LocalFile;
19
+ import org.slf4j.Logger;
20
+
21
+ public abstract class AwsCredentials // static factory: turns the auth options of an AwsCredentialsTask into an AWS SDK credentials provider
22
+ {
23
+ private AwsCredentials() { }
24
+
25
+ public static AWSCredentialsProvider getAWSCredentialsProvider(AwsCredentialsTask task) // dispatches on task.getAuthMethod(); throws ConfigException for an unknown method, a missing required option, or an option the method does not accept
26
+ {
27
+ switch (task.getAuthMethod()) {
28
+ case "basic":
29
+ // for backward compatibility
30
+ if (!task.getAccessKeyId().isPresent() && !task.getSecretAccessKey().isPresent()) { // fixed: the second operand used to re-test access_key_id, so a lone secret_access_key silently fell back to anonymous
31
+ final Logger log = Exec.getLogger(AwsCredentials.class);
32
+ log.warn("Both access_key_id and secret_access_key are not set. Assuming that 'auth_method: anonymous' option is set.");
33
+ log.warn("If you intentionally use anonymous authentication, please set 'auth_method: anonymous' option.");
34
+ log.warn("This behavior will be removed in a future release.");
35
+ reject(task.getSessionToken(), "session_token");
36
+ reject(task.getProfileFile(), "profile_file");
37
+ reject(task.getProfileName(), "profile_name");
38
+ return new AWSCredentialsProvider() {
39
+ public AWSCredentials getCredentials()
40
+ {
41
+ return new AnonymousAWSCredentials();
42
+ }
43
+
44
+ public void refresh() { }
45
+ };
46
+ }
47
+ else {
48
+ String accessKeyId = require(task.getAccessKeyId(), "'access_key_id', 'secret_access_key'");
49
+ String secretAccessKey = require(task.getSecretAccessKey(), "'secret_access_key'");
50
+ reject(task.getSessionToken(), "session_token");
51
+ reject(task.getProfileFile(), "profile_file");
52
+ reject(task.getProfileName(), "profile_name");
53
+ final BasicAWSCredentials creds = new BasicAWSCredentials(accessKeyId, secretAccessKey);
54
+ return new AWSCredentialsProvider() {
55
+ public AWSCredentials getCredentials()
56
+ {
57
+ return creds;
58
+ }
59
+
60
+ public void refresh() { }
61
+ };
62
+ }
63
+
64
+ case "env":
65
+ reject(task.getAccessKeyId(), "access_key_id");
66
+ reject(task.getSecretAccessKey(), "secret_access_key");
67
+ reject(task.getSessionToken(), "session_token");
68
+ reject(task.getProfileFile(), "profile_file");
69
+ reject(task.getProfileName(), "profile_name");
70
+ return overwriteBasicCredentials(task, new EnvironmentVariableCredentialsProvider().getCredentials());
71
+
72
+ case "instance":
73
+ reject(task.getAccessKeyId(), "access_key_id");
74
+ reject(task.getSecretAccessKey(), "secret_access_key");
75
+ reject(task.getSessionToken(), "session_token");
76
+ reject(task.getProfileFile(), "profile_file");
77
+ reject(task.getProfileName(), "profile_name");
78
+ return new InstanceProfileCredentialsProvider();
79
+
80
+ case "profile":
81
+ {
82
+ reject(task.getAccessKeyId(), "access_key_id");
83
+ reject(task.getSecretAccessKey(), "secret_access_key");
84
+ reject(task.getSessionToken(), "session_token");
85
+
86
+ String profileName = task.getProfileName().or("default");
87
+ ProfileCredentialsProvider provider;
88
+ if (task.getProfileFile().isPresent()) {
89
+ ProfilesConfigFile file = new ProfilesConfigFile(task.getProfileFile().get().getFile());
90
+ provider = new ProfileCredentialsProvider(file, profileName);
91
+ }
92
+ else {
93
+ provider = new ProfileCredentialsProvider(profileName);
94
+ }
95
+ task.setProfileName(Optional.<String>absent()); // cleared before the task is rewritten to 'basic' below, which rejects profile_name
96
+ task.setProfileFile(Optional.<LocalFile>absent()); // likewise: 'basic' rejects profile_file
97
+
98
+ return overwriteBasicCredentials(task, provider.getCredentials());
99
+ }
100
+
101
+ case "properties":
102
+ reject(task.getAccessKeyId(), "access_key_id");
103
+ reject(task.getSecretAccessKey(), "secret_access_key");
104
+ reject(task.getSessionToken(), "session_token");
105
+ reject(task.getProfileFile(), "profile_file");
106
+ reject(task.getProfileName(), "profile_name");
107
+ return overwriteBasicCredentials(task, new SystemPropertiesCredentialsProvider().getCredentials());
108
+
109
+ case "anonymous":
110
+ reject(task.getAccessKeyId(), "access_key_id");
111
+ reject(task.getSecretAccessKey(), "secret_access_key");
112
+ reject(task.getSessionToken(), "session_token");
113
+ reject(task.getProfileFile(), "profile_file");
114
+ reject(task.getProfileName(), "profile_name");
115
+ return new AWSCredentialsProvider() {
116
+ public AWSCredentials getCredentials()
117
+ {
118
+ return new AnonymousAWSCredentials();
119
+ }
120
+
121
+ public void refresh() { }
122
+ };
123
+
124
+ case "session":
125
+ {
126
+ String accessKeyId = require(task.getAccessKeyId(), "'access_key_id', 'secret_access_key', 'session_token'");
127
+ String secretAccessKey = require(task.getSecretAccessKey(), "'secret_access_key', 'session_token'");
128
+ String sessionToken = require(task.getSessionToken(), "'session_token'");
129
+ reject(task.getProfileFile(), "profile_file");
130
+ reject(task.getProfileName(), "profile_name");
131
+ final AWSSessionCredentials creds = new BasicSessionCredentials(accessKeyId, secretAccessKey, sessionToken);
132
+ return new AWSSessionCredentialsProvider() {
133
+ public AWSSessionCredentials getCredentials()
134
+ {
135
+ return creds;
136
+ }
137
+
138
+ public void refresh() { }
139
+ };
140
+ }
141
+
142
+ default:
143
+ throw new ConfigException(String.format("Unknown auth_method '%s'. Supported methods are basic, env, instance, profile, properties, anonymous, and session.",
144
+ task.getAuthMethod()));
145
+ }
146
+ }
147
+
148
+ private static AWSCredentialsProvider overwriteBasicCredentials(AwsCredentialsTask task, final AWSCredentials creds) // rewrites the task to auth_method 'basic' carrying the resolved key pair, then returns a provider fixed to creds
149
+ {
150
+ task.setAuthMethod("basic");
151
+ task.setAccessKeyId(Optional.of(creds.getAWSAccessKeyId()));
152
+ task.setSecretAccessKey(Optional.of(creds.getAWSSecretKey()));
153
+ return new AWSCredentialsProvider() {
154
+ public AWSCredentials getCredentials()
155
+ {
156
+ return creds;
157
+ }
158
+
159
+ public void refresh() { }
160
+ };
161
+ }
162
+
163
+ private static <T> T require(Optional<T> value, String message) // returns the value, or throws ConfigException naming the missing option(s)
164
+ {
165
+ if (value.isPresent()) {
166
+ return value.get();
167
+ }
168
+ else {
169
+ throw new ConfigException("Required option is not set: " + message);
170
+ }
171
+ }
172
+
173
+ private static <T> void reject(Optional<T> value, String message) // throws ConfigException when an option that the chosen auth_method does not accept is set
174
+ {
175
+ if (value.isPresent()) {
176
+ throw new ConfigException("Invalid option is set: " + message);
177
+ }
178
+ }
179
+ }
@@ -0,0 +1,39 @@
1
+ package org.embulk.input.s3;
2
+
3
+ import com.google.common.base.Optional;
4
+ import org.embulk.config.Config;
5
+ import org.embulk.config.ConfigDefault;
6
+ import org.embulk.spi.unit.LocalFile;
7
+
8
+ public interface AwsCredentialsTask // credential-related plugin options; this is the parameter type of AwsCredentials.getAWSCredentialsProvider
9
+ {
10
+ @Config("auth_method")
11
+ @ConfigDefault("\"basic\"")
12
+ String getAuthMethod(); // name of the authentication method; defaults to "basic"
13
+ void setAuthMethod(String method);
14
+
15
+ @Config("access_key_id")
16
+ @ConfigDefault("null")
17
+ Optional<String> getAccessKeyId(); // absent unless access_key_id is configured
18
+ void setAccessKeyId(Optional<String> value);
19
+
20
+ @Config("secret_access_key")
21
+ @ConfigDefault("null")
22
+ Optional<String> getSecretAccessKey(); // absent unless secret_access_key is configured
23
+ void setSecretAccessKey(Optional<String> value);
24
+
25
+ @Config("session_token")
26
+ @ConfigDefault("null")
27
+ Optional<String> getSessionToken(); // absent unless session_token is configured
28
+ void setSessionToken(Optional<String> value);
29
+
30
+ @Config("profile_file")
31
+ @ConfigDefault("null")
32
+ Optional<LocalFile> getProfileFile(); // absent unless profile_file is configured
33
+ void setProfileFile(Optional<LocalFile> value);
34
+
35
+ @Config("profile_name")
36
+ @ConfigDefault("null")
37
+ Optional<String> getProfileName(); // absent unless profile_name is configured
38
+ void setProfileName(Optional<String> value);
39
+ }
@@ -0,0 +1,289 @@
1
+ package org.embulk.input.s3;
2
+
3
+ import java.util.List;
4
+ import java.util.AbstractList;
5
+ import java.util.ArrayList;
6
+ import java.util.zip.GZIPInputStream;
7
+ import java.util.zip.GZIPOutputStream;
8
+ import java.io.InputStream;
9
+ import java.io.OutputStream;
10
+ import java.io.BufferedOutputStream;
11
+ import java.io.BufferedInputStream;
12
+ import java.io.ByteArrayInputStream;
13
+ import java.io.ByteArrayOutputStream;
14
+ import java.io.IOException;
15
+ import java.nio.ByteBuffer;
16
+ import java.nio.charset.StandardCharsets;
17
+ import org.embulk.config.Config;
18
+ import org.embulk.config.ConfigDefault;
19
+ import org.embulk.config.ConfigSource;
20
+ import com.google.common.base.Throwables;
21
+ import com.google.common.base.Optional;
22
+ import com.google.common.collect.ImmutableList;
23
+ import com.fasterxml.jackson.annotation.JsonProperty;
24
+ import com.fasterxml.jackson.annotation.JsonIgnore;
25
+ import com.fasterxml.jackson.annotation.JsonCreator;
26
+
27
+ // this class should be moved to embulk-core
28
+ public class FileList // list of file paths kept as a gzip-compressed byte array, plus per-task groups of (index, size) entries pointing into it
29
+ {
30
+ public interface Task
31
+ {
32
+ @Config("total_file_count_limit")
33
+ @ConfigDefault("2147483647")
34
+ int getTotalFileCountLimit();
35
+ }
36
+
37
+ public static class Entry
38
+ {
39
+ private int index;
40
+ private long size;
41
+
42
+ @JsonCreator
43
+ public Entry(
44
+ @JsonProperty("index") int index,
45
+ @JsonProperty("size") long size)
46
+ {
47
+ this.index = index;
48
+ this.size = size;
49
+ }
50
+
51
+ @JsonProperty("index")
52
+ public int getIndex() { return index; }
53
+
54
+ @JsonProperty("size")
55
+ public long getSize() { return size; }
56
+ }
57
+
58
+ public static class Builder
59
+ {
60
+ private final ByteArrayOutputStream binary;
61
+ private final OutputStream stream;
62
+ private final List<Entry> entries = new ArrayList<>();
63
+ private String last = null;
64
+
65
+ private int limitCount = Integer.MAX_VALUE;
66
+ private final ByteBuffer castBuffer = ByteBuffer.allocate(4);
67
+
68
+ public Builder(Task task)
69
+ {
70
+ this();
71
+ this.limitCount = task.getTotalFileCountLimit();
72
+ }
73
+
74
+ public Builder(ConfigSource config)
75
+ {
76
+ this();
77
+ this.limitCount = config.get(int.class, "total_file_count_limit", Integer.MAX_VALUE);
78
+ }
79
+
80
+ public Builder()
81
+ {
82
+ binary = new ByteArrayOutputStream();
83
+ try {
84
+ stream = new BufferedOutputStream(new GZIPOutputStream(binary));
85
+ }
86
+ catch (IOException ex) {
87
+ throw Throwables.propagate(ex);
88
+ }
89
+ }
90
+
91
+ public Builder limitTotalFileCount(int limitCount)
92
+ {
93
+ this.limitCount = limitCount;
94
+ return this;
95
+ }
96
+
97
+ public int size()
98
+ {
99
+ return entries.size();
100
+ }
101
+
102
+ public boolean needsMore()
103
+ {
104
+ return size() < limitCount;
105
+ }
106
+
107
+ public synchronized boolean add(String path, long size) // appends one path; returns false (and stores nothing) once the file-count limit is reached
108
+ {
109
+ // TODO throw IllegalStateException if stream is already closed
110
+
111
+ if (!needsMore()) {
112
+ return false;
113
+ }
114
+
115
+ // TODO in the future, support some other filtering parameters (file name suffix filter, regex filter, etc)
116
+ // and return false if filtered out.
117
+
118
+ int index = entries.size();
119
+ entries.add(new Entry(index, size));
120
+
121
+ byte[] data = path.getBytes(StandardCharsets.UTF_8);
122
+ castBuffer.putInt(0, data.length); // record layout: 4-byte length prefix followed by the UTF-8 path bytes
123
+ try {
124
+ stream.write(castBuffer.array());
125
+ stream.write(data);
126
+ }
127
+ catch (IOException ex) {
128
+ throw Throwables.propagate(ex);
129
+ }
130
+
131
+ last = path;
132
+ return true;
133
+ }
134
+
135
+ public FileList build() // closes the compressed stream and snapshots it; one task is created per added entry
136
+ {
137
+ try {
138
+ stream.close();
139
+ }
140
+ catch (IOException ex) {
141
+ throw Throwables.propagate(ex);
142
+ }
143
+ return new FileList(binary.toByteArray(), getSplits(entries), Optional.fromNullable(last));
144
+ }
145
+
146
+ private List<List<Entry>> getSplits(List<Entry> all)
147
+ {
148
+ // TODO combine multiple entries into one task using some configuration parameters
149
+ List<List<Entry>> tasks = new ArrayList<>();
150
+ for (Entry entry : all) {
151
+ tasks.add(ImmutableList.of(entry));
152
+ }
153
+ return tasks;
154
+ }
155
+ }
156
+
157
+ private final byte[] data;
158
+ private final List<List<Entry>> tasks;
159
+ private final Optional<String> last;
160
+
161
+ @JsonCreator
162
+ @Deprecated
163
+ public FileList(
164
+ @JsonProperty("data") byte[] data,
165
+ @JsonProperty("tasks") List<List<Entry>> tasks,
166
+ @JsonProperty("last") Optional<String> last)
167
+ {
168
+ this.data = data;
169
+ this.tasks = tasks;
170
+ this.last = last;
171
+ }
172
+
173
+ @JsonIgnore
174
+ public Optional<String> getLastPath(Optional<String> lastLastPath) // last path added to this list, or the caller-supplied previous value when the list recorded none
175
+ {
176
+ if (last.isPresent()) {
177
+ return last;
178
+ }
179
+ return lastLastPath;
180
+ }
181
+
182
+ @JsonIgnore
183
+ public int getTaskCount()
184
+ {
185
+ return tasks.size();
186
+ }
187
+
188
+ @JsonIgnore
189
+ public List<String> get(int i) // paths assigned to task i, decoded lazily from the compressed data
190
+ {
191
+ return new EntryList(data, tasks.get(i));
192
+ }
193
+
194
+ @JsonProperty("data")
195
+ @Deprecated
196
+ public byte[] getData()
197
+ {
198
+ return data;
199
+ }
200
+
201
+ @JsonProperty("tasks")
202
+ @Deprecated
203
+ public List<List<Entry>> getTasks()
204
+ {
205
+ return tasks;
206
+ }
207
+
208
+ @JsonProperty("last")
209
+ @Deprecated
210
+ public Optional<String> getLast()
211
+ {
212
+ return last;
213
+ }
214
+
215
+ private class EntryList
216
+ extends AbstractList<String>
217
+ {
218
+ private final byte[] data;
219
+ private final List<Entry> entries;
220
+ private InputStream stream;
221
+ private int current; // index of the next record the stream will yield
222
+
223
+ private final ByteBuffer castBuffer = ByteBuffer.allocate(4);
224
+
225
+ public EntryList(byte[] data, List<Entry> entries)
226
+ {
227
+ this.data = data;
228
+ this.entries = entries;
229
+ try {
230
+ this.stream = new BufferedInputStream(new GZIPInputStream(new ByteArrayInputStream(data)));
231
+ }
232
+ catch (IOException ex) {
233
+ throw Throwables.propagate(ex);
234
+ }
235
+ this.current = 0;
236
+ }
237
+
238
+ @Override
239
+ public synchronized String get(int i)
240
+ {
241
+ Entry e = entries.get(i);
242
+ if (e.getIndex() < current) {
243
+ // rewind to the head
244
+ try {
245
+ stream.close();
246
+ stream = new BufferedInputStream(new GZIPInputStream(new ByteArrayInputStream(data)));
247
+ }
248
+ catch (IOException ex) {
249
+ throw Throwables.propagate(ex);
250
+ }
251
+ current = 0;
252
+ }
253
+
254
+ while (current < e.getIndex()) {
255
+ readNext();
256
+ }
257
+ // now current == e.getIndex()
258
+ return readNextString();
259
+ }
260
+
261
+ @Override
262
+ public int size()
263
+ {
264
+ return entries.size();
265
+ }
266
+
267
+ private byte[] readNext() // reads one length-prefixed record and advances current; a truncated stream surfaces as a propagated EOFException
268
+ {
269
+ try {
270
+ new java.io.DataInputStream(stream).readFully(castBuffer.array()); // fixed: plain read() may return fewer than 4 bytes and its result was ignored
271
+ int n = castBuffer.getInt(0);
272
+ byte[] b = new byte[n]; // here should be able to use a pooled buffer because read data is ignored if readNextString doesn't call this method
273
+ new java.io.DataInputStream(stream).readFully(b); // fixed: guarantees all n path bytes are consumed so the next record stays aligned
274
+
275
+ current++;
276
+
277
+ return b;
278
+ }
279
+ catch (IOException ex) {
280
+ throw Throwables.propagate(ex);
281
+ }
282
+ }
283
+
284
+ private String readNextString()
285
+ {
286
+ return new String(readNext(), StandardCharsets.UTF_8);
287
+ }
288
+ }
289
+ }
@@ -72,7 +72,9 @@ public class TestS3FileInputPlugin
72
72
  doReturn("in/file/").doReturn(null).when(ol).getNextMarker();
73
73
 
74
74
  // It counts only size != 0 files.
75
- assertEquals(1, S3FileInputPlugin.listS3FilesByPrefix(client, "bucketName", "prefix", Optional.<String>absent()).size());
75
+ FileList.Builder builder = new FileList.Builder();
76
+ S3FileInputPlugin.listS3FilesByPrefix(builder, client, "bucketName", "prefix", Optional.<String>absent());
77
+ assertEquals(1, builder.size());
76
78
  }
77
79
 
78
80
  @Test
@@ -90,7 +92,7 @@ public class TestS3FileInputPlugin
90
92
  public List<TaskReport> run(TaskSource taskSource, int taskCount)
91
93
  {
92
94
  assertEquals(3, taskCount);
93
- List<String> files = taskSource.loadTask(S3PluginTask.class).getFiles();
95
+ List<String> files = fileListToList(taskSource.loadTask(S3PluginTask.class).getFiles());
94
96
  assertArrayEquals(new String[]{"in/aa/a", "in/aa/b", "in/aa/c"}, files.toArray(new String[files.size()]));
95
97
  return emptyTaskReports(taskCount);
96
98
  }
@@ -108,12 +110,12 @@ public class TestS3FileInputPlugin
108
110
  public List<TaskReport> run(TaskSource taskSource, int taskCount)
109
111
  {
110
112
  assertEquals(0, taskCount);
111
- assertTrue(taskSource.loadTask(S3PluginTask.class).getFiles().isEmpty());
113
+ assertTrue(fileListToList(taskSource.loadTask(S3PluginTask.class).getFiles()).isEmpty());
112
114
  return emptyTaskReports(taskCount);
113
115
  }
114
116
  });
115
117
 
116
- assertFalse(configDiff.has("last_path"));
118
+ assertEquals(null, configDiff.get(String.class, "last_path", null));
117
119
  }
118
120
 
119
121
  { // if files are empty, keep the previous last_path.
@@ -126,7 +128,7 @@ public class TestS3FileInputPlugin
126
128
  @Override
127
129
  public List<TaskReport> run(TaskSource taskSource, int taskCount) {
128
130
  assertEquals(0, taskCount);
129
- assertTrue(taskSource.loadTask(S3PluginTask.class).getFiles().isEmpty());
131
+ assertTrue(fileListToList(taskSource.loadTask(S3PluginTask.class).getFiles()).isEmpty());
130
132
  return emptyTaskReports(taskCount);
131
133
  }
132
134
  });
@@ -143,7 +145,9 @@ public class TestS3FileInputPlugin
143
145
  doReturn(client).when(plugin).newS3Client(any(PluginTask.class));
144
146
 
145
147
  PluginTask task = config.loadConfig(plugin.getTaskClass());
146
- task.setFiles(Arrays.asList(new String[]{"in/aa/a"}));
148
+ FileList.Builder builder = new FileList.Builder();
149
+ builder.add("in/aa/a", 100);
150
+ task.setFiles(builder.build());
147
151
 
148
152
  StringBuilder sbuf = new StringBuilder();
149
153
  try (S3FileInput input = (S3FileInput) plugin.open(task.dump(), 0)) {
@@ -218,4 +222,15 @@ public class TestS3FileInputPlugin
218
222
  }
219
223
  return reports.build();
220
224
  }
225
+
226
+ private static List<String> fileListToList(FileList list)
227
+ {
228
+ ImmutableList.Builder<String> builder = ImmutableList.builder();
229
+ for (int i=0; i < list.getTaskCount(); i++) {
230
+ for (String path : list.get(i)) {
231
+ builder.add(path);
232
+ }
233
+ }
234
+ return builder.build();
235
+ }
221
236
  }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-s3
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.2.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-10-15 00:00:00.000000000 Z
11
+ date: 2015-11-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -48,18 +48,20 @@ files:
48
48
  - build.gradle
49
49
  - lib/embulk/input/s3.rb
50
50
  - src/main/java/org/embulk/input/s3/AbstractS3FileInputPlugin.java
51
+ - src/main/java/org/embulk/input/s3/AwsCredentials.java
52
+ - src/main/java/org/embulk/input/s3/AwsCredentialsTask.java
53
+ - src/main/java/org/embulk/input/s3/FileList.java
51
54
  - src/main/java/org/embulk/input/s3/S3FileInputPlugin.java
52
55
  - src/test/java/org/embulk/input/s3/TestS3FileInputPlugin.java
53
56
  - src/test/java/org/embulk/input/s3/TestS3InputStreamReopener.java
54
- - classpath/aws-java-sdk-core-1.9.22.jar
55
- - classpath/aws-java-sdk-kms-1.9.22.jar
56
- - classpath/aws-java-sdk-s3-1.9.22.jar
57
+ - classpath/aws-java-sdk-core-1.10.33.jar
58
+ - classpath/aws-java-sdk-kms-1.10.33.jar
59
+ - classpath/aws-java-sdk-s3-1.10.33.jar
57
60
  - classpath/commons-codec-1.6.jar
58
- - classpath/embulk-input-s3-0.2.3.jar
59
- - classpath/httpclient-4.3.4.jar
60
- - classpath/httpcore-4.3.2.jar
61
+ - classpath/embulk-input-s3-0.2.4.jar
62
+ - classpath/httpclient-4.3.6.jar
63
+ - classpath/httpcore-4.3.3.jar
61
64
  - classpath/jcl-over-slf4j-1.7.12.jar
62
- - classpath/joda-time-2.8.2.jar
63
65
  homepage: https://github.com/embulk/embulk-input-s3
64
66
  licenses:
65
67
  - Apache 2.0