embulk-input-azure_blob_storage 0.1.3 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a85c5e311d426a1f67c47b4947683827a3bb2140
4
- data.tar.gz: 2ae6c9a16e143e4731e4de7518fa504d2cb2f8c1
3
+ metadata.gz: 49b71df05579f8c392ec70089aaeca95f1b81144
4
+ data.tar.gz: a63ffb1663b9bbed5827211dbb62eeb04b20a987
5
5
  SHA512:
6
- metadata.gz: 68588df49d434087541ea542a4731236d5496356df5b66bdb81f620803c305f76c33b39ff7340d176378f80c0f41e0275efedc95b180198828776218a3148f55
7
- data.tar.gz: 7cc8c4758c67360d7cbbd5cf4ca12bfe8f8b975641fcc31fa2b3308e4ba3c8093e7aa0edf577e078d9f3441e8636bae700dc07fbb16c0b181ec71742b3ae963c
6
+ metadata.gz: 1a43a45c850746c1a50b9f12be823f0081ea9deddd4403e831f2533e6af4ab4861405e45cc62824fcb637b6509178fceeb6cad4f583542677e0ee3383beb8e43
7
+ data.tar.gz: d5e6066c047f60034d50ff6084919157476f962adf5c9f2b1a59685a97d83a254637ed0037276629714e2b1223c229c0564b58c162cfffbb58b10a4822f563aa
@@ -1,6 +1,11 @@
1
+ ## 0.1.4 - 2016-03-22
2
+
3
+ * [new feature] Support `last_path` option [#7](https://github.com/sakama/embulk-input-azure_blob_storage/pull/7)
4
+ * [new feature] Support `path_match_pattern` option [#6](https://github.com/sakama/embulk-input-azure_blob_storage/pull/6)
5
+
1
6
  ## 0.1.3 - 2016-03-16
2
7
 
3
- * [maintenance] Add unit test[#4](https://github.com/sakama/embulk-input-azure_blob_storage/pull/4)
8
+ * [maintenance] Add unit test [#4](https://github.com/sakama/embulk-input-azure_blob_storage/pull/4)
4
9
  * [maintenance] Add retry logic [#3](https://github.com/sakama/embulk-input-azure_blob_storage/pull/3)
5
10
 
6
11
  ## 0.1.2 - 2015-10-11
data/README.md CHANGED
@@ -16,6 +16,8 @@ First, create Azure [Storage Account](https://azure.microsoft.com/en-us/document
16
16
  - **account_key**: primary access key (string, required)
17
17
  - **container**: name of the container where the data is stored (string, required)
18
18
  - **path_prefix**: prefix of target keys (string, required)
19
+ - **path_match_pattern**: regexp to match file paths. If a file path doesn't match with this pattern, the file will be skipped (regexp string, optional)
20
+ - **total_file_count_limit**: maximum number of files to read (integer, optional)
19
21
 
20
22
  ## Example
21
23
 
@@ -55,6 +57,21 @@ in:
55
57
  out: {type: stdout}
56
58
  ```
57
59
 
60
+ To filter files using regexp:
61
+
62
+ ```yaml
63
+ in:
64
+ type: azure_blob_storage
65
+ path_prefix: logs/csv-
66
+ ...
67
+ path_match_pattern: \.csv$ # a file will be skipped if its path doesn't match with this pattern
68
+
69
+ ## some examples of regexp:
70
+ #path_match_pattern: /archive/ # match files in .../archive/... directory
71
+ #path_match_pattern: /data1/|/data2/ # match files in .../data1/... or .../data2/... directory
72
+ #path_match_pattern: \.csv$|\.csv\.gz$ # match files whose suffix is .csv or .csv.gz
73
+ ```
74
+
58
75
  ## Build
59
76
 
60
77
  ```
@@ -17,7 +17,7 @@ configurations {
17
17
  sourceCompatibility = 1.7
18
18
  targetCompatibility = 1.7
19
19
 
20
- version = "0.1.3"
20
+ version = "0.1.4"
21
21
 
22
22
  dependencies {
23
23
  compile "org.embulk:embulk-core:0.8.2"
@@ -1,10 +1,12 @@
1
1
  package org.embulk.input.azure_blob_storage;
2
2
 
3
+ import com.google.common.base.Charsets;
3
4
  import com.google.common.base.Optional;
4
5
  import com.google.common.base.Throwables;
5
- import com.google.common.collect.ImmutableList;
6
+ import com.google.common.io.BaseEncoding;
6
7
  import com.microsoft.azure.storage.CloudStorageAccount;
7
8
  import com.microsoft.azure.storage.ResultContinuation;
9
+ import com.microsoft.azure.storage.ResultContinuationType;
8
10
  import com.microsoft.azure.storage.ResultSegment;
9
11
  import com.microsoft.azure.storage.StorageException;
10
12
  import com.microsoft.azure.storage.blob.CloudBlob;
@@ -31,15 +33,14 @@ import java.io.IOException;
31
33
  import java.io.InputStream;
32
34
  import java.net.URISyntaxException;
33
35
  import java.security.InvalidKeyException;
34
- import java.util.ArrayList;
35
- import java.util.Collections;
36
+ import java.util.Iterator;
36
37
  import java.util.List;
37
38
 
38
39
  public class AzureBlobStorageFileInputPlugin
39
40
  implements FileInputPlugin
40
41
  {
41
42
  public interface PluginTask
42
- extends Task
43
+ extends Task, FileList.Task
43
44
  {
44
45
  @Config("account_name")
45
46
  String getAccountName();
@@ -65,9 +66,8 @@ public class AzureBlobStorageFileInputPlugin
65
66
  @ConfigDefault("5") // 5 times retry to connect sftp server if failed.
66
67
  int getMaxConnectionRetry();
67
68
 
68
- List<String> getFiles();
69
-
70
- void setFiles(List<String> files);
69
+ FileList getFiles();
70
+ void setFiles(FileList files);
71
71
 
72
72
  @ConfigInject
73
73
  BufferAllocator getBufferAllocator();
@@ -83,28 +83,18 @@ public class AzureBlobStorageFileInputPlugin
83
83
  CloudBlobClient blobClient = newAzureClient(task.getAccountName(), task.getAccountKey());
84
84
  task.setFiles(listFiles(blobClient, task));
85
85
 
86
- return resume(task.dump(), task.getFiles().size(), control);
86
+ return resume(task.dump(), task.getFiles().getTaskCount(), control);
87
87
  }
88
88
 
89
89
  @Override
90
90
  public ConfigDiff resume(TaskSource taskSource, int taskCount, FileInputPlugin.Control control)
91
91
  {
92
92
  PluginTask task = taskSource.loadTask(PluginTask.class);
93
-
94
93
  control.run(taskSource, taskCount);
95
94
 
96
95
  ConfigDiff configDiff = Exec.newConfigDiff();
96
+ configDiff.set("last_path", task.getFiles().getLastPath(task.getLastPath()));
97
97
 
98
- List<String> files = new ArrayList<>(task.getFiles());
99
- if (files.isEmpty()) {
100
- if (task.getLastPath().isPresent()) {
101
- configDiff.set("last_path", task.getLastPath().get());
102
- }
103
- }
104
- else {
105
- Collections.sort(files);
106
- configDiff.set("last_path", files.get(files.size() - 1));
107
- }
108
98
  return configDiff;
109
99
  }
110
100
 
@@ -129,22 +119,28 @@ public class AzureBlobStorageFileInputPlugin
129
119
  return account.createCloudBlobClient();
130
120
  }
131
121
 
132
- private List<String> listFiles(CloudBlobClient client, PluginTask task)
122
+ private FileList listFiles(CloudBlobClient client, PluginTask task)
133
123
  {
134
124
  if (task.getPathPrefix().equals("/")) {
135
125
  log.info("Listing files with prefix \"/\". This doesn't mean all files in a bucket. If you intend to read all files, use \"path_prefix: ''\" (empty string) instead.");
136
126
  }
127
+ FileList.Builder builder = new FileList.Builder(task);
137
128
 
138
- return listFilesWithPrefix(client, task.getContainer(), task.getPathPrefix(), task.getLastPath(), task.getMaxResults());
129
+ return listFilesWithPrefix(builder, client, task.getContainer(), task.getPathPrefix(), task.getLastPath(), task.getMaxResults());
139
130
  }
140
131
 
141
- private static List<String> listFilesWithPrefix(CloudBlobClient client, String containerName,
132
+ private static FileList listFilesWithPrefix(FileList.Builder builder, CloudBlobClient client, String containerName,
142
133
  String prefix, Optional<String> lastPath, int maxResults)
143
134
  {
144
- ImmutableList.Builder<String> builder = ImmutableList.builder();
145
- // It seems I can't cast lastKey<String> to token<ResultContinuation> by Azure SDK for Java
146
- String lastKey = lastPath.orNull();
135
+ String lastKey = (lastPath.isPresent() && !lastPath.get().isEmpty()) ? createNextToken(lastPath.get()) : null;
147
136
  ResultContinuation token = null;
137
+ if (lastKey != null) {
138
+ token = new ResultContinuation();
139
+ token.setContinuationType(ResultContinuationType.BLOB);
140
+ log.debug("lastPath: {}", lastPath.get());
141
+ log.debug("lastPath(Base64encoded): {}", lastKey);
142
+ token.setNextMarker(lastKey);
143
+ }
148
144
 
149
145
  try {
150
146
  CloudBlobContainer container = client.getContainerReference(containerName);
@@ -156,7 +152,7 @@ public class AzureBlobStorageFileInputPlugin
156
152
  if (blobItem instanceof CloudBlob) {
157
153
  CloudBlob blob = (CloudBlob) blobItem;
158
154
  if (blob.exists() && !blob.getUri().toString().endsWith("/")) {
159
- builder.add(blob.getName());
155
+ builder.add(blob.getName(), blob.getProperties().getLength());
160
156
  log.debug(String.format("name:%s, class:%s, uri:%s", blob.getName(), blob.getClass(), blob.getUri()));
161
157
  }
162
158
  }
@@ -201,7 +197,7 @@ public class AzureBlobStorageFileInputPlugin
201
197
  {
202
198
  private CloudBlobClient client;
203
199
  private final String containerName;
204
- private final String key;
200
+ private final Iterator<String> iterator;
205
201
  private final int maxConnectionRetry;
206
202
  private boolean opened = false;
207
203
 
@@ -209,14 +205,14 @@ public class AzureBlobStorageFileInputPlugin
209
205
  {
210
206
  this.client = newAzureClient(task.getAccountName(), task.getAccountKey());
211
207
  this.containerName = task.getContainer();
212
- this.key = task.getFiles().get(taskIndex);
208
+ this.iterator = task.getFiles().get(taskIndex).iterator();
213
209
  this.maxConnectionRetry = task.getMaxConnectionRetry();
214
210
  }
215
211
 
216
212
  @Override
217
213
  public InputStream openNext() throws IOException
218
214
  {
219
- if (opened) {
215
+ if (opened || !iterator.hasNext()) {
220
216
  return null;
221
217
  }
222
218
  opened = true;
@@ -225,7 +221,7 @@ public class AzureBlobStorageFileInputPlugin
225
221
  while (true) {
226
222
  try {
227
223
  CloudBlobContainer container = client.getContainerReference(containerName);
228
- CloudBlob blob = container.getBlockBlobReference(key);
224
+ CloudBlob blob = container.getBlockBlobReference(iterator.next());
229
225
  return blob.openInputStream();
230
226
  }
231
227
  catch (StorageException | URISyntaxException ex) {
@@ -250,4 +246,23 @@ public class AzureBlobStorageFileInputPlugin
250
246
  @Override
251
247
  public void close() {}
252
248
  }
249
+
250
+ private static String createNextToken(String path)
251
+ {
252
+ StringBuilder sb = new StringBuilder()
253
+ .append(String.format("%06d", path.length()))
254
+ .append("!")
255
+ .append(path)
256
+ .append("!000028!9999-12-31T23:59:59.9999999Z!");
257
+
258
+ String encodedString = BaseEncoding.base64().encode(sb.toString().getBytes(Charsets.UTF_8));
259
+
260
+ StringBuilder marker = new StringBuilder()
261
+ .append("2")
262
+ .append("!")
263
+ .append(encodedString.length())
264
+ .append("!")
265
+ .append(encodedString);
266
+ return marker.toString();
267
+ }
253
268
  }
@@ -0,0 +1,341 @@
1
+ package org.embulk.input.azure_blob_storage;
2
+
3
+ import com.fasterxml.jackson.annotation.JsonCreator;
4
+ import com.fasterxml.jackson.annotation.JsonIgnore;
5
+ import com.fasterxml.jackson.annotation.JsonProperty;
6
+ import com.google.common.base.Optional;
7
+ import com.google.common.base.Throwables;
8
+ import org.embulk.config.Config;
9
+ import org.embulk.config.ConfigDefault;
10
+ import org.embulk.config.ConfigSource;
11
+ import org.embulk.spi.Exec;
12
+ import org.slf4j.Logger;
13
+
14
+ import java.io.BufferedInputStream;
15
+ import java.io.BufferedOutputStream;
16
+ import java.io.ByteArrayInputStream;
17
+ import java.io.ByteArrayOutputStream;
18
+
19
+ import java.io.IOException;
20
+ import java.io.InputStream;
21
+ import java.io.OutputStream;
22
+ import java.nio.ByteBuffer;
23
+ import java.nio.charset.StandardCharsets;
24
+ import java.util.AbstractList;
25
+ import java.util.ArrayList;
26
+ import java.util.List;
27
+ import java.util.regex.Pattern;
28
+ import java.util.zip.GZIPInputStream;
29
+ import java.util.zip.GZIPOutputStream;
30
+
31
+ // this class should be moved to embulk-core
32
+ public class FileList
33
+ {
34
+ public interface Task
35
+ {
36
+ @Config("path_match_pattern")
37
+ @ConfigDefault("\".*\"")
38
+ String getPathMatchPattern();
39
+
40
+ @Config("total_file_count_limit")
41
+ @ConfigDefault("2147483647")
42
+ int getTotalFileCountLimit();
43
+
44
+ // TODO support more algorithms to combine tasks
45
+ @Config("min_task_size")
46
+ @ConfigDefault("0")
47
+ long getMinTaskSize();
48
+ }
49
+
50
+ public static class Entry
51
+ {
52
+ private int index;
53
+ private long size;
54
+
55
+ @JsonCreator
56
+ public Entry(
57
+ @JsonProperty("index") int index,
58
+ @JsonProperty("size") long size)
59
+ {
60
+ this.index = index;
61
+ this.size = size;
62
+ }
63
+
64
+ @JsonProperty("index")
65
+ public int getIndex()
66
+ {
67
+ return index;
68
+ }
69
+
70
+ @JsonProperty("size")
71
+ public long getSize()
72
+ {
73
+ return size;
74
+ }
75
+ }
76
+
77
+ public static class Builder
78
+ {
79
+ private final Logger log = Exec.getLogger(FileList.class);
80
+ private final ByteArrayOutputStream binary;
81
+ private final OutputStream stream;
82
+ private final List<Entry> entries = new ArrayList<>();
83
+ private String last = null;
84
+
85
+ private int limitCount = Integer.MAX_VALUE;
86
+ private long minTaskSize = 1;
87
+ private Pattern pathMatchPattern;
88
+
89
+ private final ByteBuffer castBuffer = ByteBuffer.allocate(4);
90
+
91
+ public Builder(Task task)
92
+ {
93
+ this();
94
+ this.pathMatchPattern = Pattern.compile(task.getPathMatchPattern());
95
+ this.limitCount = task.getTotalFileCountLimit();
96
+ this.minTaskSize = task.getMinTaskSize();
97
+ }
98
+
99
+ public Builder(ConfigSource config)
100
+ {
101
+ this();
102
+ this.pathMatchPattern = Pattern.compile(config.get(String.class, "path_match_pattern", ".*"));
103
+ this.limitCount = config.get(int.class, "total_file_count_limit", Integer.MAX_VALUE);
104
+ this.minTaskSize = config.get(long.class, "min_task_size", 0L);
105
+ }
106
+
107
+ public Builder()
108
+ {
109
+ binary = new ByteArrayOutputStream();
110
+ try {
111
+ stream = new BufferedOutputStream(new GZIPOutputStream(binary));
112
+ }
113
+ catch (IOException ex) {
114
+ throw Throwables.propagate(ex);
115
+ }
116
+ }
117
+
118
+ public Builder limitTotalFileCount(int limitCount)
119
+ {
120
+ this.limitCount = limitCount;
121
+ return this;
122
+ }
123
+
124
+ public Builder minTaskSize(long bytes)
125
+ {
126
+ this.minTaskSize = bytes;
127
+ return this;
128
+ }
129
+
130
+ public Builder pathMatchPattern(String pattern)
131
+ {
132
+ this.pathMatchPattern = Pattern.compile(pattern);
133
+ return this;
134
+ }
135
+
136
+ public int size()
137
+ {
138
+ return entries.size();
139
+ }
140
+
141
+ public boolean needsMore()
142
+ {
143
+ return size() < limitCount;
144
+ }
145
+
146
+ // returns true if this file is used
147
+ public synchronized boolean add(String path, long size)
148
+ {
149
+ // TODO throw IllegalStateException if stream is already closed
150
+
151
+ if (!needsMore()) {
152
+ return false;
153
+ }
154
+
155
+ if (!pathMatchPattern.matcher(path).find()) {
156
+ return false;
157
+ }
158
+
159
+ int index = entries.size();
160
+ entries.add(new Entry(index, size));
161
+ log.info("add file to the request list: {}", path);
162
+
163
+ byte[] data = path.getBytes(StandardCharsets.UTF_8);
164
+ castBuffer.putInt(0, data.length);
165
+ try {
166
+ stream.write(castBuffer.array());
167
+ stream.write(data);
168
+ }
169
+ catch (IOException ex) {
170
+ throw Throwables.propagate(ex);
171
+ }
172
+
173
+ last = path;
174
+ return true;
175
+ }
176
+
177
+ public FileList build()
178
+ {
179
+ try {
180
+ stream.close();
181
+ }
182
+ catch (IOException ex) {
183
+ throw Throwables.propagate(ex);
184
+ }
185
+ return new FileList(binary.toByteArray(), getSplits(entries), Optional.fromNullable(last));
186
+ }
187
+
188
+ private List<List<Entry>> getSplits(List<Entry> all)
189
+ {
190
+ List<List<Entry>> tasks = new ArrayList<>();
191
+ long currentTaskSize = 0;
192
+ List<Entry> currentTask = new ArrayList<>();
193
+ for (Entry entry : all) {
194
+ currentTask.add(entry);
195
+ currentTaskSize += entry.getSize(); // TODO consider to multiply the size by cost_per_byte, and add cost_per_file
196
+ if (currentTaskSize >= minTaskSize) {
197
+ tasks.add(currentTask);
198
+ currentTask = new ArrayList<>();
199
+ currentTaskSize = 0;
200
+ }
201
+ }
202
+ if (!currentTask.isEmpty()) {
203
+ tasks.add(currentTask);
204
+ }
205
+ return tasks;
206
+ }
207
+ }
208
+
209
+ private final byte[] data;
210
+ private final List<List<Entry>> tasks;
211
+ private final Optional<String> last;
212
+
213
+ @JsonCreator
214
+ @Deprecated
215
+ public FileList(
216
+ @JsonProperty("data") byte[] data,
217
+ @JsonProperty("tasks") List<List<Entry>> tasks,
218
+ @JsonProperty("last") Optional<String> last)
219
+ {
220
+ this.data = data;
221
+ this.tasks = tasks;
222
+ this.last = last;
223
+ }
224
+
225
+ @JsonIgnore
226
+ public Optional<String> getLastPath(Optional<String> lastLastPath)
227
+ {
228
+ if (last.isPresent()) {
229
+ return last;
230
+ }
231
+ return lastLastPath;
232
+ }
233
+
234
+ @JsonIgnore
235
+ public int getTaskCount()
236
+ {
237
+ return tasks.size();
238
+ }
239
+
240
+ @JsonIgnore
241
+ public List<String> get(int i)
242
+ {
243
+ return new EntryList(data, tasks.get(i));
244
+ }
245
+
246
+ @JsonProperty("data")
247
+ @Deprecated
248
+ public byte[] getData()
249
+ {
250
+ return data;
251
+ }
252
+
253
+ @JsonProperty("tasks")
254
+ @Deprecated
255
+ public List<List<Entry>> getTasks()
256
+ {
257
+ return tasks;
258
+ }
259
+
260
+ @JsonProperty("last")
261
+ @Deprecated
262
+ public Optional<String> getLast()
263
+ {
264
+ return last;
265
+ }
266
+
267
+ private class EntryList
268
+ extends AbstractList<String>
269
+ {
270
+ private final byte[] data;
271
+ private final List<Entry> entries;
272
+ private InputStream stream;
273
+ private int current;
274
+
275
+ private final ByteBuffer castBuffer = ByteBuffer.allocate(4);
276
+
277
+ public EntryList(byte[] data, List<Entry> entries)
278
+ {
279
+ this.data = data;
280
+ this.entries = entries;
281
+ try {
282
+ this.stream = new BufferedInputStream(new GZIPInputStream(new ByteArrayInputStream(data)));
283
+ }
284
+ catch (IOException ex) {
285
+ throw Throwables.propagate(ex);
286
+ }
287
+ this.current = 0;
288
+ }
289
+
290
+ @Override
291
+ public synchronized String get(int i)
292
+ {
293
+ Entry e = entries.get(i);
294
+ if (e.getIndex() < current) {
295
+ // rewind to the head
296
+ try {
297
+ stream.close();
298
+ stream = new BufferedInputStream(new GZIPInputStream(new ByteArrayInputStream(data)));
299
+ }
300
+ catch (IOException ex) {
301
+ throw Throwables.propagate(ex);
302
+ }
303
+ current = 0;
304
+ }
305
+
306
+ while (current < e.getIndex()) {
307
+ readNext();
308
+ }
309
+ // now current == e.getIndex()
310
+ return readNextString();
311
+ }
312
+
313
+ @Override
314
+ public int size()
315
+ {
316
+ return entries.size();
317
+ }
318
+
319
+ private byte[] readNext()
320
+ {
321
+ try {
322
+ stream.read(castBuffer.array());
323
+ int n = castBuffer.getInt(0);
324
+ byte[] b = new byte[n]; // here should be able to use a pooled buffer because read data is ignored if readNextString doesn't call this method
325
+ stream.read(b);
326
+
327
+ current++;
328
+
329
+ return b;
330
+ }
331
+ catch (IOException ex) {
332
+ throw Throwables.propagate(ex);
333
+ }
334
+ }
335
+
336
+ private String readNextString()
337
+ {
338
+ return new String(readNext(), StandardCharsets.UTF_8);
339
+ }
340
+ }
341
+ }
@@ -93,18 +93,6 @@ public class TestAzureBlobStorageFileInputPlugin
93
93
  assertEquals(5, task.getMaxConnectionRetry());
94
94
  }
95
95
 
96
- public ConfigSource config()
97
- {
98
- return Exec.newConfigSource()
99
- .set("account_name", AZURE_ACCOUNT_NAME)
100
- .set("account_key", AZURE_ACCOUNT_KEY)
101
- .set("container", AZURE_CONTAINER)
102
- .set("path_prefix", AZURE_PATH_PREFIX)
103
- .set("last_path", "")
104
- .set("file_ext", ".csv")
105
- .set("parser", parserConfig(schemaConfig()));
106
- }
107
-
108
96
  @Test(expected = ConfigException.class)
109
97
  public void checkDefaultValuesAccountNameIsNull()
110
98
  {
@@ -114,7 +102,6 @@ public class TestAzureBlobStorageFileInputPlugin
114
102
  .set("container", AZURE_CONTAINER)
115
103
  .set("path_prefix", AZURE_PATH_PREFIX)
116
104
  .set("last_path", "")
117
- .set("file_ext", ".csv")
118
105
  .set("parser", parserConfig(schemaConfig()));
119
106
 
120
107
  runner.transaction(config, new Control());
@@ -129,7 +116,6 @@ public class TestAzureBlobStorageFileInputPlugin
129
116
  .set("container", AZURE_CONTAINER)
130
117
  .set("path_prefix", AZURE_PATH_PREFIX)
131
118
  .set("last_path", "")
132
- .set("file_ext", ".csv")
133
119
  .set("parser", parserConfig(schemaConfig()));
134
120
 
135
121
  runner.transaction(config, new Control());
@@ -144,7 +130,6 @@ public class TestAzureBlobStorageFileInputPlugin
144
130
  .set("container", null)
145
131
  .set("path_prefix", AZURE_PATH_PREFIX)
146
132
  .set("last_path", "")
147
- .set("file_ext", ".csv")
148
133
  .set("parser", parserConfig(schemaConfig()));
149
134
 
150
135
  runner.transaction(config, new Control());
@@ -166,7 +151,7 @@ public class TestAzureBlobStorageFileInputPlugin
166
151
  public void testResume()
167
152
  {
168
153
  PluginTask task = config.loadConfig(PluginTask.class);
169
- task.setFiles(Arrays.asList("in/aa/a"));
154
+ task.setFiles(createFileList(Arrays.asList("in/aa/a"), task));
170
155
  ConfigDiff configDiff = plugin.resume(task.dump(), 0, new FileInputPlugin.Control()
171
156
  {
172
157
  @Override
@@ -190,8 +175,8 @@ public class TestAzureBlobStorageFileInputPlugin
190
175
  throws NoSuchMethodException, IllegalAccessException, InvocationTargetException
191
176
  {
192
177
  List<String> expected = Arrays.asList(
193
- AZURE_CONTAINER_IMPORT_DIRECTORY + "sample_01.csv",
194
- AZURE_CONTAINER_IMPORT_DIRECTORY + "sample_02.csv"
178
+ AZURE_CONTAINER_IMPORT_DIRECTORY + "sample_01.csv",
179
+ AZURE_CONTAINER_IMPORT_DIRECTORY + "sample_02.csv"
195
180
  );
196
181
 
197
182
  PluginTask task = config.loadConfig(PluginTask.class);
@@ -210,8 +195,9 @@ public class TestAzureBlobStorageFileInputPlugin
210
195
 
211
196
  Method listFiles = AzureBlobStorageFileInputPlugin.class.getDeclaredMethod("listFiles", CloudBlobClient.class, PluginTask.class);
212
197
  listFiles.setAccessible(true);
213
- List<String> actual = (List<String>) listFiles.invoke(plugin, client, task);
214
- assertEquals(expected, actual);
198
+ FileList actual = (FileList) listFiles.invoke(plugin, client, task);
199
+ assertEquals(expected.get(0), actual.get(0).get(0));
200
+ assertEquals(expected.get(1), actual.get(1).get(0));
215
201
  assertEquals(AZURE_CONTAINER_IMPORT_DIRECTORY + "sample_02.csv", configDiff.get(String.class, "last_path"));
216
202
  }
217
203
 
@@ -228,11 +214,26 @@ public class TestAzureBlobStorageFileInputPlugin
228
214
 
229
215
  Method listFiles = AzureBlobStorageFileInputPlugin.class.getDeclaredMethod("listFiles", CloudBlobClient.class, PluginTask.class);
230
216
  listFiles.setAccessible(true);
231
- task.setFiles((List<String>) listFiles.invoke(plugin, client, task));
217
+ task.setFiles((FileList) listFiles.invoke(plugin, client, task));
232
218
 
233
219
  assertRecords(config, output);
234
220
  }
235
221
 
222
+ @Test
223
+ public void testCreateNextToken() throws Exception
224
+ {
225
+ Method base64Encode = AzureBlobStorageFileInputPlugin.class.getDeclaredMethod("createNextToken", String.class);
226
+ base64Encode.setAccessible(true);
227
+
228
+ String expected = "2!92!MDAwMDI1IXJlYWRvbmx5L3NhbXBsZV8wMS50c3YuZ3ohMDAwMDI4ITk5OTktMTItMzFUMjM6NTk6NTkuOTk5OTk5OVoh";
229
+ String lastPath = "readonly/sample_01.tsv.gz";
230
+ assertEquals(expected, base64Encode.invoke(plugin, lastPath).toString());
231
+
232
+ expected = "2!120!MDAwMDQ2IXBhdGgvdGhhdC9oYXZlL2xvbmcvcGF0aC9uYW1lL3NhbXBsZV8wMS50c3YuZ3ohMDAwMDI4ITk5OTktMTItMzFUMjM6NTk6NTkuOTk5OTk5OVoh";
233
+ lastPath = "path/that/have/long/path/name/sample_01.tsv.gz";
234
+ assertEquals(expected, base64Encode.invoke(plugin, lastPath).toString());
235
+ }
236
+
236
237
  static List<TaskReport> emptyTaskReports(int taskCount)
237
238
  {
238
239
  ImmutableList.Builder<TaskReport> reports = new ImmutableList.Builder<>();
@@ -256,6 +257,17 @@ public class TestAzureBlobStorageFileInputPlugin
256
257
  }
257
258
  }
258
259
 
260
+ public ConfigSource config()
261
+ {
262
+ return Exec.newConfigSource()
263
+ .set("account_name", AZURE_ACCOUNT_NAME)
264
+ .set("account_key", AZURE_ACCOUNT_KEY)
265
+ .set("container", AZURE_CONTAINER)
266
+ .set("path_prefix", AZURE_PATH_PREFIX)
267
+ .set("last_path", "")
268
+ .set("parser", parserConfig(schemaConfig()));
269
+ }
270
+
259
271
  private ImmutableMap<String, Object> parserConfig(ImmutableList<Object> schemaConfig)
260
272
  {
261
273
  ImmutableMap.Builder<String, Object> builder = new ImmutableMap.Builder<>();
@@ -340,4 +352,13 @@ public class TestAzureBlobStorageFileInputPlugin
340
352
  }
341
353
  return dir;
342
354
  }
355
+
356
+ private FileList createFileList(List<String> fileList, PluginTask task)
357
+ {
358
+ FileList.Builder builder = new FileList.Builder(task);
359
+ for (String file : fileList) {
360
+ builder.add(file, 0);
361
+ }
362
+ return builder.build();
363
+ }
343
364
  }
@@ -0,0 +1,87 @@
1
+ package org.embulk.input.azure_blob_storage;
2
+
3
+ import org.embulk.EmbulkTestRuntime;
4
+ import org.embulk.config.ConfigSource;
5
+ import org.junit.Before;
6
+ import org.junit.Rule;
7
+ import org.junit.Test;
8
+
9
+ import static org.junit.Assert.assertEquals;
10
+
11
+ public class TestFileList
12
+ {
13
+ @Rule
14
+ public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
15
+
16
+ private ConfigSource config;
17
+
18
+ @Before
19
+ public void createConfigSource()
20
+ {
21
+ config = runtime.getExec().newConfigSource();
22
+ }
23
+
24
+ @Test
25
+ public void checkMinTaskSize()
26
+ throws Exception
27
+ {
28
+ { // not specify min_task_size
29
+ FileList fileList = newFileList(config.deepCopy(),
30
+ "sample_00", 100L,
31
+ "sample_01", 150L,
32
+ "sample_02", 350L);
33
+
34
+ assertEquals(3, fileList.getTaskCount());
35
+ assertEquals("sample_00", fileList.get(0).get(0));
36
+ assertEquals("sample_01", fileList.get(1).get(0));
37
+ assertEquals("sample_02", fileList.get(2).get(0));
38
+ }
39
+
40
+ {
41
+ FileList fileList = newFileList(config.deepCopy().set("min_task_size", 100),
42
+ "sample_00", 100L,
43
+ "sample_01", 150L,
44
+ "sample_02", 350L);
45
+
46
+ assertEquals(3, fileList.getTaskCount());
47
+ assertEquals("sample_00", fileList.get(0).get(0));
48
+ assertEquals("sample_01", fileList.get(1).get(0));
49
+ assertEquals("sample_02", fileList.get(2).get(0));
50
+ }
51
+
52
+ {
53
+ FileList fileList = newFileList(config.deepCopy().set("min_task_size", 200),
54
+ "sample_00", 100L,
55
+ "sample_01", 150L,
56
+ "sample_02", 350L);
57
+
58
+ assertEquals(2, fileList.getTaskCount());
59
+ assertEquals("sample_00", fileList.get(0).get(0));
60
+ assertEquals("sample_01", fileList.get(0).get(1));
61
+ assertEquals("sample_02", fileList.get(1).get(0));
62
+ }
63
+
64
+ {
65
+ FileList fileList = newFileList(config.deepCopy().set("min_task_size", 700),
66
+ "sample_00", 100L,
67
+ "sample_01", 150L,
68
+ "sample_02", 350L);
69
+
70
+ assertEquals(1, fileList.getTaskCount());
71
+ assertEquals("sample_00", fileList.get(0).get(0));
72
+ assertEquals("sample_01", fileList.get(0).get(1));
73
+ assertEquals("sample_02", fileList.get(0).get(2));
74
+ }
75
+ }
76
+
77
+ private static FileList newFileList(ConfigSource config, Object... nameAndSize)
78
+ {
79
+ FileList.Builder builder = new FileList.Builder(config);
80
+
81
+ for (int i = 0; i < nameAndSize.length; i += 2) {
82
+ builder.add((String) nameAndSize[i], (long) nameAndSize[i + 1]);
83
+ }
84
+
85
+ return builder.build();
86
+ }
87
+ }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-azure_blob_storage
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Satoshi Akama
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-03-16 00:00:00.000000000 Z
11
+ date: 2016-03-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -57,12 +57,14 @@ files:
57
57
  - gradlew.bat
58
58
  - lib/embulk/input/azure_blob_storage.rb
59
59
  - src/main/java/org/embulk/input/azure_blob_storage/AzureBlobStorageFileInputPlugin.java
60
+ - src/main/java/org/embulk/input/azure_blob_storage/FileList.java
60
61
  - src/test/java/org/embulk/input/azure_blob_storage/TestAzureBlobStorageFileInputPlugin.java
62
+ - src/test/java/org/embulk/input/azure_blob_storage/TestFileList.java
61
63
  - src/test/resources/sample_01.csv
62
64
  - src/test/resources/sample_02.csv
63
65
  - classpath/azure-storage-4.0.0.jar
64
66
  - classpath/commons-lang3-3.4.jar
65
- - classpath/embulk-input-azure_blob_storage-0.1.3.jar
67
+ - classpath/embulk-input-azure_blob_storage-0.1.4.jar
66
68
  - classpath/jackson-core-2.6.0.jar
67
69
  homepage: https://github.com/sakama/embulk-input-azure_blob_storage
68
70
  licenses: