embulk-input-azure_blob_storage 0.1.4 → 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -1
- data/README.md +3 -3
- data/build.gradle +3 -3
- data/src/main/java/org/embulk/input/azure_blob_storage/AzureBlobStorageFileInputPlugin.java +122 -55
- data/src/test/java/org/embulk/input/azure_blob_storage/TestAzureBlobStorageFileInputPlugin.java +1 -1
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 60cc4b85fafbfd719fdb70232f8c7b44c3153d9f
|
4
|
+
data.tar.gz: 50af2362a8edbc752bbc1116a36ede8a3f64aa84
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b11039548309b1d49bc7b87e54a6ff60eb36863e8f75127f087f71520a957d833aac197f577dbbf262ae0d68a656edd92b0c5fa5ed42fb7c90c62f8e5f551b15
|
7
|
+
data.tar.gz: ecb4b533a64440f78a07ebdb8e1a7b5a4dba91a79a4c7121e32463ecdb4e5746d26186078ae938dd73774273001d917ac09b914d20fa312690ccde91a49986d5
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,7 @@
|
|
1
|
+
## 0.1.5 - 2016-03-30
|
2
|
+
|
3
|
+
* [maintenance] Use RetryExecutor when retry is needed [#8](https://github.com/sakama/embulk-input-azure_blob_storage/pull/8)
|
4
|
+
|
1
5
|
## 0.1.4 - 2016-03-22
|
2
6
|
|
3
7
|
* [new feature] Support `last_path` option [#7](https://github.com/sakama/embulk-input-azure_blob_storage/pull/7)
|
@@ -15,4 +19,4 @@
|
|
15
19
|
## 0.1.1 - 2015-10-11
|
16
20
|
|
17
21
|
* [maintenance] Change 'DefaultEndpointsProtocol' from http to https [#1](https://github.com/sakama/embulk-input-azure_blob_storage/pull/1)
|
18
|
-
* [maintenance] Specify targetCompatibility = 1.7
|
22
|
+
* [maintenance] Specify targetCompatibility = 1.7
|
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
# Azure
|
1
|
+
# Azure Blob Storage file input plugin for Embulk
|
2
2
|
|
3
|
-
[Embulk](http://www.embulk.org/) file input plugin read files stored on [Microsoft Azure](https://azure.microsoft.com/) [
|
3
|
+
[Embulk](http://www.embulk.org/) file input plugin that reads files stored on [Microsoft Azure](https://azure.microsoft.com/) [Blob Storage](https://azure.microsoft.com/en-us/documentation/articles/storage-introduction/#blob-storage)
|
4
4
|
|
5
5
|
## Overview
|
6
6
|
|
@@ -129,4 +129,4 @@ $ launchctl load ~/Library/LaunchAgents/environment.plist
|
|
129
129
|
$ launchctl getenv AZURE_ACCOUNT_NAME //try to get value.
|
130
130
|
|
131
131
|
Then start your applications.
|
132
|
-
```
|
132
|
+
```
|
data/build.gradle
CHANGED
@@ -17,7 +17,7 @@ configurations {
|
|
17
17
|
sourceCompatibility = 1.7
|
18
18
|
targetCompatibility = 1.7
|
19
19
|
|
20
|
-
version = "0.1.
|
20
|
+
version = "0.1.5"
|
21
21
|
|
22
22
|
dependencies {
|
23
23
|
compile "org.embulk:embulk-core:0.8.2"
|
@@ -65,8 +65,8 @@ Gem::Specification.new do |spec|
|
|
65
65
|
spec.name = "${project.name}"
|
66
66
|
spec.version = "${project.version}"
|
67
67
|
spec.authors = ["Satoshi Akama"]
|
68
|
-
spec.summary = %[Microsoft Azure
|
69
|
-
spec.description = %[Reads files stored on Microsoft Azure
|
68
|
+
spec.summary = %[Microsoft Azure Blob Storage file input plugin for Embulk]
|
69
|
+
spec.description = %[Reads files stored on Microsoft Azure Blob Storage.]
|
70
70
|
spec.email = ["satoshiakama@gmail.com"]
|
71
71
|
spec.licenses = ["Apache-2.0"]
|
72
72
|
spec.homepage = "https://github.com/sakama/embulk-input-azure_blob_storage"
|
@@ -27,7 +27,10 @@ import org.embulk.spi.Exec;
|
|
27
27
|
import org.embulk.spi.FileInputPlugin;
|
28
28
|
import org.embulk.spi.TransactionalFileInput;
|
29
29
|
import org.embulk.spi.util.InputStreamFileInput;
|
30
|
+
import org.embulk.spi.util.RetryExecutor.RetryGiveupException;
|
31
|
+
import org.embulk.spi.util.RetryExecutor.Retryable;
|
30
32
|
import org.slf4j.Logger;
|
33
|
+
import static org.embulk.spi.util.RetryExecutor.retryExecutor;
|
31
34
|
|
32
35
|
import java.io.IOException;
|
33
36
|
import java.io.InputStream;
|
@@ -63,7 +66,7 @@ public class AzureBlobStorageFileInputPlugin
|
|
63
66
|
int getMaxResults();
|
64
67
|
|
65
68
|
@Config("max_connection_retry")
|
66
|
-
@ConfigDefault("
|
69
|
+
@ConfigDefault("10") // Retry up to 10 times if a request to Azure Blob Storage fails.
|
67
70
|
int getMaxConnectionRetry();
|
68
71
|
|
69
72
|
FileList getFiles();
|
@@ -126,44 +129,85 @@ public class AzureBlobStorageFileInputPlugin
|
|
126
129
|
}
|
127
130
|
FileList.Builder builder = new FileList.Builder(task);
|
128
131
|
|
129
|
-
return listFilesWithPrefix(builder, client, task.getContainer(), task.getPathPrefix(),
|
132
|
+
return listFilesWithPrefix(builder, client, task.getContainer(), task.getPathPrefix(),
|
133
|
+
task.getLastPath(), task.getMaxResults(), task.getMaxConnectionRetry());
|
130
134
|
}
|
131
135
|
|
132
|
-
private static FileList listFilesWithPrefix(FileList.Builder builder, CloudBlobClient client,
|
133
|
-
|
136
|
+
private static FileList listFilesWithPrefix(final FileList.Builder builder, final CloudBlobClient client,
|
137
|
+
final String containerName, final String prefix, final Optional<String> lastPath,
|
138
|
+
final int maxResults, final int maxConnectionRetry)
|
134
139
|
{
|
135
|
-
String lastKey = (lastPath.isPresent() && !lastPath.get().isEmpty()) ? createNextToken(lastPath.get()) : null;
|
136
|
-
ResultContinuation token = null;
|
137
|
-
if (lastKey != null) {
|
138
|
-
token = new ResultContinuation();
|
139
|
-
token.setContinuationType(ResultContinuationType.BLOB);
|
140
|
-
log.debug("lastPath: {}", lastPath.get());
|
141
|
-
log.debug("lastPath(Base64encoded): {}", lastKey);
|
142
|
-
token.setNextMarker(lastKey);
|
143
|
-
}
|
144
|
-
|
140
|
+
final String lastKey = (lastPath.isPresent() && !lastPath.get().isEmpty()) ? createNextToken(lastPath.get()) : null;
|
145
141
|
try {
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
142
|
+
return retryExecutor()
|
143
|
+
.withRetryLimit(maxConnectionRetry)
|
144
|
+
.withInitialRetryWait(500)
|
145
|
+
.withMaxRetryWait(30 * 1000)
|
146
|
+
.runInterruptible(new Retryable<FileList>() {
|
147
|
+
@Override
|
148
|
+
public FileList call() throws StorageException, URISyntaxException, IOException
|
149
|
+
{
|
150
|
+
ResultContinuation token = null;
|
151
|
+
if (lastKey != null) {
|
152
|
+
token = new ResultContinuation();
|
153
|
+
token.setContinuationType(ResultContinuationType.BLOB);
|
154
|
+
log.debug("lastPath: {}", lastPath.get());
|
155
|
+
log.debug("lastPath(Base64encoded): {}", lastKey);
|
156
|
+
token.setNextMarker(lastKey);
|
157
|
+
}
|
158
|
+
|
159
|
+
CloudBlobContainer container = client.getContainerReference(containerName);
|
160
|
+
ResultSegment<ListBlobItem> blobs;
|
161
|
+
do {
|
162
|
+
blobs = container.listBlobsSegmented(prefix, true, null, maxResults, token, null, null);
|
163
|
+
log.debug(String.format("result count(include directory):%s continuationToken:%s", blobs.getLength(), blobs.getContinuationToken()));
|
164
|
+
for (ListBlobItem blobItem : blobs.getResults()) {
|
165
|
+
if (blobItem instanceof CloudBlob) {
|
166
|
+
CloudBlob blob = (CloudBlob) blobItem;
|
167
|
+
if (blob.exists() && !blob.getUri().toString().endsWith("/")) {
|
168
|
+
builder.add(blob.getName(), blob.getProperties().getLength());
|
169
|
+
log.debug(String.format("name:%s, class:%s, uri:%s", blob.getName(), blob.getClass(), blob.getUri()));
|
170
|
+
}
|
171
|
+
}
|
172
|
+
}
|
173
|
+
token = blobs.getContinuationToken();
|
174
|
+
} while (blobs.getContinuationToken() != null);
|
175
|
+
return builder.build();
|
176
|
+
}
|
177
|
+
|
178
|
+
@Override
|
179
|
+
public boolean isRetryableException(Exception exception)
|
180
|
+
{
|
181
|
+
return true;
|
182
|
+
}
|
183
|
+
|
184
|
+
@Override
|
185
|
+
public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait)
|
186
|
+
throws RetryGiveupException
|
187
|
+
{
|
188
|
+
String message = String.format("SFTP GET request failed. Retrying %d/%d after %d seconds. Message: %s",
|
189
|
+
retryCount, retryLimit, retryWait / 1000, exception.getMessage());
|
190
|
+
if (retryCount % 3 == 0) {
|
191
|
+
log.warn(message, exception);
|
192
|
+
}
|
193
|
+
else {
|
194
|
+
log.warn(message);
|
195
|
+
}
|
157
196
|
}
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
197
|
+
|
198
|
+
@Override
|
199
|
+
public void onGiveup(Exception firstException, Exception lastException)
|
200
|
+
throws RetryGiveupException
|
201
|
+
{
|
202
|
+
}
|
203
|
+
});
|
204
|
+
}
|
205
|
+
catch (RetryGiveupException ex) {
|
206
|
+
throw Throwables.propagate(ex.getCause());
|
162
207
|
}
|
163
|
-
catch (
|
208
|
+
catch (InterruptedException ex) {
|
164
209
|
throw Throwables.propagate(ex);
|
165
210
|
}
|
166
|
-
return builder.build();
|
167
211
|
}
|
168
212
|
|
169
213
|
@Override
|
@@ -215,31 +259,54 @@ public class AzureBlobStorageFileInputPlugin
|
|
215
259
|
if (opened || !iterator.hasNext()) {
|
216
260
|
return null;
|
217
261
|
}
|
262
|
+
final String key = iterator.next();
|
218
263
|
opened = true;
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
264
|
+
try {
|
265
|
+
return retryExecutor()
|
266
|
+
.withRetryLimit(maxConnectionRetry)
|
267
|
+
.withInitialRetryWait(500)
|
268
|
+
.withMaxRetryWait(30 * 1000)
|
269
|
+
.runInterruptible(new Retryable<InputStream>() {
|
270
|
+
@Override
|
271
|
+
public InputStream call() throws StorageException, URISyntaxException, IOException
|
272
|
+
{
|
273
|
+
CloudBlobContainer container = client.getContainerReference(containerName);
|
274
|
+
CloudBlob blob = container.getBlockBlobReference(key);
|
275
|
+
return blob.openInputStream();
|
276
|
+
}
|
277
|
+
|
278
|
+
@Override
|
279
|
+
public boolean isRetryableException(Exception exception)
|
280
|
+
{
|
281
|
+
return true;
|
282
|
+
}
|
283
|
+
|
284
|
+
@Override
|
285
|
+
public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait)
|
286
|
+
throws RetryGiveupException
|
287
|
+
{
|
288
|
+
String message = String.format("Azure Blob Storage GET request failed. Retrying %d/%d after %d seconds. Message: %s",
|
289
|
+
retryCount, retryLimit, retryWait / 1000, exception.getMessage());
|
290
|
+
if (retryCount % 3 == 0) {
|
291
|
+
log.warn(message, exception);
|
292
|
+
}
|
293
|
+
else {
|
294
|
+
log.warn(message);
|
295
|
+
}
|
296
|
+
}
|
297
|
+
|
298
|
+
@Override
|
299
|
+
public void onGiveup(Exception firstException, Exception lastException)
|
300
|
+
throws RetryGiveupException
|
301
|
+
{
|
302
|
+
}
|
303
|
+
});
|
304
|
+
}
|
305
|
+
catch (RetryGiveupException ex) {
|
306
|
+
throw Throwables.propagate(ex.getCause());
|
307
|
+
}
|
308
|
+
catch (InterruptedException ex) {
|
309
|
+
throw Throwables.propagate(ex);
|
243
310
|
}
|
244
311
|
}
|
245
312
|
|
data/src/test/java/org/embulk/input/azure_blob_storage/TestAzureBlobStorageFileInputPlugin.java
CHANGED
@@ -90,7 +90,7 @@ public class TestAzureBlobStorageFileInputPlugin
|
|
90
90
|
|
91
91
|
PluginTask task = config.loadConfig(PluginTask.class);
|
92
92
|
assertEquals(5000, task.getMaxResults());
|
93
|
-
assertEquals(
|
93
|
+
assertEquals(10, task.getMaxConnectionRetry());
|
94
94
|
}
|
95
95
|
|
96
96
|
@Test(expected = ConfigException.class)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-azure_blob_storage
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Satoshi Akama
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-03-
|
11
|
+
date: 2016-03-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -38,7 +38,7 @@ dependencies:
|
|
38
38
|
- - '>='
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '10.0'
|
41
|
-
description: Reads files stored on Microsoft Azure
|
41
|
+
description: Reads files stored on Microsoft Azure Blob Storage.
|
42
42
|
email:
|
43
43
|
- satoshiakama@gmail.com
|
44
44
|
executables: []
|
@@ -64,7 +64,7 @@ files:
|
|
64
64
|
- src/test/resources/sample_02.csv
|
65
65
|
- classpath/azure-storage-4.0.0.jar
|
66
66
|
- classpath/commons-lang3-3.4.jar
|
67
|
-
- classpath/embulk-input-azure_blob_storage-0.1.
|
67
|
+
- classpath/embulk-input-azure_blob_storage-0.1.5.jar
|
68
68
|
- classpath/jackson-core-2.6.0.jar
|
69
69
|
homepage: https://github.com/sakama/embulk-input-azure_blob_storage
|
70
70
|
licenses:
|
@@ -89,5 +89,5 @@ rubyforge_project:
|
|
89
89
|
rubygems_version: 2.1.9
|
90
90
|
signing_key:
|
91
91
|
specification_version: 4
|
92
|
-
summary: Microsoft Azure
|
92
|
+
summary: Microsoft Azure Blob Storage file input plugin for Embulk
|
93
93
|
test_files: []
|