embulk-input-azure_blob_storage 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -1
- data/README.md +3 -3
- data/build.gradle +3 -3
- data/src/main/java/org/embulk/input/azure_blob_storage/AzureBlobStorageFileInputPlugin.java +122 -55
- data/src/test/java/org/embulk/input/azure_blob_storage/TestAzureBlobStorageFileInputPlugin.java +1 -1
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 60cc4b85fafbfd719fdb70232f8c7b44c3153d9f
|
4
|
+
data.tar.gz: 50af2362a8edbc752bbc1116a36ede8a3f64aa84
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b11039548309b1d49bc7b87e54a6ff60eb36863e8f75127f087f71520a957d833aac197f577dbbf262ae0d68a656edd92b0c5fa5ed42fb7c90c62f8e5f551b15
|
7
|
+
data.tar.gz: ecb4b533a64440f78a07ebdb8e1a7b5a4dba91a79a4c7121e32463ecdb4e5746d26186078ae938dd73774273001d917ac09b914d20fa312690ccde91a49986d5
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,7 @@
|
|
1
|
+
## 0.1.5 - 2016-03-30
|
2
|
+
|
3
|
+
* [maintenance] Use RetryExecutor when retry is needed [#8](https://github.com/sakama/embulk-input-azure_blob_storage/pull/8)
|
4
|
+
|
1
5
|
## 0.1.4 - 2016-03-22
|
2
6
|
|
3
7
|
* [new feature] Support `last_path` option [#7](https://github.com/sakama/embulk-input-azure_blob_storage/pull/7)
|
@@ -15,4 +19,4 @@
|
|
15
19
|
## 0.1.1 - 2015-10-11
|
16
20
|
|
17
21
|
* [maintenance] Change 'DefaultEndpointsProtocol' from http to https [#1](https://github.com/sakama/embulk-input-azure_blob_storage/pull/1)
|
18
|
-
* [maintenance] Specify targetCompatibility = 1.7
|
22
|
+
* [maintenance] Specify targetCompatibility = 1.7
|
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
# Azure
|
1
|
+
# Azure Blob Storage file input plugin for Embulk
|
2
2
|
|
3
|
-
[Embulk](http://www.embulk.org/) file input plugin read files stored on [Microsoft Azure](https://azure.microsoft.com/) [
|
3
|
+
[Embulk](http://www.embulk.org/) file input plugin read files stored on [Microsoft Azure](https://azure.microsoft.com/) [Blob Storage](https://azure.microsoft.com/en-us/documentation/articles/storage-introduction/#blob-storage)
|
4
4
|
|
5
5
|
## Overview
|
6
6
|
|
@@ -129,4 +129,4 @@ $ launchctl load ~/Library/LaunchAgents/environment.plist
|
|
129
129
|
$ launchctl getenv AZURE_ACCOUNT_NAME //try to get value.
|
130
130
|
|
131
131
|
Then start your applications.
|
132
|
-
```
|
132
|
+
```
|
data/build.gradle
CHANGED
@@ -17,7 +17,7 @@ configurations {
|
|
17
17
|
sourceCompatibility = 1.7
|
18
18
|
targetCompatibility = 1.7
|
19
19
|
|
20
|
-
version = "0.1.4"
|
20
|
+
version = "0.1.5"
|
21
21
|
|
22
22
|
dependencies {
|
23
23
|
compile "org.embulk:embulk-core:0.8.2"
|
@@ -65,8 +65,8 @@ Gem::Specification.new do |spec|
|
|
65
65
|
spec.name = "${project.name}"
|
66
66
|
spec.version = "${project.version}"
|
67
67
|
spec.authors = ["Satoshi Akama"]
|
68
|
-
spec.summary = %[Microsoft Azure
|
69
|
-
spec.description = %[Reads files stored on Microsoft Azure
|
68
|
+
spec.summary = %[Microsoft Azure Blob Storage file input plugin for Embulk]
|
69
|
+
spec.description = %[Reads files stored on Microsoft Azure Blob Storage.]
|
70
70
|
spec.email = ["satoshiakama@gmail.com"]
|
71
71
|
spec.licenses = ["Apache-2.0"]
|
72
72
|
spec.homepage = "https://github.com/sakama/embulk-input-azure_blob_storage"
|
@@ -27,7 +27,10 @@ import org.embulk.spi.Exec;
|
|
27
27
|
import org.embulk.spi.FileInputPlugin;
|
28
28
|
import org.embulk.spi.TransactionalFileInput;
|
29
29
|
import org.embulk.spi.util.InputStreamFileInput;
|
30
|
+
import org.embulk.spi.util.RetryExecutor.RetryGiveupException;
|
31
|
+
import org.embulk.spi.util.RetryExecutor.Retryable;
|
30
32
|
import org.slf4j.Logger;
|
33
|
+
import static org.embulk.spi.util.RetryExecutor.retryExecutor;
|
31
34
|
|
32
35
|
import java.io.IOException;
|
33
36
|
import java.io.InputStream;
|
@@ -63,7 +66,7 @@ public class AzureBlobStorageFileInputPlugin
|
|
63
66
|
int getMaxResults();
|
64
67
|
|
65
68
|
@Config("max_connection_retry")
|
66
|
-
@ConfigDefault("
|
69
|
+
@ConfigDefault("10") // Retry up to 10 times when a request to Azure Blob Storage fails.
|
67
70
|
int getMaxConnectionRetry();
|
68
71
|
|
69
72
|
FileList getFiles();
|
@@ -126,44 +129,85 @@ public class AzureBlobStorageFileInputPlugin
|
|
126
129
|
}
|
127
130
|
FileList.Builder builder = new FileList.Builder(task);
|
128
131
|
|
129
|
-
return listFilesWithPrefix(builder, client, task.getContainer(), task.getPathPrefix(),
|
132
|
+
return listFilesWithPrefix(builder, client, task.getContainer(), task.getPathPrefix(),
|
133
|
+
task.getLastPath(), task.getMaxResults(), task.getMaxConnectionRetry());
|
130
134
|
}
|
131
135
|
|
132
|
-
private static FileList listFilesWithPrefix(FileList.Builder builder, CloudBlobClient client,
|
133
|
-
|
136
|
+
private static FileList listFilesWithPrefix(final FileList.Builder builder, final CloudBlobClient client,
|
137
|
+
final String containerName, final String prefix, final Optional<String> lastPath,
|
138
|
+
final int maxResults, final int maxConnectionRetry)
|
134
139
|
{
|
135
|
-
String lastKey = (lastPath.isPresent() && !lastPath.get().isEmpty()) ? createNextToken(lastPath.get()) : null;
|
136
|
-
ResultContinuation token = null;
|
137
|
-
if (lastKey != null) {
|
138
|
-
token = new ResultContinuation();
|
139
|
-
token.setContinuationType(ResultContinuationType.BLOB);
|
140
|
-
log.debug("lastPath: {}", lastPath.get());
|
141
|
-
log.debug("lastPath(Base64encoded): {}", lastKey);
|
142
|
-
token.setNextMarker(lastKey);
|
143
|
-
}
|
144
|
-
|
140
|
+
final String lastKey = (lastPath.isPresent() && !lastPath.get().isEmpty()) ? createNextToken(lastPath.get()) : null;
|
145
141
|
try {
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
142
|
+
return retryExecutor()
|
143
|
+
.withRetryLimit(maxConnectionRetry)
|
144
|
+
.withInitialRetryWait(500)
|
145
|
+
.withMaxRetryWait(30 * 1000)
|
146
|
+
.runInterruptible(new Retryable<FileList>() {
|
147
|
+
@Override
|
148
|
+
public FileList call() throws StorageException, URISyntaxException, IOException
|
149
|
+
{
|
150
|
+
ResultContinuation token = null;
|
151
|
+
if (lastKey != null) {
|
152
|
+
token = new ResultContinuation();
|
153
|
+
token.setContinuationType(ResultContinuationType.BLOB);
|
154
|
+
log.debug("lastPath: {}", lastPath.get());
|
155
|
+
log.debug("lastPath(Base64encoded): {}", lastKey);
|
156
|
+
token.setNextMarker(lastKey);
|
157
|
+
}
|
158
|
+
|
159
|
+
CloudBlobContainer container = client.getContainerReference(containerName);
|
160
|
+
ResultSegment<ListBlobItem> blobs;
|
161
|
+
do {
|
162
|
+
blobs = container.listBlobsSegmented(prefix, true, null, maxResults, token, null, null);
|
163
|
+
log.debug(String.format("result count(include directory):%s continuationToken:%s", blobs.getLength(), blobs.getContinuationToken()));
|
164
|
+
for (ListBlobItem blobItem : blobs.getResults()) {
|
165
|
+
if (blobItem instanceof CloudBlob) {
|
166
|
+
CloudBlob blob = (CloudBlob) blobItem;
|
167
|
+
if (blob.exists() && !blob.getUri().toString().endsWith("/")) {
|
168
|
+
builder.add(blob.getName(), blob.getProperties().getLength());
|
169
|
+
log.debug(String.format("name:%s, class:%s, uri:%s", blob.getName(), blob.getClass(), blob.getUri()));
|
170
|
+
}
|
171
|
+
}
|
172
|
+
}
|
173
|
+
token = blobs.getContinuationToken();
|
174
|
+
} while (blobs.getContinuationToken() != null);
|
175
|
+
return builder.build();
|
176
|
+
}
|
177
|
+
|
178
|
+
@Override
|
179
|
+
public boolean isRetryableException(Exception exception)
|
180
|
+
{
|
181
|
+
return true;
|
182
|
+
}
|
183
|
+
|
184
|
+
@Override
|
185
|
+
public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait)
|
186
|
+
throws RetryGiveupException
|
187
|
+
{
|
188
|
+
String message = String.format("SFTP GET request failed. Retrying %d/%d after %d seconds. Message: %s",
|
189
|
+
retryCount, retryLimit, retryWait / 1000, exception.getMessage());
|
190
|
+
if (retryCount % 3 == 0) {
|
191
|
+
log.warn(message, exception);
|
192
|
+
}
|
193
|
+
else {
|
194
|
+
log.warn(message);
|
195
|
+
}
|
157
196
|
}
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
197
|
+
|
198
|
+
@Override
|
199
|
+
public void onGiveup(Exception firstException, Exception lastException)
|
200
|
+
throws RetryGiveupException
|
201
|
+
{
|
202
|
+
}
|
203
|
+
});
|
204
|
+
}
|
205
|
+
catch (RetryGiveupException ex) {
|
206
|
+
throw Throwables.propagate(ex.getCause());
|
162
207
|
}
|
163
|
-
catch (
|
208
|
+
catch (InterruptedException ex) {
|
164
209
|
throw Throwables.propagate(ex);
|
165
210
|
}
|
166
|
-
return builder.build();
|
167
211
|
}
|
168
212
|
|
169
213
|
@Override
|
@@ -215,31 +259,54 @@ public class AzureBlobStorageFileInputPlugin
|
|
215
259
|
if (opened || !iterator.hasNext()) {
|
216
260
|
return null;
|
217
261
|
}
|
262
|
+
final String key = iterator.next();
|
218
263
|
opened = true;
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
264
|
+
try {
|
265
|
+
return retryExecutor()
|
266
|
+
.withRetryLimit(maxConnectionRetry)
|
267
|
+
.withInitialRetryWait(500)
|
268
|
+
.withMaxRetryWait(30 * 1000)
|
269
|
+
.runInterruptible(new Retryable<InputStream>() {
|
270
|
+
@Override
|
271
|
+
public InputStream call() throws StorageException, URISyntaxException, IOException
|
272
|
+
{
|
273
|
+
CloudBlobContainer container = client.getContainerReference(containerName);
|
274
|
+
CloudBlob blob = container.getBlockBlobReference(key);
|
275
|
+
return blob.openInputStream();
|
276
|
+
}
|
277
|
+
|
278
|
+
@Override
|
279
|
+
public boolean isRetryableException(Exception exception)
|
280
|
+
{
|
281
|
+
return true;
|
282
|
+
}
|
283
|
+
|
284
|
+
@Override
|
285
|
+
public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait)
|
286
|
+
throws RetryGiveupException
|
287
|
+
{
|
288
|
+
String message = String.format("Azure Blob Storage GET request failed. Retrying %d/%d after %d seconds. Message: %s",
|
289
|
+
retryCount, retryLimit, retryWait / 1000, exception.getMessage());
|
290
|
+
if (retryCount % 3 == 0) {
|
291
|
+
log.warn(message, exception);
|
292
|
+
}
|
293
|
+
else {
|
294
|
+
log.warn(message);
|
295
|
+
}
|
296
|
+
}
|
297
|
+
|
298
|
+
@Override
|
299
|
+
public void onGiveup(Exception firstException, Exception lastException)
|
300
|
+
throws RetryGiveupException
|
301
|
+
{
|
302
|
+
}
|
303
|
+
});
|
304
|
+
}
|
305
|
+
catch (RetryGiveupException ex) {
|
306
|
+
throw Throwables.propagate(ex.getCause());
|
307
|
+
}
|
308
|
+
catch (InterruptedException ex) {
|
309
|
+
throw Throwables.propagate(ex);
|
243
310
|
}
|
244
311
|
}
|
245
312
|
|
data/src/test/java/org/embulk/input/azure_blob_storage/TestAzureBlobStorageFileInputPlugin.java
CHANGED
@@ -90,7 +90,7 @@ public class TestAzureBlobStorageFileInputPlugin
|
|
90
90
|
|
91
91
|
PluginTask task = config.loadConfig(PluginTask.class);
|
92
92
|
assertEquals(5000, task.getMaxResults());
|
93
|
-
assertEquals(
|
93
|
+
assertEquals(10, task.getMaxConnectionRetry());
|
94
94
|
}
|
95
95
|
|
96
96
|
@Test(expected = ConfigException.class)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-azure_blob_storage
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Satoshi Akama
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-03-
|
11
|
+
date: 2016-03-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -38,7 +38,7 @@ dependencies:
|
|
38
38
|
- - '>='
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '10.0'
|
41
|
-
description: Reads files stored on Microsoft Azure
|
41
|
+
description: Reads files stored on Microsoft Azure Blob Storage.
|
42
42
|
email:
|
43
43
|
- satoshiakama@gmail.com
|
44
44
|
executables: []
|
@@ -64,7 +64,7 @@ files:
|
|
64
64
|
- src/test/resources/sample_02.csv
|
65
65
|
- classpath/azure-storage-4.0.0.jar
|
66
66
|
- classpath/commons-lang3-3.4.jar
|
67
|
-
- classpath/embulk-input-azure_blob_storage-0.1.4.jar
|
67
|
+
- classpath/embulk-input-azure_blob_storage-0.1.5.jar
|
68
68
|
- classpath/jackson-core-2.6.0.jar
|
69
69
|
homepage: https://github.com/sakama/embulk-input-azure_blob_storage
|
70
70
|
licenses:
|
@@ -89,5 +89,5 @@ rubyforge_project:
|
|
89
89
|
rubygems_version: 2.1.9
|
90
90
|
signing_key:
|
91
91
|
specification_version: 4
|
92
|
-
summary: Microsoft Azure
|
92
|
+
summary: Microsoft Azure Blob Storage file input plugin for Embulk
|
93
93
|
test_files: []
|