embulk-input-sftp 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -1
- data/README.md +0 -2
- data/build.gradle +1 -1
- data/src/main/java/org/embulk/input/sftp/SftpFileInput.java +72 -50
- data/src/main/java/org/embulk/input/sftp/SingleFileProvider.java +54 -24
- data/src/test/java/org/embulk/input/sftp/TestSftpFileInputPlugin.java +36 -9
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 107c7dbf7c423b83b5677db6da16b272c8dccab4
|
4
|
+
data.tar.gz: edcada631a85858a63c57b7663aedbdb2ab365f6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d28c5e3ba7db475342b75df902b2f588e3259384138e3b1ee696e6582aa1d9ea1fecf5e2f0955cda3a2e19454b69c1d1d346c53b1153030b6406e0680cf03519
|
7
|
+
data.tar.gz: 1e7b0e362480f2e610c87871f49f020458054ea5d114a6ab064dde3bf32cf021e44a1954a1ada5f7057fe85d4e780ceaec9b7c5c0e277af2d4f99b283646f26d
|
data/CHANGELOG.md
CHANGED
@@ -1,6 +1,10 @@
|
|
1
|
+
## 0.1.2 - 2016-03-23
|
2
|
+
|
3
|
+
* [maintenance] Use the RetryExecutor provided by embulk-core for retries [#9](https://github.com/sakama/embulk-input-sftp/pull/9)
|
4
|
+
|
1
5
|
## 0.1.1 - 2016-03-18
|
2
6
|
|
3
7
|
* [feature] Support last_path_ option [#2](https://github.com/sakama/embulk-input-sftp/pull/2)[#4](https://github.com/sakama/embulk-input-sftp/pull/4)[#7](https://github.com/sakama/embulk-input-sftp/pull/7)
|
4
8
|
* [feature] Support path_match_pattern option [#6](https://github.com/sakama/embulk-input-sftp/pull/6)
|
5
9
|
* [maintenance] Add unit test [#3](https://github.com/sakama/embulk-input-sftp/pull/3)
|
6
|
-
* [maintenance] Skip retry of file downloading when permission denied error happens [#1](https://github.com/sakama/embulk-input-sftp/pull/1)
|
10
|
+
* [maintenance] Skip retry of file downloading when permission denied error happens [#1](https://github.com/sakama/embulk-input-sftp/pull/1)
|
data/README.md
CHANGED
@@ -21,8 +21,6 @@ Reads files stored on remote server using SFTP
|
|
21
21
|
- **path_prefix**: Prefix of output paths (string, required)
|
22
22
|
- **path_match_pattern**: regexp to match file paths. If a file path doesn't match with this pattern, the file will be skipped (regexp string, optional)
|
23
23
|
- **total_file_count_limit**: maximum number of files to read (integer, optional)
|
24
|
-
- **file_ext**: Extension of output files (string, required)
|
25
|
-
- **sequence_format**: Format for sequence part of output files (string, default: `".%03d.%02d"`)
|
26
24
|
- **min_task_size (experimental)**: minimum size of a task. If this is larger than 0, one task includes multiple input files. This is useful if too many number of tasks impacts performance of output or executor plugins badly. (integer, optional)
|
27
25
|
|
28
26
|
### Proxy configuration
|
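data/build.gradle
CHANGED
(the build.gradle hunk, +1 -1, is missing from this extract)
data/src/main/java/org/embulk/input/sftp/SftpFileInput.java
CHANGED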
@@ -16,9 +16,13 @@ import org.embulk.spi.Exec;
|
|
16
16
|
import org.embulk.spi.TransactionalFileInput;
|
17
17
|
import org.embulk.spi.unit.LocalFile;
|
18
18
|
import org.embulk.spi.util.InputStreamFileInput;
|
19
|
+
import org.embulk.spi.util.RetryExecutor.RetryGiveupException;
|
20
|
+
import org.embulk.spi.util.RetryExecutor.Retryable;
|
19
21
|
import org.slf4j.Logger;
|
22
|
+
import static org.embulk.spi.util.RetryExecutor.retryExecutor;
|
20
23
|
|
21
24
|
import java.io.File;
|
25
|
+
import java.io.IOException;
|
22
26
|
import java.net.URI;
|
23
27
|
import java.net.URISyntaxException;
|
24
28
|
|
@@ -56,6 +60,7 @@ public class SftpFileInput
|
|
56
60
|
}
|
57
61
|
|
58
62
|
StandardFileSystemManager manager = new StandardFileSystemManager();
|
63
|
+
manager.setClassLoader(SftpFileInput.class.getClassLoader());
|
59
64
|
try {
|
60
65
|
manager.init();
|
61
66
|
}
|
@@ -149,64 +154,81 @@ public class SftpFileInput
|
|
149
154
|
}
|
150
155
|
}
|
151
156
|
|
152
|
-
public static FileList listFilesByPrefix(PluginTask task)
|
157
|
+
public static FileList listFilesByPrefix(final PluginTask task)
|
153
158
|
{
|
154
|
-
FileList.Builder builder = new FileList.Builder(task);
|
159
|
+
final FileList.Builder builder = new FileList.Builder(task);
|
155
160
|
int maxConnectionRetry = task.getMaxConnectionRetry();
|
156
|
-
String lastKey = null;
|
157
161
|
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
162
|
+
try {
|
163
|
+
return retryExecutor()
|
164
|
+
.withRetryLimit(maxConnectionRetry)
|
165
|
+
.withInitialRetryWait(500)
|
166
|
+
.withMaxRetryWait(30 * 1000)
|
167
|
+
.runInterruptible(new Retryable<FileList>() {
|
168
|
+
@Override
|
169
|
+
public FileList call() throws IOException
|
170
|
+
{
|
171
|
+
String lastKey = null;
|
172
|
+
log.info("Getting to download file list");
|
173
|
+
StandardFileSystemManager manager = initializeStandardFileSystemManager();
|
174
|
+
FileSystemOptions fsOptions = initializeFsOptions(task);
|
164
175
|
|
165
|
-
|
166
|
-
|
167
|
-
|
176
|
+
if (task.getLastPath().isPresent() && !task.getLastPath().get().isEmpty()) {
|
177
|
+
lastKey = manager.resolveFile(getSftpFileUri(task, task.getLastPath().get()), fsOptions).toString();
|
178
|
+
}
|
168
179
|
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
180
|
+
FileObject files = manager.resolveFile(getSftpFileUri(task, task.getPathPrefix()), fsOptions);
|
181
|
+
String basename = FilenameUtils.getBaseName(task.getPathPrefix());
|
182
|
+
if (files.isFolder()) {
|
183
|
+
for (FileObject f : files.getChildren()) {
|
184
|
+
if (f.isFile()) {
|
185
|
+
addFileToList(builder, f.toString(), f.getContent().getSize(), "", lastKey);
|
186
|
+
}
|
187
|
+
}
|
188
|
+
}
|
189
|
+
else {
|
190
|
+
FileObject parent = files.getParent();
|
191
|
+
for (FileObject f : parent.getChildren()) {
|
192
|
+
if (f.isFile()) {
|
193
|
+
addFileToList(builder, f.toString(), f.getContent().getSize(), basename, lastKey);
|
194
|
+
}
|
195
|
+
}
|
196
|
+
}
|
197
|
+
return builder.build();
|
175
198
|
}
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
if (f.isFile()) {
|
182
|
-
addFileToList(builder, f.toString(), f.getContent().getSize(), basename, lastKey);
|
199
|
+
|
200
|
+
@Override
|
201
|
+
public boolean isRetryableException(Exception exception)
|
202
|
+
{
|
203
|
+
return true;
|
183
204
|
}
|
184
|
-
}
|
185
|
-
}
|
186
|
-
return builder.build();
|
187
|
-
}
|
188
|
-
catch (FileSystemException ex) {
|
189
|
-
if (++count == maxConnectionRetry) {
|
190
|
-
Throwables.propagate(ex);
|
191
|
-
}
|
192
|
-
log.warn("failed to connect sftp server: " + ex.getMessage(), ex);
|
193
205
|
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
206
|
+
@Override
|
207
|
+
public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait)
|
208
|
+
throws RetryGiveupException
|
209
|
+
{
|
210
|
+
String message = String.format("SFTP GET request failed. Retrying %d/%d after %d seconds. Message: %s",
|
211
|
+
retryCount, retryLimit, retryWait / 1000, exception.getMessage());
|
212
|
+
if (retryCount % 3 == 0) {
|
213
|
+
log.warn(message, exception);
|
214
|
+
}
|
215
|
+
else {
|
216
|
+
log.warn(message);
|
217
|
+
}
|
218
|
+
}
|
219
|
+
|
220
|
+
@Override
|
221
|
+
public void onGiveup(Exception firstException, Exception lastException)
|
222
|
+
throws RetryGiveupException
|
223
|
+
{
|
224
|
+
}
|
225
|
+
});
|
226
|
+
}
|
227
|
+
catch (RetryGiveupException ex) {
|
228
|
+
throw Throwables.propagate(ex.getCause());
|
229
|
+
}
|
230
|
+
catch (InterruptedException ex) {
|
231
|
+
throw Throwables.propagate(ex);
|
210
232
|
}
|
211
233
|
}
|
212
234
|
|
data/src/main/java/org/embulk/input/sftp/SingleFileProvider.java
CHANGED
@@ -1,15 +1,20 @@
|
|
1
1
|
package org.embulk.input.sftp;
|
2
2
|
|
3
|
+
import com.google.common.base.Throwables;
|
3
4
|
import org.apache.commons.vfs2.FileObject;
|
4
5
|
import org.apache.commons.vfs2.FileSystemException;
|
5
6
|
import org.apache.commons.vfs2.FileSystemOptions;
|
6
7
|
import org.apache.commons.vfs2.impl.StandardFileSystemManager;
|
7
8
|
import org.embulk.spi.Exec;
|
8
9
|
import org.embulk.spi.util.InputStreamFileInput;
|
10
|
+
import org.embulk.spi.util.RetryExecutor.RetryGiveupException;
|
11
|
+
import org.embulk.spi.util.RetryExecutor.Retryable;
|
9
12
|
import org.slf4j.Logger;
|
13
|
+
import static org.embulk.spi.util.RetryExecutor.retryExecutor;
|
10
14
|
|
11
15
|
import java.io.IOException;
|
12
16
|
import java.io.InputStream;
|
17
|
+
import java.io.InterruptedIOException;
|
13
18
|
import java.util.Iterator;
|
14
19
|
|
15
20
|
public class SingleFileProvider
|
@@ -37,33 +42,58 @@ public class SingleFileProvider
|
|
37
42
|
return null;
|
38
43
|
}
|
39
44
|
opened = true;
|
40
|
-
String key = iterator.next();
|
45
|
+
final String key = iterator.next();
|
41
46
|
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
+
try {
|
48
|
+
return retryExecutor()
|
49
|
+
.withRetryLimit(maxConnectionRetry)
|
50
|
+
.withInitialRetryWait(500)
|
51
|
+
.withMaxRetryWait(30 * 1000)
|
52
|
+
.runInterruptible(new Retryable<InputStream>() {
|
53
|
+
@Override
|
54
|
+
public InputStream call() throws FileSystemException
|
55
|
+
{
|
56
|
+
log.info("Starting to download file {}", key);
|
57
|
+
FileObject file = manager.resolveFile(key, fsOptions);
|
58
|
+
return file.getContent().getInputStream();
|
59
|
+
}
|
47
60
|
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
}
|
54
|
-
log.warn("failed to connect sftp server: " + ex.getMessage(), ex);
|
61
|
+
@Override
|
62
|
+
public boolean isRetryableException(Exception exception)
|
63
|
+
{
|
64
|
+
return true;
|
65
|
+
}
|
55
66
|
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
+
@Override
|
68
|
+
public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait)
|
69
|
+
throws RetryGiveupException
|
70
|
+
{
|
71
|
+
if (exception.getMessage().indexOf("Permission denied") > 0) {
|
72
|
+
log.error("Could not download file due to Permission Denied");
|
73
|
+
throw new RetryGiveupException(exception);
|
74
|
+
}
|
75
|
+
String message = String.format("SFTP GET request failed. Retrying %d/%d after %d seconds. Message: %s",
|
76
|
+
retryCount, retryLimit, retryWait / 1000, exception.getMessage());
|
77
|
+
if (retryCount % 3 == 0) {
|
78
|
+
log.warn(message, exception);
|
79
|
+
}
|
80
|
+
else {
|
81
|
+
log.warn(message);
|
82
|
+
}
|
83
|
+
}
|
84
|
+
|
85
|
+
@Override
|
86
|
+
public void onGiveup(Exception firstException, Exception lastException)
|
87
|
+
throws RetryGiveupException
|
88
|
+
{
|
89
|
+
}
|
90
|
+
});
|
91
|
+
}
|
92
|
+
catch (RetryGiveupException ex) {
|
93
|
+
throw Throwables.propagate(ex.getCause());
|
94
|
+
}
|
95
|
+
catch (InterruptedException ex) {
|
96
|
+
throw new InterruptedIOException();
|
67
97
|
}
|
68
98
|
}
|
69
99
|
|
data/src/test/java/org/embulk/input/sftp/TestSftpFileInputPlugin.java
CHANGED
@@ -183,8 +183,8 @@ public class TestSftpFileInputPlugin
|
|
183
183
|
@Test
|
184
184
|
public void testListFiles() throws Exception
|
185
185
|
{
|
186
|
-
uploadFile(Resources.getResource("sample_01.csv").getPath(), REMOTE_DIRECTORY + "sample_01.csv");
|
187
|
-
uploadFile(Resources.getResource("sample_02.csv").getPath(), REMOTE_DIRECTORY + "sample_02.csv");
|
186
|
+
uploadFile(Resources.getResource("sample_01.csv").getPath(), REMOTE_DIRECTORY + "sample_01.csv", true);
|
187
|
+
uploadFile(Resources.getResource("sample_02.csv").getPath(), REMOTE_DIRECTORY + "sample_02.csv", true);
|
188
188
|
|
189
189
|
PluginTask task = config.loadConfig(PluginTask.class);
|
190
190
|
|
@@ -215,8 +215,8 @@ public class TestSftpFileInputPlugin
|
|
215
215
|
@Test
|
216
216
|
public void testSftpInputByOpen() throws Exception
|
217
217
|
{
|
218
|
-
uploadFile(Resources.getResource("sample_01.csv").getPath(), REMOTE_DIRECTORY + "sample_01.csv");
|
219
|
-
uploadFile(Resources.getResource("sample_02.csv").getPath(), REMOTE_DIRECTORY + "sample_02.csv");
|
218
|
+
uploadFile(Resources.getResource("sample_01.csv").getPath(), REMOTE_DIRECTORY + "sample_01.csv", true);
|
219
|
+
uploadFile(Resources.getResource("sample_02.csv").getPath(), REMOTE_DIRECTORY + "sample_02.csv", true);
|
220
220
|
|
221
221
|
PluginTask task = config.loadConfig(PluginTask.class);
|
222
222
|
runner.transaction(config, new Control());
|
@@ -235,8 +235,8 @@ public class TestSftpFileInputPlugin
|
|
235
235
|
// try {
|
236
236
|
// proxyServer = createProxyServer(PROXY_PORT);
|
237
237
|
//
|
238
|
-
// uploadFile(Resources.getResource("sample_01.csv").getPath(), REMOTE_DIRECTORY + "sample_01.csv");
|
239
|
-
// uploadFile(Resources.getResource("sample_02.csv").getPath(), REMOTE_DIRECTORY + "sample_02.csv");
|
238
|
+
// uploadFile(Resources.getResource("sample_01.csv").getPath(), REMOTE_DIRECTORY + "sample_01.csv", true);
|
239
|
+
// uploadFile(Resources.getResource("sample_02.csv").getPath(), REMOTE_DIRECTORY + "sample_02.csv", true);
|
240
240
|
//
|
241
241
|
// ConfigSource config = Exec.newConfigSource()
|
242
242
|
// .set("host", HOST)
|
@@ -269,8 +269,8 @@ public class TestSftpFileInputPlugin
|
|
269
269
|
@Test
|
270
270
|
public void testSftpInputByOpenTimeout() throws Exception
|
271
271
|
{
|
272
|
-
uploadFile(Resources.getResource("sample_01.csv").getPath(), REMOTE_DIRECTORY + "sample_01.csv");
|
273
|
-
uploadFile(Resources.getResource("sample_02.csv").getPath(), REMOTE_DIRECTORY + "sample_02.csv");
|
272
|
+
uploadFile(Resources.getResource("sample_01.csv").getPath(), REMOTE_DIRECTORY + "sample_01.csv", true);
|
273
|
+
uploadFile(Resources.getResource("sample_02.csv").getPath(), REMOTE_DIRECTORY + "sample_02.csv", true);
|
274
274
|
|
275
275
|
ConfigSource config = Exec.newConfigSource()
|
276
276
|
.set("host", HOST)
|
@@ -289,6 +289,29 @@ public class TestSftpFileInputPlugin
|
|
289
289
|
runner.transaction(config, new Control());
|
290
290
|
}
|
291
291
|
|
292
|
+
@Test
|
293
|
+
public void testSftpInputByOpenFailWithRetry() throws Exception
|
294
|
+
{
|
295
|
+
uploadFile(Resources.getResource("sample_01.csv").getPath(), REMOTE_DIRECTORY + "sample_01.csv", false);
|
296
|
+
uploadFile(Resources.getResource("sample_02.csv").getPath(), REMOTE_DIRECTORY + "sample_02.csv", false);
|
297
|
+
|
298
|
+
ConfigSource config = Exec.newConfigSource()
|
299
|
+
.set("host", HOST)
|
300
|
+
.set("port", PORT)
|
301
|
+
.set("user", USERNAME)
|
302
|
+
.set("password", PASSWORD)
|
303
|
+
.set("path_prefix", REMOTE_DIRECTORY)
|
304
|
+
.set("max_connection_retry", 2)
|
305
|
+
.set("last_path", "")
|
306
|
+
.set("parser", parserConfig(schemaConfig()));
|
307
|
+
|
308
|
+
exception.expect(RuntimeException.class);
|
309
|
+
exception.expectCause(CoreMatchers.<Throwable>instanceOf(FileSystemException.class));
|
310
|
+
exception.expectMessage(CoreMatchers.containsString("Unknown message with code \"java.nio.file.AccessDeniedException"));
|
311
|
+
|
312
|
+
runner.transaction(config, new Control());
|
313
|
+
}
|
314
|
+
|
292
315
|
@Test
|
293
316
|
public void testProxyType()
|
294
317
|
{
|
@@ -377,7 +400,7 @@ public class TestSftpFileInputPlugin
|
|
377
400
|
.start();
|
378
401
|
}
|
379
402
|
|
380
|
-
private void uploadFile(String localPath, String remotePath) throws Exception
|
403
|
+
private void uploadFile(String localPath, String remotePath, boolean isReadable) throws Exception
|
381
404
|
{
|
382
405
|
PluginTask task = config.loadConfig(PluginTask.class);
|
383
406
|
|
@@ -388,11 +411,15 @@ public class TestSftpFileInputPlugin
|
|
388
411
|
while (true) {
|
389
412
|
try {
|
390
413
|
StandardFileSystemManager manager = new StandardFileSystemManager();
|
414
|
+
manager.setClassLoader(TestSftpFileInputPlugin.class.getClassLoader());
|
391
415
|
manager.init();
|
392
416
|
|
393
417
|
FileObject localFile = manager.resolveFile(localPath);
|
394
418
|
FileObject remoteFile = manager.resolveFile(uri, fsOptions);
|
395
419
|
remoteFile.copyFrom(localFile, Selectors.SELECT_SELF);
|
420
|
+
if (!isReadable) {
|
421
|
+
remoteFile.setReadable(false, false);
|
422
|
+
}
|
396
423
|
|
397
424
|
if (log.isDebugEnabled()) {
|
398
425
|
FileObject files = manager.resolveFile(SftpFileInput.getSftpFileUri(task, REMOTE_DIRECTORY));
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-sftp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Satoshi Akama
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-03-18 00:00:00.000000000 Z
|
11
|
+
date: 2016-03-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -71,7 +71,7 @@ files:
|
|
71
71
|
- classpath/commons-io-1.3.2.jar
|
72
72
|
- classpath/commons-logging-1.2.jar
|
73
73
|
- classpath/commons-vfs2-2.1.1660580.2.jar
|
74
|
-
- classpath/embulk-input-sftp-0.1.1.jar
|
74
|
+
- classpath/embulk-input-sftp-0.1.2.jar
|
75
75
|
- classpath/jsch-0.1.53.jar
|
76
76
|
homepage: https://github.com/sakama/embulk-input-sftp
|
77
77
|
licenses:
|