embulk-input-sftp 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +18 -0
- data/build.gradle +2 -1
- data/src/main/java/org/embulk/input/sftp/FileList.java +341 -0
- data/src/main/java/org/embulk/input/sftp/PluginTask.java +3 -5
- data/src/main/java/org/embulk/input/sftp/SftpFileInput.java +51 -16
- data/src/main/java/org/embulk/input/sftp/SftpFileInputPlugin.java +2 -15
- data/src/main/java/org/embulk/input/sftp/SingleFileProvider.java +11 -8
- data/src/test/java/org/embulk/input/sftp/TestFileList.java +87 -0
- data/src/test/java/org/embulk/input/sftp/TestSftpFileInputPlugin.java +548 -0
- data/src/test/resources/sample_01.csv +6 -0
- data/src/test/resources/sample_02.csv +6 -0
- metadata +8 -3
@@ -8,8 +8,6 @@ import org.embulk.spi.Exec;
|
|
8
8
|
import org.embulk.spi.FileInputPlugin;
|
9
9
|
import org.embulk.spi.TransactionalFileInput;
|
10
10
|
|
11
|
-
import java.util.ArrayList;
|
12
|
-
import java.util.Collections;
|
13
11
|
import java.util.List;
|
14
12
|
|
15
13
|
public class SftpFileInputPlugin
|
@@ -23,7 +21,7 @@ public class SftpFileInputPlugin
|
|
23
21
|
// list files recursively
|
24
22
|
task.setFiles(SftpFileInput.listFilesByPrefix(task));
|
25
23
|
// number of processors is same with number of files
|
26
|
-
return resume(task.dump(), task.getFiles().
|
24
|
+
return resume(task.dump(), task.getFiles().getTaskCount(), control);
|
27
25
|
}
|
28
26
|
|
29
27
|
@Override
|
@@ -32,21 +30,10 @@ public class SftpFileInputPlugin
|
|
32
30
|
FileInputPlugin.Control control)
|
33
31
|
{
|
34
32
|
PluginTask task = taskSource.loadTask(PluginTask.class);
|
35
|
-
|
36
33
|
control.run(taskSource, taskCount);
|
37
34
|
|
38
35
|
ConfigDiff configDiff = Exec.newConfigDiff();
|
39
|
-
|
40
|
-
List<String> files = new ArrayList<String>(task.getFiles());
|
41
|
-
if (files.isEmpty()) {
|
42
|
-
if (task.getLastPath().isPresent()) {
|
43
|
-
configDiff.set("last_path", task.getLastPath().get());
|
44
|
-
}
|
45
|
-
}
|
46
|
-
else {
|
47
|
-
Collections.sort(files);
|
48
|
-
configDiff.set("last_path", files.get(files.size() - 1));
|
49
|
-
}
|
36
|
+
configDiff.set("last_path", SftpFileInput.getRelativePath(task.getFiles().getLastPath(task.getLastPath())));
|
50
37
|
|
51
38
|
return configDiff;
|
52
39
|
}
|
@@ -2,6 +2,7 @@ package org.embulk.input.sftp;
|
|
2
2
|
|
3
3
|
import org.apache.commons.vfs2.FileObject;
|
4
4
|
import org.apache.commons.vfs2.FileSystemException;
|
5
|
+
import org.apache.commons.vfs2.FileSystemOptions;
|
5
6
|
import org.apache.commons.vfs2.impl.StandardFileSystemManager;
|
6
7
|
import org.embulk.spi.Exec;
|
7
8
|
import org.embulk.spi.util.InputStreamFileInput;
|
@@ -9,43 +10,45 @@ import org.slf4j.Logger;
|
|
9
10
|
|
10
11
|
import java.io.IOException;
|
11
12
|
import java.io.InputStream;
|
13
|
+
import java.util.Iterator;
|
12
14
|
|
13
15
|
public class SingleFileProvider
|
14
16
|
implements InputStreamFileInput.Provider
|
15
17
|
{
|
16
18
|
private final StandardFileSystemManager manager;
|
17
|
-
private final
|
18
|
-
private final
|
19
|
+
private final FileSystemOptions fsOptions;
|
20
|
+
private final Iterator<String> iterator;
|
19
21
|
private final int maxConnectionRetry;
|
20
22
|
private boolean opened = false;
|
21
23
|
private final Logger log = Exec.getLogger(SingleFileProvider.class);
|
22
24
|
|
23
|
-
public SingleFileProvider(PluginTask task, int taskIndex, StandardFileSystemManager manager)
|
25
|
+
public SingleFileProvider(PluginTask task, int taskIndex, StandardFileSystemManager manager, FileSystemOptions fsOptions)
|
24
26
|
{
|
25
27
|
this.manager = manager;
|
26
|
-
this.
|
27
|
-
this.
|
28
|
+
this.fsOptions = fsOptions;
|
29
|
+
this.iterator = task.getFiles().get(taskIndex).iterator();
|
28
30
|
this.maxConnectionRetry = task.getMaxConnectionRetry();
|
29
31
|
}
|
30
32
|
|
31
33
|
@Override
|
32
34
|
public InputStream openNext() throws IOException
|
33
35
|
{
|
34
|
-
if (opened) {
|
36
|
+
if (opened || !iterator.hasNext()) {
|
35
37
|
return null;
|
36
38
|
}
|
37
39
|
opened = true;
|
40
|
+
String key = iterator.next();
|
38
41
|
|
39
42
|
int count = 0;
|
40
43
|
while (true) {
|
41
44
|
try {
|
42
|
-
FileObject file = manager.resolveFile(key,
|
45
|
+
FileObject file = manager.resolveFile(key, fsOptions);
|
43
46
|
log.info("Starting to download file {}", key);
|
44
47
|
|
45
48
|
return file.getContent().getInputStream();
|
46
49
|
}
|
47
50
|
catch (FileSystemException ex) {
|
48
|
-
if (++count == maxConnectionRetry) {
|
51
|
+
if (++count == maxConnectionRetry || ex.getMessage().indexOf("Permission denied") > 0) {
|
49
52
|
throw ex;
|
50
53
|
}
|
51
54
|
log.warn("failed to connect sftp server: " + ex.getMessage(), ex);
|
@@ -0,0 +1,87 @@
|
|
1
|
+
package org.embulk.input.sftp;
|
2
|
+
|
3
|
+
import org.embulk.EmbulkTestRuntime;
|
4
|
+
import org.embulk.config.ConfigSource;
|
5
|
+
import org.junit.Before;
|
6
|
+
import org.junit.Rule;
|
7
|
+
import org.junit.Test;
|
8
|
+
|
9
|
+
import static org.junit.Assert.assertEquals;
|
10
|
+
|
11
|
+
public class TestFileList
|
12
|
+
{
|
13
|
+
@Rule
|
14
|
+
public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
|
15
|
+
|
16
|
+
private ConfigSource config;
|
17
|
+
|
18
|
+
@Before
|
19
|
+
public void createConfigSource()
|
20
|
+
{
|
21
|
+
config = runtime.getExec().newConfigSource();
|
22
|
+
}
|
23
|
+
|
24
|
+
@Test
|
25
|
+
public void checkMinTaskSize()
|
26
|
+
throws Exception
|
27
|
+
{
|
28
|
+
{ // not specify min_task_size
|
29
|
+
FileList fileList = newFileList(config.deepCopy(),
|
30
|
+
"sample_00", 100L,
|
31
|
+
"sample_01", 150L,
|
32
|
+
"sample_02", 350L);
|
33
|
+
|
34
|
+
assertEquals(3, fileList.getTaskCount());
|
35
|
+
assertEquals("sample_00", fileList.get(0).get(0));
|
36
|
+
assertEquals("sample_01", fileList.get(1).get(0));
|
37
|
+
assertEquals("sample_02", fileList.get(2).get(0));
|
38
|
+
}
|
39
|
+
|
40
|
+
{
|
41
|
+
FileList fileList = newFileList(config.deepCopy().set("min_task_size", 100),
|
42
|
+
"sample_00", 100L,
|
43
|
+
"sample_01", 150L,
|
44
|
+
"sample_02", 350L);
|
45
|
+
|
46
|
+
assertEquals(3, fileList.getTaskCount());
|
47
|
+
assertEquals("sample_00", fileList.get(0).get(0));
|
48
|
+
assertEquals("sample_01", fileList.get(1).get(0));
|
49
|
+
assertEquals("sample_02", fileList.get(2).get(0));
|
50
|
+
}
|
51
|
+
|
52
|
+
{
|
53
|
+
FileList fileList = newFileList(config.deepCopy().set("min_task_size", 200),
|
54
|
+
"sample_00", 100L,
|
55
|
+
"sample_01", 150L,
|
56
|
+
"sample_02", 350L);
|
57
|
+
|
58
|
+
assertEquals(2, fileList.getTaskCount());
|
59
|
+
assertEquals("sample_00", fileList.get(0).get(0));
|
60
|
+
assertEquals("sample_01", fileList.get(0).get(1));
|
61
|
+
assertEquals("sample_02", fileList.get(1).get(0));
|
62
|
+
}
|
63
|
+
|
64
|
+
{
|
65
|
+
FileList fileList = newFileList(config.deepCopy().set("min_task_size", 700),
|
66
|
+
"sample_00", 100L,
|
67
|
+
"sample_01", 150L,
|
68
|
+
"sample_02", 350L);
|
69
|
+
|
70
|
+
assertEquals(1, fileList.getTaskCount());
|
71
|
+
assertEquals("sample_00", fileList.get(0).get(0));
|
72
|
+
assertEquals("sample_01", fileList.get(0).get(1));
|
73
|
+
assertEquals("sample_02", fileList.get(0).get(2));
|
74
|
+
}
|
75
|
+
}
|
76
|
+
|
77
|
+
private static FileList newFileList(ConfigSource config, Object... nameAndSize)
|
78
|
+
{
|
79
|
+
FileList.Builder builder = new FileList.Builder(config);
|
80
|
+
|
81
|
+
for (int i = 0; i < nameAndSize.length; i += 2) {
|
82
|
+
builder.add((String) nameAndSize[i], (long) nameAndSize[i + 1]);
|
83
|
+
}
|
84
|
+
|
85
|
+
return builder.build();
|
86
|
+
}
|
87
|
+
}
|
@@ -1,5 +1,553 @@
|
|
1
1
|
package org.embulk.input.sftp;
|
2
2
|
|
3
|
+
import com.google.common.base.Optional;
|
4
|
+
import com.google.common.base.Throwables;
|
5
|
+
import com.google.common.collect.ImmutableList;
|
6
|
+
import com.google.common.collect.ImmutableMap;
|
7
|
+
import com.google.common.collect.Lists;
|
8
|
+
import com.google.common.io.Resources;
|
9
|
+
import org.apache.commons.vfs2.FileObject;
|
10
|
+
import org.apache.commons.vfs2.FileSystemException;
|
11
|
+
import org.apache.commons.vfs2.FileSystemOptions;
|
12
|
+
import org.apache.commons.vfs2.Selectors;
|
13
|
+
import org.apache.commons.vfs2.impl.StandardFileSystemManager;
|
14
|
+
import org.apache.commons.vfs2.provider.sftp.SftpFileSystemConfigBuilder;
|
15
|
+
import org.apache.sshd.common.NamedFactory;
|
16
|
+
import org.apache.sshd.common.file.virtualfs.VirtualFileSystemFactory;
|
17
|
+
import org.apache.sshd.server.Command;
|
18
|
+
import org.apache.sshd.server.SshServer;
|
19
|
+
import org.apache.sshd.server.auth.password.PasswordAuthenticator;
|
20
|
+
import org.apache.sshd.server.auth.pubkey.PublickeyAuthenticator;
|
21
|
+
import org.apache.sshd.server.keyprovider.SimpleGeneratorHostKeyProvider;
|
22
|
+
import org.apache.sshd.server.scp.ScpCommandFactory;
|
23
|
+
import org.apache.sshd.server.session.ServerSession;
|
24
|
+
import org.apache.sshd.server.subsystem.sftp.SftpSubsystemFactory;
|
25
|
+
import org.embulk.EmbulkTestRuntime;
|
26
|
+
import org.embulk.config.ConfigDiff;
|
27
|
+
import org.embulk.config.ConfigException;
|
28
|
+
import org.embulk.config.ConfigSource;
|
29
|
+
import org.embulk.config.TaskReport;
|
30
|
+
import org.embulk.config.TaskSource;
|
31
|
+
import org.embulk.spi.Exec;
|
32
|
+
import org.embulk.spi.FileInputPlugin;
|
33
|
+
import org.embulk.spi.FileInputRunner;
|
34
|
+
import org.embulk.spi.InputPlugin;
|
35
|
+
import org.embulk.spi.Schema;
|
36
|
+
import org.embulk.spi.TestPageBuilderReader.MockPageOutput;
|
37
|
+
import org.embulk.spi.util.Pages;
|
38
|
+
import org.embulk.standards.CsvParserPlugin;
|
39
|
+
import org.hamcrest.CoreMatchers;
|
40
|
+
import org.junit.After;
|
41
|
+
import org.junit.Before;
|
42
|
+
import org.junit.Rule;
|
43
|
+
import org.junit.Test;
|
44
|
+
import org.junit.rules.ExpectedException;
|
45
|
+
import org.junit.rules.TemporaryFolder;
|
46
|
+
import org.littleshoot.proxy.HttpProxyServer;
|
47
|
+
import org.littleshoot.proxy.impl.DefaultHttpProxyServer;
|
48
|
+
import org.slf4j.Logger;
|
49
|
+
|
50
|
+
import java.io.IOException;
|
51
|
+
import java.lang.reflect.Method;
|
52
|
+
import java.security.PublicKey;
|
53
|
+
import java.util.ArrayList;
|
54
|
+
import java.util.Arrays;
|
55
|
+
import java.util.Collections;
|
56
|
+
import java.util.List;
|
57
|
+
|
58
|
+
import static org.junit.Assert.assertEquals;
|
59
|
+
|
3
60
|
public class TestSftpFileInputPlugin
|
4
61
|
{
|
62
|
+
@Rule
|
63
|
+
public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
|
64
|
+
|
65
|
+
@Rule
|
66
|
+
public ExpectedException exception = ExpectedException.none();
|
67
|
+
|
68
|
+
@Rule
|
69
|
+
public TemporaryFolder testFolder = new TemporaryFolder();
|
70
|
+
|
71
|
+
private Logger log = runtime.getExec().getLogger(TestSftpFileInputPlugin.class);
|
72
|
+
private ConfigSource config;
|
73
|
+
private SftpFileInputPlugin plugin;
|
74
|
+
private FileInputRunner runner;
|
75
|
+
private MockPageOutput output;
|
76
|
+
private SshServer sshServer;
|
77
|
+
private static final String HOST = "127.0.0.1";
|
78
|
+
private static final int PORT = 20022;
|
79
|
+
private static final String USERNAME = "username";
|
80
|
+
private static final String PASSWORD = "password";
|
81
|
+
private static final String REMOTE_DIRECTORY = "/home/username/unittest/";
|
82
|
+
private static final String SECRET_KEY_FILE = Resources.getResource("id_rsa").getPath();
|
83
|
+
private static final String SECRET_KEY_PASSPHRASE = "SECRET_KEY_PASSPHRASE";
|
84
|
+
private static final String PROXY_HOST = "127.0.0.1";
|
85
|
+
private static final int PROXY_PORT = 8080;
|
86
|
+
|
87
|
+
@Before
|
88
|
+
public void createResources() throws Exception
|
89
|
+
{
|
90
|
+
config = config();
|
91
|
+
plugin = new SftpFileInputPlugin();
|
92
|
+
runner = new FileInputRunner(runtime.getInstance(SftpFileInputPlugin.class));
|
93
|
+
output = new MockPageOutput();
|
94
|
+
|
95
|
+
if (!log.isDebugEnabled()) {
|
96
|
+
// TODO: change logging format: org.apache.commons.logging.Log
|
97
|
+
System.setProperty("org.apache.commons.logging.Log", "org.apache.commons.logging.impl.NoOpLog");
|
98
|
+
}
|
99
|
+
|
100
|
+
sshServer = createSshServer(HOST, PORT, USERNAME, PASSWORD);
|
101
|
+
}
|
102
|
+
|
103
|
+
@After
|
104
|
+
public void cleanup() throws InterruptedException
|
105
|
+
{
|
106
|
+
try {
|
107
|
+
sshServer.stop(true);
|
108
|
+
}
|
109
|
+
catch (Exception ex) {
|
110
|
+
log.debug(ex.getMessage(), ex);
|
111
|
+
}
|
112
|
+
}
|
113
|
+
|
114
|
+
@Test
|
115
|
+
public void checkDefaultValues()
|
116
|
+
{
|
117
|
+
ConfigSource config = Exec.newConfigSource()
|
118
|
+
.set("host", HOST)
|
119
|
+
.set("user", USERNAME)
|
120
|
+
.set("password", PASSWORD)
|
121
|
+
.set("path_prefix", "")
|
122
|
+
.set("last_path", "")
|
123
|
+
.set("parser", parserConfig(schemaConfig()));
|
124
|
+
|
125
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
126
|
+
assertEquals(22, task.getPort());
|
127
|
+
assertEquals(true, task.getUserDirIsRoot());
|
128
|
+
assertEquals(600, task.getSftpConnectionTimeout());
|
129
|
+
assertEquals(5, task.getMaxConnectionRetry());
|
130
|
+
}
|
131
|
+
|
132
|
+
@Test(expected = ConfigException.class)
|
133
|
+
public void checkDefaultValuesHostIsNull()
|
134
|
+
{
|
135
|
+
ConfigSource config = Exec.newConfigSource()
|
136
|
+
.set("host", null)
|
137
|
+
.set("user", USERNAME)
|
138
|
+
.set("password", PASSWORD)
|
139
|
+
.set("path_prefix", "")
|
140
|
+
.set("last_path", "")
|
141
|
+
.set("parser", parserConfig(schemaConfig()));
|
142
|
+
|
143
|
+
runner.transaction(config, new Control());
|
144
|
+
}
|
145
|
+
|
146
|
+
@Test(expected = ConfigException.class)
|
147
|
+
public void checkDefaultValuesUserIsNull()
|
148
|
+
{
|
149
|
+
ConfigSource config = Exec.newConfigSource()
|
150
|
+
.set("host", HOST)
|
151
|
+
.set("user", null)
|
152
|
+
.set("password", PASSWORD)
|
153
|
+
.set("path_prefix", "")
|
154
|
+
.set("last_path", "")
|
155
|
+
.set("parser", parserConfig(schemaConfig()));
|
156
|
+
|
157
|
+
runner.transaction(config, new Control());
|
158
|
+
}
|
159
|
+
|
160
|
+
@Test
|
161
|
+
public void testResume()
|
162
|
+
{
|
163
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
164
|
+
task.setFiles(createFileList(Arrays.asList("in/aa/a"), task));
|
165
|
+
ConfigDiff configDiff = plugin.resume(task.dump(), 0, new FileInputPlugin.Control()
|
166
|
+
{
|
167
|
+
@Override
|
168
|
+
public List<TaskReport> run(TaskSource taskSource, int taskCount)
|
169
|
+
{
|
170
|
+
return emptyTaskReports(taskCount);
|
171
|
+
}
|
172
|
+
});
|
173
|
+
assertEquals("in/aa/a", configDiff.get(String.class, "last_path"));
|
174
|
+
}
|
175
|
+
|
176
|
+
@Test
|
177
|
+
public void testCleanup()
|
178
|
+
{
|
179
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
180
|
+
plugin.cleanup(task.dump(), 0, Lists.<TaskReport>newArrayList()); // no errors happens
|
181
|
+
}
|
182
|
+
|
183
|
+
@Test
|
184
|
+
public void testListFiles() throws Exception
|
185
|
+
{
|
186
|
+
uploadFile(Resources.getResource("sample_01.csv").getPath(), REMOTE_DIRECTORY + "sample_01.csv");
|
187
|
+
uploadFile(Resources.getResource("sample_02.csv").getPath(), REMOTE_DIRECTORY + "sample_02.csv");
|
188
|
+
|
189
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
190
|
+
|
191
|
+
List<String> fileList = Arrays.asList(
|
192
|
+
SftpFileInput.getSftpFileUri(task, REMOTE_DIRECTORY + "sample_01.csv"),
|
193
|
+
SftpFileInput.getSftpFileUri(task, REMOTE_DIRECTORY + "sample_02.csv")
|
194
|
+
);
|
195
|
+
FileList expected = createFileList(fileList, task);
|
196
|
+
|
197
|
+
ConfigDiff configDiff = plugin.transaction(config, new FileInputPlugin.Control() {
|
198
|
+
@Override
|
199
|
+
public List<TaskReport> run(TaskSource taskSource, int taskCount)
|
200
|
+
{
|
201
|
+
assertEquals(2, taskCount);
|
202
|
+
return emptyTaskReports(taskCount);
|
203
|
+
}
|
204
|
+
});
|
205
|
+
|
206
|
+
Method listFilesByPrefix = SftpFileInput.class.getDeclaredMethod("listFilesByPrefix", PluginTask.class);
|
207
|
+
listFilesByPrefix.setAccessible(true);
|
208
|
+
FileList actual = (FileList) listFilesByPrefix.invoke(plugin, task);
|
209
|
+
|
210
|
+
assertEquals(expected.get(0), actual.get(0));
|
211
|
+
assertEquals(expected.get(1), actual.get(1));
|
212
|
+
assertEquals(SftpFileInput.getRelativePath(Optional.of(expected.get(1).get(0))), configDiff.get(String.class, "last_path"));
|
213
|
+
}
|
214
|
+
|
215
|
+
@Test
|
216
|
+
public void testSftpInputByOpen() throws Exception
|
217
|
+
{
|
218
|
+
uploadFile(Resources.getResource("sample_01.csv").getPath(), REMOTE_DIRECTORY + "sample_01.csv");
|
219
|
+
uploadFile(Resources.getResource("sample_02.csv").getPath(), REMOTE_DIRECTORY + "sample_02.csv");
|
220
|
+
|
221
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
222
|
+
runner.transaction(config, new Control());
|
223
|
+
|
224
|
+
Method listFilesByPrefix = SftpFileInput.class.getDeclaredMethod("listFilesByPrefix", PluginTask.class);
|
225
|
+
listFilesByPrefix.setAccessible(true);
|
226
|
+
task.setFiles((FileList) listFilesByPrefix.invoke(plugin, task));
|
227
|
+
|
228
|
+
assertRecords(config, output);
|
229
|
+
}
|
230
|
+
|
231
|
+
// @Test
|
232
|
+
// public void testSftpInputByOpenWithProxy() throws Exception
|
233
|
+
// {
|
234
|
+
// HttpProxyServer proxyServer = null;
|
235
|
+
// try {
|
236
|
+
// proxyServer = createProxyServer(PROXY_PORT);
|
237
|
+
//
|
238
|
+
// uploadFile(Resources.getResource("sample_01.csv").getPath(), REMOTE_DIRECTORY + "sample_01.csv");
|
239
|
+
// uploadFile(Resources.getResource("sample_02.csv").getPath(), REMOTE_DIRECTORY + "sample_02.csv");
|
240
|
+
//
|
241
|
+
// ConfigSource config = Exec.newConfigSource()
|
242
|
+
// .set("host", HOST)
|
243
|
+
// .set("port", PORT)
|
244
|
+
// .set("user", USERNAME)
|
245
|
+
// .set("password", PASSWORD)
|
246
|
+
// .set("path_prefix", REMOTE_DIRECTORY)
|
247
|
+
// .set("last_path", "")
|
248
|
+
// .set("proxy", proxyConfig())
|
249
|
+
// .set("parser", parserConfig(schemaConfig()));
|
250
|
+
//
|
251
|
+
// PluginTask task = config.loadConfig(PluginTask.class);
|
252
|
+
// runner.transaction(config, new Control());
|
253
|
+
//
|
254
|
+
// Method listFilesByPrefix = SftpFileInput.class.getDeclaredMethod("listFilesByPrefix", PluginTask.class);
|
255
|
+
// listFilesByPrefix.setAccessible(true);
|
256
|
+
// task.setFiles((FileList) listFilesByPrefix.invoke(plugin, task));
|
257
|
+
//
|
258
|
+
// assertRecords(config, output);
|
259
|
+
// log.info("config:", config);
|
260
|
+
// log.info("output:", output);
|
261
|
+
// }
|
262
|
+
// finally {
|
263
|
+
// if (proxyServer != null) {
|
264
|
+
// proxyServer.stop();
|
265
|
+
// }
|
266
|
+
// }
|
267
|
+
// }
|
268
|
+
|
269
|
+
@Test
|
270
|
+
public void testSftpInputByOpenTimeout() throws Exception
|
271
|
+
{
|
272
|
+
uploadFile(Resources.getResource("sample_01.csv").getPath(), REMOTE_DIRECTORY + "sample_01.csv");
|
273
|
+
uploadFile(Resources.getResource("sample_02.csv").getPath(), REMOTE_DIRECTORY + "sample_02.csv");
|
274
|
+
|
275
|
+
ConfigSource config = Exec.newConfigSource()
|
276
|
+
.set("host", HOST)
|
277
|
+
.set("port", PORT)
|
278
|
+
.set("user", "invalid-username")
|
279
|
+
.set("password", PASSWORD)
|
280
|
+
.set("path_prefix", REMOTE_DIRECTORY)
|
281
|
+
.set("max_connection_retry", 2)
|
282
|
+
.set("last_path", "")
|
283
|
+
.set("parser", parserConfig(schemaConfig()));
|
284
|
+
|
285
|
+
exception.expect(RuntimeException.class);
|
286
|
+
exception.expectCause(CoreMatchers.<Throwable>instanceOf(FileSystemException.class));
|
287
|
+
exception.expectMessage("Could not connect to SFTP server");
|
288
|
+
|
289
|
+
runner.transaction(config, new Control());
|
290
|
+
}
|
291
|
+
|
292
|
+
@Test
|
293
|
+
public void testProxyType()
|
294
|
+
{
|
295
|
+
// test valueOf()
|
296
|
+
assertEquals("http", ProxyTask.ProxyType.valueOf("HTTP").toString());
|
297
|
+
assertEquals("socks", ProxyTask.ProxyType.valueOf("SOCKS").toString());
|
298
|
+
assertEquals("stream", ProxyTask.ProxyType.valueOf("STREAM").toString());
|
299
|
+
try {
|
300
|
+
ProxyTask.ProxyType.valueOf("non-existing-type");
|
301
|
+
}
|
302
|
+
catch (Exception ex) {
|
303
|
+
assertEquals(IllegalArgumentException.class, ex.getClass());
|
304
|
+
}
|
305
|
+
|
306
|
+
// test fromString
|
307
|
+
assertEquals(ProxyTask.ProxyType.HTTP, ProxyTask.ProxyType.fromString("http"));
|
308
|
+
assertEquals(ProxyTask.ProxyType.SOCKS, ProxyTask.ProxyType.fromString("socks"));
|
309
|
+
assertEquals(ProxyTask.ProxyType.STREAM, ProxyTask.ProxyType.fromString("stream"));
|
310
|
+
try {
|
311
|
+
ProxyTask.ProxyType.fromString("non-existing-type");
|
312
|
+
}
|
313
|
+
catch (Exception ex) {
|
314
|
+
assertEquals(ConfigException.class, ex.getClass());
|
315
|
+
}
|
316
|
+
}
|
317
|
+
|
318
|
+
@Test
|
319
|
+
public void testSetProxyType() throws Exception
|
320
|
+
{
|
321
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
322
|
+
FileSystemOptions fsOptions = SftpFileInput.initializeFsOptions(task);
|
323
|
+
SftpFileSystemConfigBuilder builder = SftpFileSystemConfigBuilder.getInstance();
|
324
|
+
|
325
|
+
ProxyTask.ProxyType.setProxyType(builder, fsOptions, ProxyTask.ProxyType.HTTP);
|
326
|
+
assertEquals(SftpFileSystemConfigBuilder.PROXY_HTTP, builder.getProxyType(fsOptions));
|
327
|
+
|
328
|
+
ProxyTask.ProxyType.setProxyType(builder, fsOptions, ProxyTask.ProxyType.SOCKS);
|
329
|
+
assertEquals(SftpFileSystemConfigBuilder.PROXY_SOCKS5, builder.getProxyType(fsOptions));
|
330
|
+
|
331
|
+
ProxyTask.ProxyType.setProxyType(builder, fsOptions, ProxyTask.ProxyType.STREAM);
|
332
|
+
assertEquals(SftpFileSystemConfigBuilder.PROXY_STREAM, builder.getProxyType(fsOptions));
|
333
|
+
}
|
334
|
+
|
335
|
+
private SshServer createSshServer(String host, int port, final String sshUsername, final String sshPassword)
|
336
|
+
{
|
337
|
+
// setup a mock sftp server
|
338
|
+
SshServer sshServer = SshServer.setUpDefaultServer();
|
339
|
+
VirtualFileSystemFactory fsFactory = new VirtualFileSystemFactory();
|
340
|
+
fsFactory.setUserHomeDir(sshUsername, testFolder.getRoot().toPath());
|
341
|
+
sshServer.setFileSystemFactory(fsFactory);
|
342
|
+
sshServer.setHost(host);
|
343
|
+
sshServer.setPort(port);
|
344
|
+
sshServer.setSubsystemFactories(Collections.<NamedFactory<Command>>singletonList(new SftpSubsystemFactory()));
|
345
|
+
sshServer.setCommandFactory(new ScpCommandFactory());
|
346
|
+
sshServer.setKeyPairProvider(new SimpleGeneratorHostKeyProvider());
|
347
|
+
sshServer.setPasswordAuthenticator(new PasswordAuthenticator()
|
348
|
+
{
|
349
|
+
@Override
|
350
|
+
public boolean authenticate(final String username, final String password, final ServerSession session)
|
351
|
+
{
|
352
|
+
return sshUsername.contentEquals(username) && sshPassword.contentEquals(password);
|
353
|
+
}
|
354
|
+
});
|
355
|
+
sshServer.setPublickeyAuthenticator(new PublickeyAuthenticator()
|
356
|
+
{
|
357
|
+
@Override
|
358
|
+
public boolean authenticate(String username, PublicKey key, ServerSession session)
|
359
|
+
{
|
360
|
+
return true;
|
361
|
+
}
|
362
|
+
});
|
363
|
+
|
364
|
+
try {
|
365
|
+
sshServer.start();
|
366
|
+
}
|
367
|
+
catch (IOException ex) {
|
368
|
+
log.debug(ex.getMessage(), ex);
|
369
|
+
}
|
370
|
+
return sshServer;
|
371
|
+
}
|
372
|
+
|
373
|
+
private HttpProxyServer createProxyServer(int port)
|
374
|
+
{
|
375
|
+
return DefaultHttpProxyServer.bootstrap()
|
376
|
+
.withPort(port)
|
377
|
+
.start();
|
378
|
+
}
|
379
|
+
|
380
|
+
private void uploadFile(String localPath, String remotePath) throws Exception
|
381
|
+
{
|
382
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
383
|
+
|
384
|
+
FileSystemOptions fsOptions = SftpFileInput.initializeFsOptions(task);
|
385
|
+
String uri = SftpFileInput.getSftpFileUri(task, remotePath);
|
386
|
+
|
387
|
+
int count = 0;
|
388
|
+
while (true) {
|
389
|
+
try {
|
390
|
+
StandardFileSystemManager manager = new StandardFileSystemManager();
|
391
|
+
manager.init();
|
392
|
+
|
393
|
+
FileObject localFile = manager.resolveFile(localPath);
|
394
|
+
FileObject remoteFile = manager.resolveFile(uri, fsOptions);
|
395
|
+
remoteFile.copyFrom(localFile, Selectors.SELECT_SELF);
|
396
|
+
|
397
|
+
if (log.isDebugEnabled()) {
|
398
|
+
FileObject files = manager.resolveFile(SftpFileInput.getSftpFileUri(task, REMOTE_DIRECTORY));
|
399
|
+
for (FileObject f : files.getChildren()) {
|
400
|
+
if (f.isFile()) {
|
401
|
+
log.debug("remote file list:" + f.toString());
|
402
|
+
}
|
403
|
+
}
|
404
|
+
}
|
405
|
+
return;
|
406
|
+
}
|
407
|
+
catch (FileSystemException ex) {
|
408
|
+
if (++count == task.getMaxConnectionRetry()) {
|
409
|
+
Throwables.propagate(ex);
|
410
|
+
}
|
411
|
+
log.warn("failed to connect sftp server: " + ex.getMessage(), ex);
|
412
|
+
|
413
|
+
try {
|
414
|
+
long sleepTime = ((long) Math.pow(2, count) * 1000);
|
415
|
+
log.warn("sleep in next connection retry: {} milliseconds", sleepTime);
|
416
|
+
Thread.sleep(sleepTime); // milliseconds
|
417
|
+
}
|
418
|
+
catch (InterruptedException ex2) {
|
419
|
+
// Ignore this exception because this exception is just about `sleep`.
|
420
|
+
log.warn(ex2.getMessage(), ex2);
|
421
|
+
}
|
422
|
+
log.warn("retrying to connect sftp server: " + count + " times");
|
423
|
+
}
|
424
|
+
}
|
425
|
+
}
|
426
|
+
|
427
|
+
private FileList createFileList(List<String> fileList, PluginTask task)
|
428
|
+
{
|
429
|
+
FileList.Builder builder = new FileList.Builder(task);
|
430
|
+
for (String file : fileList) {
|
431
|
+
builder.add(file, 0);
|
432
|
+
}
|
433
|
+
return builder.build();
|
434
|
+
}
|
435
|
+
|
436
|
+
static List<TaskReport> emptyTaskReports(int taskCount)
|
437
|
+
{
|
438
|
+
ImmutableList.Builder<TaskReport> reports = new ImmutableList.Builder<>();
|
439
|
+
for (int i = 0; i < taskCount; i++) {
|
440
|
+
reports.add(Exec.newTaskReport());
|
441
|
+
}
|
442
|
+
return reports.build();
|
443
|
+
}
|
444
|
+
|
445
|
+
private class Control
|
446
|
+
implements InputPlugin.Control
|
447
|
+
{
|
448
|
+
@Override
|
449
|
+
public List<TaskReport> run(TaskSource taskSource, Schema schema, int taskCount)
|
450
|
+
{
|
451
|
+
List<TaskReport> reports = new ArrayList<>();
|
452
|
+
for (int i = 0; i < taskCount; i++) {
|
453
|
+
reports.add(runner.run(taskSource, schema, i, output));
|
454
|
+
}
|
455
|
+
return reports;
|
456
|
+
}
|
457
|
+
}
|
458
|
+
|
459
|
+
private ConfigSource config()
|
460
|
+
{
|
461
|
+
return Exec.newConfigSource()
|
462
|
+
.set("host", HOST)
|
463
|
+
.set("port", PORT)
|
464
|
+
.set("user", USERNAME)
|
465
|
+
.set("password", PASSWORD)
|
466
|
+
.set("path_prefix", REMOTE_DIRECTORY)
|
467
|
+
.set("last_path", "")
|
468
|
+
.set("parser", parserConfig(schemaConfig()));
|
469
|
+
}
|
470
|
+
|
471
|
+
private ImmutableMap<String, Object> proxyConfig()
|
472
|
+
{
|
473
|
+
ImmutableMap.Builder<String, Object> builder = new ImmutableMap.Builder<>();
|
474
|
+
builder.put("type", "http");
|
475
|
+
builder.put("host", PROXY_HOST);
|
476
|
+
builder.put("port", PROXY_PORT);
|
477
|
+
return builder.build();
|
478
|
+
}
|
479
|
+
|
480
|
+
private ImmutableMap<String, Object> parserConfig(ImmutableList<Object> schemaConfig)
|
481
|
+
{
|
482
|
+
ImmutableMap.Builder<String, Object> builder = new ImmutableMap.Builder<>();
|
483
|
+
builder.put("type", "csv");
|
484
|
+
builder.put("newline", "CRLF");
|
485
|
+
builder.put("delimiter", ",");
|
486
|
+
builder.put("quote", "\"");
|
487
|
+
builder.put("escape", "\"");
|
488
|
+
builder.put("trim_if_not_quoted", false);
|
489
|
+
builder.put("skip_header_lines", 1);
|
490
|
+
builder.put("allow_extra_columns", false);
|
491
|
+
builder.put("allow_optional_columns", false);
|
492
|
+
builder.put("columns", schemaConfig);
|
493
|
+
return builder.build();
|
494
|
+
}
|
495
|
+
|
496
|
+
private ImmutableList<Object> schemaConfig()
|
497
|
+
{
|
498
|
+
ImmutableList.Builder<Object> builder = new ImmutableList.Builder<>();
|
499
|
+
builder.add(ImmutableMap.of("name", "id", "type", "long"));
|
500
|
+
builder.add(ImmutableMap.of("name", "account", "type", "long"));
|
501
|
+
builder.add(ImmutableMap.of("name", "time", "type", "timestamp", "format", "%Y-%m-%d %H:%M:%S"));
|
502
|
+
builder.add(ImmutableMap.of("name", "purchase", "type", "timestamp", "format", "%Y%m%d"));
|
503
|
+
builder.add(ImmutableMap.of("name", "comment", "type", "string"));
|
504
|
+
builder.add(ImmutableMap.of("name", "json_column", "type", "json"));
|
505
|
+
return builder.build();
|
506
|
+
}
|
507
|
+
|
508
|
+
private void assertRecords(ConfigSource config, MockPageOutput output)
|
509
|
+
{
|
510
|
+
List<Object[]> records = getRecords(config, output);
|
511
|
+
assertEquals(10, records.size());
|
512
|
+
{
|
513
|
+
Object[] record = records.get(0);
|
514
|
+
assertEquals(1L, record[0]);
|
515
|
+
assertEquals(32864L, record[1]);
|
516
|
+
assertEquals("2015-01-27 19:23:49 UTC", record[2].toString());
|
517
|
+
assertEquals("2015-01-27 00:00:00 UTC", record[3].toString());
|
518
|
+
assertEquals("embulk", record[4]);
|
519
|
+
assertEquals("{\"k\":true}", record[5].toString());
|
520
|
+
}
|
521
|
+
|
522
|
+
{
|
523
|
+
Object[] record = records.get(1);
|
524
|
+
assertEquals(2L, record[0]);
|
525
|
+
assertEquals(14824L, record[1]);
|
526
|
+
assertEquals("2015-01-27 19:01:23 UTC", record[2].toString());
|
527
|
+
assertEquals("2015-01-27 00:00:00 UTC", record[3].toString());
|
528
|
+
assertEquals("embulk jruby", record[4]);
|
529
|
+
assertEquals("{\"k\":1}", record[5].toString());
|
530
|
+
}
|
531
|
+
|
532
|
+
{
|
533
|
+
Object[] record = records.get(2);
|
534
|
+
assertEquals("{\"k\":1.23}", record[5].toString());
|
535
|
+
}
|
536
|
+
|
537
|
+
{
|
538
|
+
Object[] record = records.get(3);
|
539
|
+
assertEquals("{\"k\":\"v\"}", record[5].toString());
|
540
|
+
}
|
541
|
+
|
542
|
+
{
|
543
|
+
Object[] record = records.get(4);
|
544
|
+
assertEquals("{\"k\":\"2015-02-03 08:13:45\"}", record[5].toString());
|
545
|
+
}
|
546
|
+
}
|
547
|
+
|
548
|
+
private List<Object[]> getRecords(ConfigSource config, MockPageOutput output)
|
549
|
+
{
|
550
|
+
Schema schema = config.getNested("parser").loadConfig(CsvParserPlugin.PluginTask.class).getSchemaConfig().toSchema();
|
551
|
+
return Pages.toObjects(schema, output.pages);
|
552
|
+
}
|
5
553
|
}
|