embulk-input-sftp 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +18 -0
- data/build.gradle +2 -1
- data/src/main/java/org/embulk/input/sftp/FileList.java +341 -0
- data/src/main/java/org/embulk/input/sftp/PluginTask.java +3 -5
- data/src/main/java/org/embulk/input/sftp/SftpFileInput.java +51 -16
- data/src/main/java/org/embulk/input/sftp/SftpFileInputPlugin.java +2 -15
- data/src/main/java/org/embulk/input/sftp/SingleFileProvider.java +11 -8
- data/src/test/java/org/embulk/input/sftp/TestFileList.java +87 -0
- data/src/test/java/org/embulk/input/sftp/TestSftpFileInputPlugin.java +548 -0
- data/src/test/resources/sample_01.csv +6 -0
- data/src/test/resources/sample_02.csv +6 -0
- metadata +8 -3
@@ -8,8 +8,6 @@ import org.embulk.spi.Exec;
|
|
8
8
|
import org.embulk.spi.FileInputPlugin;
|
9
9
|
import org.embulk.spi.TransactionalFileInput;
|
10
10
|
|
11
|
-
import java.util.ArrayList;
|
12
|
-
import java.util.Collections;
|
13
11
|
import java.util.List;
|
14
12
|
|
15
13
|
public class SftpFileInputPlugin
|
@@ -23,7 +21,7 @@ public class SftpFileInputPlugin
|
|
23
21
|
// list files recursively
|
24
22
|
task.setFiles(SftpFileInput.listFilesByPrefix(task));
|
25
23
|
// number of processors is same with number of files
|
26
|
-
return resume(task.dump(), task.getFiles().
|
24
|
+
return resume(task.dump(), task.getFiles().getTaskCount(), control);
|
27
25
|
}
|
28
26
|
|
29
27
|
@Override
|
@@ -32,21 +30,10 @@ public class SftpFileInputPlugin
|
|
32
30
|
FileInputPlugin.Control control)
|
33
31
|
{
|
34
32
|
PluginTask task = taskSource.loadTask(PluginTask.class);
|
35
|
-
|
36
33
|
control.run(taskSource, taskCount);
|
37
34
|
|
38
35
|
ConfigDiff configDiff = Exec.newConfigDiff();
|
39
|
-
|
40
|
-
List<String> files = new ArrayList<String>(task.getFiles());
|
41
|
-
if (files.isEmpty()) {
|
42
|
-
if (task.getLastPath().isPresent()) {
|
43
|
-
configDiff.set("last_path", task.getLastPath().get());
|
44
|
-
}
|
45
|
-
}
|
46
|
-
else {
|
47
|
-
Collections.sort(files);
|
48
|
-
configDiff.set("last_path", files.get(files.size() - 1));
|
49
|
-
}
|
36
|
+
configDiff.set("last_path", SftpFileInput.getRelativePath(task.getFiles().getLastPath(task.getLastPath())));
|
50
37
|
|
51
38
|
return configDiff;
|
52
39
|
}
|
@@ -2,6 +2,7 @@ package org.embulk.input.sftp;
|
|
2
2
|
|
3
3
|
import org.apache.commons.vfs2.FileObject;
|
4
4
|
import org.apache.commons.vfs2.FileSystemException;
|
5
|
+
import org.apache.commons.vfs2.FileSystemOptions;
|
5
6
|
import org.apache.commons.vfs2.impl.StandardFileSystemManager;
|
6
7
|
import org.embulk.spi.Exec;
|
7
8
|
import org.embulk.spi.util.InputStreamFileInput;
|
@@ -9,43 +10,45 @@ import org.slf4j.Logger;
|
|
9
10
|
|
10
11
|
import java.io.IOException;
|
11
12
|
import java.io.InputStream;
|
13
|
+
import java.util.Iterator;
|
12
14
|
|
13
15
|
public class SingleFileProvider
|
14
16
|
implements InputStreamFileInput.Provider
|
15
17
|
{
|
16
18
|
private final StandardFileSystemManager manager;
|
17
|
-
private final
|
18
|
-
private final
|
19
|
+
private final FileSystemOptions fsOptions;
|
20
|
+
private final Iterator<String> iterator;
|
19
21
|
private final int maxConnectionRetry;
|
20
22
|
private boolean opened = false;
|
21
23
|
private final Logger log = Exec.getLogger(SingleFileProvider.class);
|
22
24
|
|
23
|
-
public SingleFileProvider(PluginTask task, int taskIndex, StandardFileSystemManager manager)
|
25
|
+
public SingleFileProvider(PluginTask task, int taskIndex, StandardFileSystemManager manager, FileSystemOptions fsOptions)
|
24
26
|
{
|
25
27
|
this.manager = manager;
|
26
|
-
this.
|
27
|
-
this.
|
28
|
+
this.fsOptions = fsOptions;
|
29
|
+
this.iterator = task.getFiles().get(taskIndex).iterator();
|
28
30
|
this.maxConnectionRetry = task.getMaxConnectionRetry();
|
29
31
|
}
|
30
32
|
|
31
33
|
@Override
|
32
34
|
public InputStream openNext() throws IOException
|
33
35
|
{
|
34
|
-
if (opened) {
|
36
|
+
if (opened || !iterator.hasNext()) {
|
35
37
|
return null;
|
36
38
|
}
|
37
39
|
opened = true;
|
40
|
+
String key = iterator.next();
|
38
41
|
|
39
42
|
int count = 0;
|
40
43
|
while (true) {
|
41
44
|
try {
|
42
|
-
FileObject file = manager.resolveFile(key,
|
45
|
+
FileObject file = manager.resolveFile(key, fsOptions);
|
43
46
|
log.info("Starting to download file {}", key);
|
44
47
|
|
45
48
|
return file.getContent().getInputStream();
|
46
49
|
}
|
47
50
|
catch (FileSystemException ex) {
|
48
|
-
if (++count == maxConnectionRetry) {
|
51
|
+
if (++count == maxConnectionRetry || ex.getMessage().indexOf("Permission denied") > 0) {
|
49
52
|
throw ex;
|
50
53
|
}
|
51
54
|
log.warn("failed to connect sftp server: " + ex.getMessage(), ex);
|
@@ -0,0 +1,87 @@
|
|
1
|
+
package org.embulk.input.sftp;
|
2
|
+
|
3
|
+
import org.embulk.EmbulkTestRuntime;
|
4
|
+
import org.embulk.config.ConfigSource;
|
5
|
+
import org.junit.Before;
|
6
|
+
import org.junit.Rule;
|
7
|
+
import org.junit.Test;
|
8
|
+
|
9
|
+
import static org.junit.Assert.assertEquals;
|
10
|
+
|
11
|
+
public class TestFileList
|
12
|
+
{
|
13
|
+
@Rule
|
14
|
+
public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
|
15
|
+
|
16
|
+
private ConfigSource config;
|
17
|
+
|
18
|
+
@Before
|
19
|
+
public void createConfigSource()
|
20
|
+
{
|
21
|
+
config = runtime.getExec().newConfigSource();
|
22
|
+
}
|
23
|
+
|
24
|
+
@Test
|
25
|
+
public void checkMinTaskSize()
|
26
|
+
throws Exception
|
27
|
+
{
|
28
|
+
{ // not specify min_task_size
|
29
|
+
FileList fileList = newFileList(config.deepCopy(),
|
30
|
+
"sample_00", 100L,
|
31
|
+
"sample_01", 150L,
|
32
|
+
"sample_02", 350L);
|
33
|
+
|
34
|
+
assertEquals(3, fileList.getTaskCount());
|
35
|
+
assertEquals("sample_00", fileList.get(0).get(0));
|
36
|
+
assertEquals("sample_01", fileList.get(1).get(0));
|
37
|
+
assertEquals("sample_02", fileList.get(2).get(0));
|
38
|
+
}
|
39
|
+
|
40
|
+
{
|
41
|
+
FileList fileList = newFileList(config.deepCopy().set("min_task_size", 100),
|
42
|
+
"sample_00", 100L,
|
43
|
+
"sample_01", 150L,
|
44
|
+
"sample_02", 350L);
|
45
|
+
|
46
|
+
assertEquals(3, fileList.getTaskCount());
|
47
|
+
assertEquals("sample_00", fileList.get(0).get(0));
|
48
|
+
assertEquals("sample_01", fileList.get(1).get(0));
|
49
|
+
assertEquals("sample_02", fileList.get(2).get(0));
|
50
|
+
}
|
51
|
+
|
52
|
+
{
|
53
|
+
FileList fileList = newFileList(config.deepCopy().set("min_task_size", 200),
|
54
|
+
"sample_00", 100L,
|
55
|
+
"sample_01", 150L,
|
56
|
+
"sample_02", 350L);
|
57
|
+
|
58
|
+
assertEquals(2, fileList.getTaskCount());
|
59
|
+
assertEquals("sample_00", fileList.get(0).get(0));
|
60
|
+
assertEquals("sample_01", fileList.get(0).get(1));
|
61
|
+
assertEquals("sample_02", fileList.get(1).get(0));
|
62
|
+
}
|
63
|
+
|
64
|
+
{
|
65
|
+
FileList fileList = newFileList(config.deepCopy().set("min_task_size", 700),
|
66
|
+
"sample_00", 100L,
|
67
|
+
"sample_01", 150L,
|
68
|
+
"sample_02", 350L);
|
69
|
+
|
70
|
+
assertEquals(1, fileList.getTaskCount());
|
71
|
+
assertEquals("sample_00", fileList.get(0).get(0));
|
72
|
+
assertEquals("sample_01", fileList.get(0).get(1));
|
73
|
+
assertEquals("sample_02", fileList.get(0).get(2));
|
74
|
+
}
|
75
|
+
}
|
76
|
+
|
77
|
+
private static FileList newFileList(ConfigSource config, Object... nameAndSize)
|
78
|
+
{
|
79
|
+
FileList.Builder builder = new FileList.Builder(config);
|
80
|
+
|
81
|
+
for (int i = 0; i < nameAndSize.length; i += 2) {
|
82
|
+
builder.add((String) nameAndSize[i], (long) nameAndSize[i + 1]);
|
83
|
+
}
|
84
|
+
|
85
|
+
return builder.build();
|
86
|
+
}
|
87
|
+
}
|
@@ -1,5 +1,553 @@
|
|
1
1
|
package org.embulk.input.sftp;
|
2
2
|
|
3
|
+
import com.google.common.base.Optional;
|
4
|
+
import com.google.common.base.Throwables;
|
5
|
+
import com.google.common.collect.ImmutableList;
|
6
|
+
import com.google.common.collect.ImmutableMap;
|
7
|
+
import com.google.common.collect.Lists;
|
8
|
+
import com.google.common.io.Resources;
|
9
|
+
import org.apache.commons.vfs2.FileObject;
|
10
|
+
import org.apache.commons.vfs2.FileSystemException;
|
11
|
+
import org.apache.commons.vfs2.FileSystemOptions;
|
12
|
+
import org.apache.commons.vfs2.Selectors;
|
13
|
+
import org.apache.commons.vfs2.impl.StandardFileSystemManager;
|
14
|
+
import org.apache.commons.vfs2.provider.sftp.SftpFileSystemConfigBuilder;
|
15
|
+
import org.apache.sshd.common.NamedFactory;
|
16
|
+
import org.apache.sshd.common.file.virtualfs.VirtualFileSystemFactory;
|
17
|
+
import org.apache.sshd.server.Command;
|
18
|
+
import org.apache.sshd.server.SshServer;
|
19
|
+
import org.apache.sshd.server.auth.password.PasswordAuthenticator;
|
20
|
+
import org.apache.sshd.server.auth.pubkey.PublickeyAuthenticator;
|
21
|
+
import org.apache.sshd.server.keyprovider.SimpleGeneratorHostKeyProvider;
|
22
|
+
import org.apache.sshd.server.scp.ScpCommandFactory;
|
23
|
+
import org.apache.sshd.server.session.ServerSession;
|
24
|
+
import org.apache.sshd.server.subsystem.sftp.SftpSubsystemFactory;
|
25
|
+
import org.embulk.EmbulkTestRuntime;
|
26
|
+
import org.embulk.config.ConfigDiff;
|
27
|
+
import org.embulk.config.ConfigException;
|
28
|
+
import org.embulk.config.ConfigSource;
|
29
|
+
import org.embulk.config.TaskReport;
|
30
|
+
import org.embulk.config.TaskSource;
|
31
|
+
import org.embulk.spi.Exec;
|
32
|
+
import org.embulk.spi.FileInputPlugin;
|
33
|
+
import org.embulk.spi.FileInputRunner;
|
34
|
+
import org.embulk.spi.InputPlugin;
|
35
|
+
import org.embulk.spi.Schema;
|
36
|
+
import org.embulk.spi.TestPageBuilderReader.MockPageOutput;
|
37
|
+
import org.embulk.spi.util.Pages;
|
38
|
+
import org.embulk.standards.CsvParserPlugin;
|
39
|
+
import org.hamcrest.CoreMatchers;
|
40
|
+
import org.junit.After;
|
41
|
+
import org.junit.Before;
|
42
|
+
import org.junit.Rule;
|
43
|
+
import org.junit.Test;
|
44
|
+
import org.junit.rules.ExpectedException;
|
45
|
+
import org.junit.rules.TemporaryFolder;
|
46
|
+
import org.littleshoot.proxy.HttpProxyServer;
|
47
|
+
import org.littleshoot.proxy.impl.DefaultHttpProxyServer;
|
48
|
+
import org.slf4j.Logger;
|
49
|
+
|
50
|
+
import java.io.IOException;
|
51
|
+
import java.lang.reflect.Method;
|
52
|
+
import java.security.PublicKey;
|
53
|
+
import java.util.ArrayList;
|
54
|
+
import java.util.Arrays;
|
55
|
+
import java.util.Collections;
|
56
|
+
import java.util.List;
|
57
|
+
|
58
|
+
import static org.junit.Assert.assertEquals;
|
59
|
+
|
3
60
|
public class TestSftpFileInputPlugin
|
4
61
|
{
|
62
|
+
@Rule
|
63
|
+
public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
|
64
|
+
|
65
|
+
@Rule
|
66
|
+
public ExpectedException exception = ExpectedException.none();
|
67
|
+
|
68
|
+
@Rule
|
69
|
+
public TemporaryFolder testFolder = new TemporaryFolder();
|
70
|
+
|
71
|
+
private Logger log = runtime.getExec().getLogger(TestSftpFileInputPlugin.class);
|
72
|
+
private ConfigSource config;
|
73
|
+
private SftpFileInputPlugin plugin;
|
74
|
+
private FileInputRunner runner;
|
75
|
+
private MockPageOutput output;
|
76
|
+
private SshServer sshServer;
|
77
|
+
private static final String HOST = "127.0.0.1";
|
78
|
+
private static final int PORT = 20022;
|
79
|
+
private static final String USERNAME = "username";
|
80
|
+
private static final String PASSWORD = "password";
|
81
|
+
private static final String REMOTE_DIRECTORY = "/home/username/unittest/";
|
82
|
+
private static final String SECRET_KEY_FILE = Resources.getResource("id_rsa").getPath();
|
83
|
+
private static final String SECRET_KEY_PASSPHRASE = "SECRET_KEY_PASSPHRASE";
|
84
|
+
private static final String PROXY_HOST = "127.0.0.1";
|
85
|
+
private static final int PROXY_PORT = 8080;
|
86
|
+
|
87
|
+
@Before
|
88
|
+
public void createResources() throws Exception
|
89
|
+
{
|
90
|
+
config = config();
|
91
|
+
plugin = new SftpFileInputPlugin();
|
92
|
+
runner = new FileInputRunner(runtime.getInstance(SftpFileInputPlugin.class));
|
93
|
+
output = new MockPageOutput();
|
94
|
+
|
95
|
+
if (!log.isDebugEnabled()) {
|
96
|
+
// TODO: change logging format: org.apache.commons.logging.Log
|
97
|
+
System.setProperty("org.apache.commons.logging.Log", "org.apache.commons.logging.impl.NoOpLog");
|
98
|
+
}
|
99
|
+
|
100
|
+
sshServer = createSshServer(HOST, PORT, USERNAME, PASSWORD);
|
101
|
+
}
|
102
|
+
|
103
|
+
@After
|
104
|
+
public void cleanup() throws InterruptedException
|
105
|
+
{
|
106
|
+
try {
|
107
|
+
sshServer.stop(true);
|
108
|
+
}
|
109
|
+
catch (Exception ex) {
|
110
|
+
log.debug(ex.getMessage(), ex);
|
111
|
+
}
|
112
|
+
}
|
113
|
+
|
114
|
+
@Test
|
115
|
+
public void checkDefaultValues()
|
116
|
+
{
|
117
|
+
ConfigSource config = Exec.newConfigSource()
|
118
|
+
.set("host", HOST)
|
119
|
+
.set("user", USERNAME)
|
120
|
+
.set("password", PASSWORD)
|
121
|
+
.set("path_prefix", "")
|
122
|
+
.set("last_path", "")
|
123
|
+
.set("parser", parserConfig(schemaConfig()));
|
124
|
+
|
125
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
126
|
+
assertEquals(22, task.getPort());
|
127
|
+
assertEquals(true, task.getUserDirIsRoot());
|
128
|
+
assertEquals(600, task.getSftpConnectionTimeout());
|
129
|
+
assertEquals(5, task.getMaxConnectionRetry());
|
130
|
+
}
|
131
|
+
|
132
|
+
@Test(expected = ConfigException.class)
|
133
|
+
public void checkDefaultValuesHostIsNull()
|
134
|
+
{
|
135
|
+
ConfigSource config = Exec.newConfigSource()
|
136
|
+
.set("host", null)
|
137
|
+
.set("user", USERNAME)
|
138
|
+
.set("password", PASSWORD)
|
139
|
+
.set("path_prefix", "")
|
140
|
+
.set("last_path", "")
|
141
|
+
.set("parser", parserConfig(schemaConfig()));
|
142
|
+
|
143
|
+
runner.transaction(config, new Control());
|
144
|
+
}
|
145
|
+
|
146
|
+
@Test(expected = ConfigException.class)
|
147
|
+
public void checkDefaultValuesUserIsNull()
|
148
|
+
{
|
149
|
+
ConfigSource config = Exec.newConfigSource()
|
150
|
+
.set("host", HOST)
|
151
|
+
.set("user", null)
|
152
|
+
.set("password", PASSWORD)
|
153
|
+
.set("path_prefix", "")
|
154
|
+
.set("last_path", "")
|
155
|
+
.set("parser", parserConfig(schemaConfig()));
|
156
|
+
|
157
|
+
runner.transaction(config, new Control());
|
158
|
+
}
|
159
|
+
|
160
|
+
@Test
|
161
|
+
public void testResume()
|
162
|
+
{
|
163
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
164
|
+
task.setFiles(createFileList(Arrays.asList("in/aa/a"), task));
|
165
|
+
ConfigDiff configDiff = plugin.resume(task.dump(), 0, new FileInputPlugin.Control()
|
166
|
+
{
|
167
|
+
@Override
|
168
|
+
public List<TaskReport> run(TaskSource taskSource, int taskCount)
|
169
|
+
{
|
170
|
+
return emptyTaskReports(taskCount);
|
171
|
+
}
|
172
|
+
});
|
173
|
+
assertEquals("in/aa/a", configDiff.get(String.class, "last_path"));
|
174
|
+
}
|
175
|
+
|
176
|
+
@Test
|
177
|
+
public void testCleanup()
|
178
|
+
{
|
179
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
180
|
+
plugin.cleanup(task.dump(), 0, Lists.<TaskReport>newArrayList()); // no errors happens
|
181
|
+
}
|
182
|
+
|
183
|
+
@Test
|
184
|
+
public void testListFiles() throws Exception
|
185
|
+
{
|
186
|
+
uploadFile(Resources.getResource("sample_01.csv").getPath(), REMOTE_DIRECTORY + "sample_01.csv");
|
187
|
+
uploadFile(Resources.getResource("sample_02.csv").getPath(), REMOTE_DIRECTORY + "sample_02.csv");
|
188
|
+
|
189
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
190
|
+
|
191
|
+
List<String> fileList = Arrays.asList(
|
192
|
+
SftpFileInput.getSftpFileUri(task, REMOTE_DIRECTORY + "sample_01.csv"),
|
193
|
+
SftpFileInput.getSftpFileUri(task, REMOTE_DIRECTORY + "sample_02.csv")
|
194
|
+
);
|
195
|
+
FileList expected = createFileList(fileList, task);
|
196
|
+
|
197
|
+
ConfigDiff configDiff = plugin.transaction(config, new FileInputPlugin.Control() {
|
198
|
+
@Override
|
199
|
+
public List<TaskReport> run(TaskSource taskSource, int taskCount)
|
200
|
+
{
|
201
|
+
assertEquals(2, taskCount);
|
202
|
+
return emptyTaskReports(taskCount);
|
203
|
+
}
|
204
|
+
});
|
205
|
+
|
206
|
+
Method listFilesByPrefix = SftpFileInput.class.getDeclaredMethod("listFilesByPrefix", PluginTask.class);
|
207
|
+
listFilesByPrefix.setAccessible(true);
|
208
|
+
FileList actual = (FileList) listFilesByPrefix.invoke(plugin, task);
|
209
|
+
|
210
|
+
assertEquals(expected.get(0), actual.get(0));
|
211
|
+
assertEquals(expected.get(1), actual.get(1));
|
212
|
+
assertEquals(SftpFileInput.getRelativePath(Optional.of(expected.get(1).get(0))), configDiff.get(String.class, "last_path"));
|
213
|
+
}
|
214
|
+
|
215
|
+
@Test
|
216
|
+
public void testSftpInputByOpen() throws Exception
|
217
|
+
{
|
218
|
+
uploadFile(Resources.getResource("sample_01.csv").getPath(), REMOTE_DIRECTORY + "sample_01.csv");
|
219
|
+
uploadFile(Resources.getResource("sample_02.csv").getPath(), REMOTE_DIRECTORY + "sample_02.csv");
|
220
|
+
|
221
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
222
|
+
runner.transaction(config, new Control());
|
223
|
+
|
224
|
+
Method listFilesByPrefix = SftpFileInput.class.getDeclaredMethod("listFilesByPrefix", PluginTask.class);
|
225
|
+
listFilesByPrefix.setAccessible(true);
|
226
|
+
task.setFiles((FileList) listFilesByPrefix.invoke(plugin, task));
|
227
|
+
|
228
|
+
assertRecords(config, output);
|
229
|
+
}
|
230
|
+
|
231
|
+
// @Test
|
232
|
+
// public void testSftpInputByOpenWithProxy() throws Exception
|
233
|
+
// {
|
234
|
+
// HttpProxyServer proxyServer = null;
|
235
|
+
// try {
|
236
|
+
// proxyServer = createProxyServer(PROXY_PORT);
|
237
|
+
//
|
238
|
+
// uploadFile(Resources.getResource("sample_01.csv").getPath(), REMOTE_DIRECTORY + "sample_01.csv");
|
239
|
+
// uploadFile(Resources.getResource("sample_02.csv").getPath(), REMOTE_DIRECTORY + "sample_02.csv");
|
240
|
+
//
|
241
|
+
// ConfigSource config = Exec.newConfigSource()
|
242
|
+
// .set("host", HOST)
|
243
|
+
// .set("port", PORT)
|
244
|
+
// .set("user", USERNAME)
|
245
|
+
// .set("password", PASSWORD)
|
246
|
+
// .set("path_prefix", REMOTE_DIRECTORY)
|
247
|
+
// .set("last_path", "")
|
248
|
+
// .set("proxy", proxyConfig())
|
249
|
+
// .set("parser", parserConfig(schemaConfig()));
|
250
|
+
//
|
251
|
+
// PluginTask task = config.loadConfig(PluginTask.class);
|
252
|
+
// runner.transaction(config, new Control());
|
253
|
+
//
|
254
|
+
// Method listFilesByPrefix = SftpFileInput.class.getDeclaredMethod("listFilesByPrefix", PluginTask.class);
|
255
|
+
// listFilesByPrefix.setAccessible(true);
|
256
|
+
// task.setFiles((FileList) listFilesByPrefix.invoke(plugin, task));
|
257
|
+
//
|
258
|
+
// assertRecords(config, output);
|
259
|
+
// log.info("config:", config);
|
260
|
+
// log.info("output:", output);
|
261
|
+
// }
|
262
|
+
// finally {
|
263
|
+
// if (proxyServer != null) {
|
264
|
+
// proxyServer.stop();
|
265
|
+
// }
|
266
|
+
// }
|
267
|
+
// }
|
268
|
+
|
269
|
+
@Test
|
270
|
+
public void testSftpInputByOpenTimeout() throws Exception
|
271
|
+
{
|
272
|
+
uploadFile(Resources.getResource("sample_01.csv").getPath(), REMOTE_DIRECTORY + "sample_01.csv");
|
273
|
+
uploadFile(Resources.getResource("sample_02.csv").getPath(), REMOTE_DIRECTORY + "sample_02.csv");
|
274
|
+
|
275
|
+
ConfigSource config = Exec.newConfigSource()
|
276
|
+
.set("host", HOST)
|
277
|
+
.set("port", PORT)
|
278
|
+
.set("user", "invalid-username")
|
279
|
+
.set("password", PASSWORD)
|
280
|
+
.set("path_prefix", REMOTE_DIRECTORY)
|
281
|
+
.set("max_connection_retry", 2)
|
282
|
+
.set("last_path", "")
|
283
|
+
.set("parser", parserConfig(schemaConfig()));
|
284
|
+
|
285
|
+
exception.expect(RuntimeException.class);
|
286
|
+
exception.expectCause(CoreMatchers.<Throwable>instanceOf(FileSystemException.class));
|
287
|
+
exception.expectMessage("Could not connect to SFTP server");
|
288
|
+
|
289
|
+
runner.transaction(config, new Control());
|
290
|
+
}
|
291
|
+
|
292
|
+
@Test
|
293
|
+
public void testProxyType()
|
294
|
+
{
|
295
|
+
// test valueOf()
|
296
|
+
assertEquals("http", ProxyTask.ProxyType.valueOf("HTTP").toString());
|
297
|
+
assertEquals("socks", ProxyTask.ProxyType.valueOf("SOCKS").toString());
|
298
|
+
assertEquals("stream", ProxyTask.ProxyType.valueOf("STREAM").toString());
|
299
|
+
try {
|
300
|
+
ProxyTask.ProxyType.valueOf("non-existing-type");
|
301
|
+
}
|
302
|
+
catch (Exception ex) {
|
303
|
+
assertEquals(IllegalArgumentException.class, ex.getClass());
|
304
|
+
}
|
305
|
+
|
306
|
+
// test fromString
|
307
|
+
assertEquals(ProxyTask.ProxyType.HTTP, ProxyTask.ProxyType.fromString("http"));
|
308
|
+
assertEquals(ProxyTask.ProxyType.SOCKS, ProxyTask.ProxyType.fromString("socks"));
|
309
|
+
assertEquals(ProxyTask.ProxyType.STREAM, ProxyTask.ProxyType.fromString("stream"));
|
310
|
+
try {
|
311
|
+
ProxyTask.ProxyType.fromString("non-existing-type");
|
312
|
+
}
|
313
|
+
catch (Exception ex) {
|
314
|
+
assertEquals(ConfigException.class, ex.getClass());
|
315
|
+
}
|
316
|
+
}
|
317
|
+
|
318
|
+
@Test
|
319
|
+
public void testSetProxyType() throws Exception
|
320
|
+
{
|
321
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
322
|
+
FileSystemOptions fsOptions = SftpFileInput.initializeFsOptions(task);
|
323
|
+
SftpFileSystemConfigBuilder builder = SftpFileSystemConfigBuilder.getInstance();
|
324
|
+
|
325
|
+
ProxyTask.ProxyType.setProxyType(builder, fsOptions, ProxyTask.ProxyType.HTTP);
|
326
|
+
assertEquals(SftpFileSystemConfigBuilder.PROXY_HTTP, builder.getProxyType(fsOptions));
|
327
|
+
|
328
|
+
ProxyTask.ProxyType.setProxyType(builder, fsOptions, ProxyTask.ProxyType.SOCKS);
|
329
|
+
assertEquals(SftpFileSystemConfigBuilder.PROXY_SOCKS5, builder.getProxyType(fsOptions));
|
330
|
+
|
331
|
+
ProxyTask.ProxyType.setProxyType(builder, fsOptions, ProxyTask.ProxyType.STREAM);
|
332
|
+
assertEquals(SftpFileSystemConfigBuilder.PROXY_STREAM, builder.getProxyType(fsOptions));
|
333
|
+
}
|
334
|
+
|
335
|
+
private SshServer createSshServer(String host, int port, final String sshUsername, final String sshPassword)
|
336
|
+
{
|
337
|
+
// setup a mock sftp server
|
338
|
+
SshServer sshServer = SshServer.setUpDefaultServer();
|
339
|
+
VirtualFileSystemFactory fsFactory = new VirtualFileSystemFactory();
|
340
|
+
fsFactory.setUserHomeDir(sshUsername, testFolder.getRoot().toPath());
|
341
|
+
sshServer.setFileSystemFactory(fsFactory);
|
342
|
+
sshServer.setHost(host);
|
343
|
+
sshServer.setPort(port);
|
344
|
+
sshServer.setSubsystemFactories(Collections.<NamedFactory<Command>>singletonList(new SftpSubsystemFactory()));
|
345
|
+
sshServer.setCommandFactory(new ScpCommandFactory());
|
346
|
+
sshServer.setKeyPairProvider(new SimpleGeneratorHostKeyProvider());
|
347
|
+
sshServer.setPasswordAuthenticator(new PasswordAuthenticator()
|
348
|
+
{
|
349
|
+
@Override
|
350
|
+
public boolean authenticate(final String username, final String password, final ServerSession session)
|
351
|
+
{
|
352
|
+
return sshUsername.contentEquals(username) && sshPassword.contentEquals(password);
|
353
|
+
}
|
354
|
+
});
|
355
|
+
sshServer.setPublickeyAuthenticator(new PublickeyAuthenticator()
|
356
|
+
{
|
357
|
+
@Override
|
358
|
+
public boolean authenticate(String username, PublicKey key, ServerSession session)
|
359
|
+
{
|
360
|
+
return true;
|
361
|
+
}
|
362
|
+
});
|
363
|
+
|
364
|
+
try {
|
365
|
+
sshServer.start();
|
366
|
+
}
|
367
|
+
catch (IOException ex) {
|
368
|
+
log.debug(ex.getMessage(), ex);
|
369
|
+
}
|
370
|
+
return sshServer;
|
371
|
+
}
|
372
|
+
|
373
|
+
private HttpProxyServer createProxyServer(int port)
|
374
|
+
{
|
375
|
+
return DefaultHttpProxyServer.bootstrap()
|
376
|
+
.withPort(port)
|
377
|
+
.start();
|
378
|
+
}
|
379
|
+
|
380
|
+
private void uploadFile(String localPath, String remotePath) throws Exception
|
381
|
+
{
|
382
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
383
|
+
|
384
|
+
FileSystemOptions fsOptions = SftpFileInput.initializeFsOptions(task);
|
385
|
+
String uri = SftpFileInput.getSftpFileUri(task, remotePath);
|
386
|
+
|
387
|
+
int count = 0;
|
388
|
+
while (true) {
|
389
|
+
try {
|
390
|
+
StandardFileSystemManager manager = new StandardFileSystemManager();
|
391
|
+
manager.init();
|
392
|
+
|
393
|
+
FileObject localFile = manager.resolveFile(localPath);
|
394
|
+
FileObject remoteFile = manager.resolveFile(uri, fsOptions);
|
395
|
+
remoteFile.copyFrom(localFile, Selectors.SELECT_SELF);
|
396
|
+
|
397
|
+
if (log.isDebugEnabled()) {
|
398
|
+
FileObject files = manager.resolveFile(SftpFileInput.getSftpFileUri(task, REMOTE_DIRECTORY));
|
399
|
+
for (FileObject f : files.getChildren()) {
|
400
|
+
if (f.isFile()) {
|
401
|
+
log.debug("remote file list:" + f.toString());
|
402
|
+
}
|
403
|
+
}
|
404
|
+
}
|
405
|
+
return;
|
406
|
+
}
|
407
|
+
catch (FileSystemException ex) {
|
408
|
+
if (++count == task.getMaxConnectionRetry()) {
|
409
|
+
Throwables.propagate(ex);
|
410
|
+
}
|
411
|
+
log.warn("failed to connect sftp server: " + ex.getMessage(), ex);
|
412
|
+
|
413
|
+
try {
|
414
|
+
long sleepTime = ((long) Math.pow(2, count) * 1000);
|
415
|
+
log.warn("sleep in next connection retry: {} milliseconds", sleepTime);
|
416
|
+
Thread.sleep(sleepTime); // milliseconds
|
417
|
+
}
|
418
|
+
catch (InterruptedException ex2) {
|
419
|
+
// Ignore this exception because this exception is just about `sleep`.
|
420
|
+
log.warn(ex2.getMessage(), ex2);
|
421
|
+
}
|
422
|
+
log.warn("retrying to connect sftp server: " + count + " times");
|
423
|
+
}
|
424
|
+
}
|
425
|
+
}
|
426
|
+
|
427
|
+
private FileList createFileList(List<String> fileList, PluginTask task)
|
428
|
+
{
|
429
|
+
FileList.Builder builder = new FileList.Builder(task);
|
430
|
+
for (String file : fileList) {
|
431
|
+
builder.add(file, 0);
|
432
|
+
}
|
433
|
+
return builder.build();
|
434
|
+
}
|
435
|
+
|
436
|
+
static List<TaskReport> emptyTaskReports(int taskCount)
|
437
|
+
{
|
438
|
+
ImmutableList.Builder<TaskReport> reports = new ImmutableList.Builder<>();
|
439
|
+
for (int i = 0; i < taskCount; i++) {
|
440
|
+
reports.add(Exec.newTaskReport());
|
441
|
+
}
|
442
|
+
return reports.build();
|
443
|
+
}
|
444
|
+
|
445
|
+
private class Control
|
446
|
+
implements InputPlugin.Control
|
447
|
+
{
|
448
|
+
@Override
|
449
|
+
public List<TaskReport> run(TaskSource taskSource, Schema schema, int taskCount)
|
450
|
+
{
|
451
|
+
List<TaskReport> reports = new ArrayList<>();
|
452
|
+
for (int i = 0; i < taskCount; i++) {
|
453
|
+
reports.add(runner.run(taskSource, schema, i, output));
|
454
|
+
}
|
455
|
+
return reports;
|
456
|
+
}
|
457
|
+
}
|
458
|
+
|
459
|
+
private ConfigSource config()
|
460
|
+
{
|
461
|
+
return Exec.newConfigSource()
|
462
|
+
.set("host", HOST)
|
463
|
+
.set("port", PORT)
|
464
|
+
.set("user", USERNAME)
|
465
|
+
.set("password", PASSWORD)
|
466
|
+
.set("path_prefix", REMOTE_DIRECTORY)
|
467
|
+
.set("last_path", "")
|
468
|
+
.set("parser", parserConfig(schemaConfig()));
|
469
|
+
}
|
470
|
+
|
471
|
+
private ImmutableMap<String, Object> proxyConfig()
|
472
|
+
{
|
473
|
+
ImmutableMap.Builder<String, Object> builder = new ImmutableMap.Builder<>();
|
474
|
+
builder.put("type", "http");
|
475
|
+
builder.put("host", PROXY_HOST);
|
476
|
+
builder.put("port", PROXY_PORT);
|
477
|
+
return builder.build();
|
478
|
+
}
|
479
|
+
|
480
|
+
private ImmutableMap<String, Object> parserConfig(ImmutableList<Object> schemaConfig)
|
481
|
+
{
|
482
|
+
ImmutableMap.Builder<String, Object> builder = new ImmutableMap.Builder<>();
|
483
|
+
builder.put("type", "csv");
|
484
|
+
builder.put("newline", "CRLF");
|
485
|
+
builder.put("delimiter", ",");
|
486
|
+
builder.put("quote", "\"");
|
487
|
+
builder.put("escape", "\"");
|
488
|
+
builder.put("trim_if_not_quoted", false);
|
489
|
+
builder.put("skip_header_lines", 1);
|
490
|
+
builder.put("allow_extra_columns", false);
|
491
|
+
builder.put("allow_optional_columns", false);
|
492
|
+
builder.put("columns", schemaConfig);
|
493
|
+
return builder.build();
|
494
|
+
}
|
495
|
+
|
496
|
+
private ImmutableList<Object> schemaConfig()
|
497
|
+
{
|
498
|
+
ImmutableList.Builder<Object> builder = new ImmutableList.Builder<>();
|
499
|
+
builder.add(ImmutableMap.of("name", "id", "type", "long"));
|
500
|
+
builder.add(ImmutableMap.of("name", "account", "type", "long"));
|
501
|
+
builder.add(ImmutableMap.of("name", "time", "type", "timestamp", "format", "%Y-%m-%d %H:%M:%S"));
|
502
|
+
builder.add(ImmutableMap.of("name", "purchase", "type", "timestamp", "format", "%Y%m%d"));
|
503
|
+
builder.add(ImmutableMap.of("name", "comment", "type", "string"));
|
504
|
+
builder.add(ImmutableMap.of("name", "json_column", "type", "json"));
|
505
|
+
return builder.build();
|
506
|
+
}
|
507
|
+
|
508
|
+
private void assertRecords(ConfigSource config, MockPageOutput output)
|
509
|
+
{
|
510
|
+
List<Object[]> records = getRecords(config, output);
|
511
|
+
assertEquals(10, records.size());
|
512
|
+
{
|
513
|
+
Object[] record = records.get(0);
|
514
|
+
assertEquals(1L, record[0]);
|
515
|
+
assertEquals(32864L, record[1]);
|
516
|
+
assertEquals("2015-01-27 19:23:49 UTC", record[2].toString());
|
517
|
+
assertEquals("2015-01-27 00:00:00 UTC", record[3].toString());
|
518
|
+
assertEquals("embulk", record[4]);
|
519
|
+
assertEquals("{\"k\":true}", record[5].toString());
|
520
|
+
}
|
521
|
+
|
522
|
+
{
|
523
|
+
Object[] record = records.get(1);
|
524
|
+
assertEquals(2L, record[0]);
|
525
|
+
assertEquals(14824L, record[1]);
|
526
|
+
assertEquals("2015-01-27 19:01:23 UTC", record[2].toString());
|
527
|
+
assertEquals("2015-01-27 00:00:00 UTC", record[3].toString());
|
528
|
+
assertEquals("embulk jruby", record[4]);
|
529
|
+
assertEquals("{\"k\":1}", record[5].toString());
|
530
|
+
}
|
531
|
+
|
532
|
+
{
|
533
|
+
Object[] record = records.get(2);
|
534
|
+
assertEquals("{\"k\":1.23}", record[5].toString());
|
535
|
+
}
|
536
|
+
|
537
|
+
{
|
538
|
+
Object[] record = records.get(3);
|
539
|
+
assertEquals("{\"k\":\"v\"}", record[5].toString());
|
540
|
+
}
|
541
|
+
|
542
|
+
{
|
543
|
+
Object[] record = records.get(4);
|
544
|
+
assertEquals("{\"k\":\"2015-02-03 08:13:45\"}", record[5].toString());
|
545
|
+
}
|
546
|
+
}
|
547
|
+
|
548
|
+
private List<Object[]> getRecords(ConfigSource config, MockPageOutput output)
|
549
|
+
{
|
550
|
+
Schema schema = config.getNested("parser").loadConfig(CsvParserPlugin.PluginTask.class).getSchemaConfig().toSchema();
|
551
|
+
return Pages.toObjects(schema, output.pages);
|
552
|
+
}
|
5
553
|
}
|