embulk-input-sftp 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,8 +8,6 @@ import org.embulk.spi.Exec;
8
8
  import org.embulk.spi.FileInputPlugin;
9
9
  import org.embulk.spi.TransactionalFileInput;
10
10
 
11
- import java.util.ArrayList;
12
- import java.util.Collections;
13
11
  import java.util.List;
14
12
 
15
13
  public class SftpFileInputPlugin
@@ -23,7 +21,7 @@ public class SftpFileInputPlugin
23
21
  // list files recursively
24
22
  task.setFiles(SftpFileInput.listFilesByPrefix(task));
25
23
  // number of processors is same with number of files
26
- return resume(task.dump(), task.getFiles().size(), control);
24
+ return resume(task.dump(), task.getFiles().getTaskCount(), control);
27
25
  }
28
26
 
29
27
  @Override
@@ -32,21 +30,10 @@ public class SftpFileInputPlugin
32
30
  FileInputPlugin.Control control)
33
31
  {
34
32
  PluginTask task = taskSource.loadTask(PluginTask.class);
35
-
36
33
  control.run(taskSource, taskCount);
37
34
 
38
35
  ConfigDiff configDiff = Exec.newConfigDiff();
39
-
40
- List<String> files = new ArrayList<String>(task.getFiles());
41
- if (files.isEmpty()) {
42
- if (task.getLastPath().isPresent()) {
43
- configDiff.set("last_path", task.getLastPath().get());
44
- }
45
- }
46
- else {
47
- Collections.sort(files);
48
- configDiff.set("last_path", files.get(files.size() - 1));
49
- }
36
+ configDiff.set("last_path", SftpFileInput.getRelativePath(task.getFiles().getLastPath(task.getLastPath())));
50
37
 
51
38
  return configDiff;
52
39
  }
@@ -2,6 +2,7 @@ package org.embulk.input.sftp;
2
2
 
3
3
  import org.apache.commons.vfs2.FileObject;
4
4
  import org.apache.commons.vfs2.FileSystemException;
5
+ import org.apache.commons.vfs2.FileSystemOptions;
5
6
  import org.apache.commons.vfs2.impl.StandardFileSystemManager;
6
7
  import org.embulk.spi.Exec;
7
8
  import org.embulk.spi.util.InputStreamFileInput;
@@ -9,43 +10,45 @@ import org.slf4j.Logger;
9
10
 
10
11
  import java.io.IOException;
11
12
  import java.io.InputStream;
13
+ import java.util.Iterator;
12
14
 
13
15
  public class SingleFileProvider
14
16
  implements InputStreamFileInput.Provider
15
17
  {
16
18
  private final StandardFileSystemManager manager;
17
- private final String key;
18
- private final PluginTask task;
19
+ private final FileSystemOptions fsOptions;
20
+ private final Iterator<String> iterator;
19
21
  private final int maxConnectionRetry;
20
22
  private boolean opened = false;
21
23
  private final Logger log = Exec.getLogger(SingleFileProvider.class);
22
24
 
23
- public SingleFileProvider(PluginTask task, int taskIndex, StandardFileSystemManager manager)
25
+ public SingleFileProvider(PluginTask task, int taskIndex, StandardFileSystemManager manager, FileSystemOptions fsOptions)
24
26
  {
25
27
  this.manager = manager;
26
- this.key = task.getFiles().get(taskIndex);
27
- this.task = task;
28
+ this.fsOptions = fsOptions;
29
+ this.iterator = task.getFiles().get(taskIndex).iterator();
28
30
  this.maxConnectionRetry = task.getMaxConnectionRetry();
29
31
  }
30
32
 
31
33
  @Override
32
34
  public InputStream openNext() throws IOException
33
35
  {
34
- if (opened) {
36
+ if (opened || !iterator.hasNext()) {
35
37
  return null;
36
38
  }
37
39
  opened = true;
40
+ String key = iterator.next();
38
41
 
39
42
  int count = 0;
40
43
  while (true) {
41
44
  try {
42
- FileObject file = manager.resolveFile(key, SftpFileInput.initializeFsOptions(task));
45
+ FileObject file = manager.resolveFile(key, fsOptions);
43
46
  log.info("Starting to download file {}", key);
44
47
 
45
48
  return file.getContent().getInputStream();
46
49
  }
47
50
  catch (FileSystemException ex) {
48
- if (++count == maxConnectionRetry) {
51
+ if (++count == maxConnectionRetry || ex.getMessage().indexOf("Permission denied") > 0) {
49
52
  throw ex;
50
53
  }
51
54
  log.warn("failed to connect sftp server: " + ex.getMessage(), ex);
@@ -0,0 +1,87 @@
1
+ package org.embulk.input.sftp;
2
+
3
+ import org.embulk.EmbulkTestRuntime;
4
+ import org.embulk.config.ConfigSource;
5
+ import org.junit.Before;
6
+ import org.junit.Rule;
7
+ import org.junit.Test;
8
+
9
+ import static org.junit.Assert.assertEquals;
10
+
11
+ public class TestFileList
12
+ {
13
+ @Rule
14
+ public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
15
+
16
+ private ConfigSource config;
17
+
18
+ @Before
19
+ public void createConfigSource()
20
+ {
21
+ config = runtime.getExec().newConfigSource();
22
+ }
23
+
24
+ @Test
25
+ public void checkMinTaskSize()
26
+ throws Exception
27
+ {
28
+ { // not specify min_task_size
29
+ FileList fileList = newFileList(config.deepCopy(),
30
+ "sample_00", 100L,
31
+ "sample_01", 150L,
32
+ "sample_02", 350L);
33
+
34
+ assertEquals(3, fileList.getTaskCount());
35
+ assertEquals("sample_00", fileList.get(0).get(0));
36
+ assertEquals("sample_01", fileList.get(1).get(0));
37
+ assertEquals("sample_02", fileList.get(2).get(0));
38
+ }
39
+
40
+ {
41
+ FileList fileList = newFileList(config.deepCopy().set("min_task_size", 100),
42
+ "sample_00", 100L,
43
+ "sample_01", 150L,
44
+ "sample_02", 350L);
45
+
46
+ assertEquals(3, fileList.getTaskCount());
47
+ assertEquals("sample_00", fileList.get(0).get(0));
48
+ assertEquals("sample_01", fileList.get(1).get(0));
49
+ assertEquals("sample_02", fileList.get(2).get(0));
50
+ }
51
+
52
+ {
53
+ FileList fileList = newFileList(config.deepCopy().set("min_task_size", 200),
54
+ "sample_00", 100L,
55
+ "sample_01", 150L,
56
+ "sample_02", 350L);
57
+
58
+ assertEquals(2, fileList.getTaskCount());
59
+ assertEquals("sample_00", fileList.get(0).get(0));
60
+ assertEquals("sample_01", fileList.get(0).get(1));
61
+ assertEquals("sample_02", fileList.get(1).get(0));
62
+ }
63
+
64
+ {
65
+ FileList fileList = newFileList(config.deepCopy().set("min_task_size", 700),
66
+ "sample_00", 100L,
67
+ "sample_01", 150L,
68
+ "sample_02", 350L);
69
+
70
+ assertEquals(1, fileList.getTaskCount());
71
+ assertEquals("sample_00", fileList.get(0).get(0));
72
+ assertEquals("sample_01", fileList.get(0).get(1));
73
+ assertEquals("sample_02", fileList.get(0).get(2));
74
+ }
75
+ }
76
+
77
+ private static FileList newFileList(ConfigSource config, Object... nameAndSize)
78
+ {
79
+ FileList.Builder builder = new FileList.Builder(config);
80
+
81
+ for (int i = 0; i < nameAndSize.length; i += 2) {
82
+ builder.add((String) nameAndSize[i], (long) nameAndSize[i + 1]);
83
+ }
84
+
85
+ return builder.build();
86
+ }
87
+ }
@@ -1,5 +1,553 @@
1
1
  package org.embulk.input.sftp;
2
2
 
3
+ import com.google.common.base.Optional;
4
+ import com.google.common.base.Throwables;
5
+ import com.google.common.collect.ImmutableList;
6
+ import com.google.common.collect.ImmutableMap;
7
+ import com.google.common.collect.Lists;
8
+ import com.google.common.io.Resources;
9
+ import org.apache.commons.vfs2.FileObject;
10
+ import org.apache.commons.vfs2.FileSystemException;
11
+ import org.apache.commons.vfs2.FileSystemOptions;
12
+ import org.apache.commons.vfs2.Selectors;
13
+ import org.apache.commons.vfs2.impl.StandardFileSystemManager;
14
+ import org.apache.commons.vfs2.provider.sftp.SftpFileSystemConfigBuilder;
15
+ import org.apache.sshd.common.NamedFactory;
16
+ import org.apache.sshd.common.file.virtualfs.VirtualFileSystemFactory;
17
+ import org.apache.sshd.server.Command;
18
+ import org.apache.sshd.server.SshServer;
19
+ import org.apache.sshd.server.auth.password.PasswordAuthenticator;
20
+ import org.apache.sshd.server.auth.pubkey.PublickeyAuthenticator;
21
+ import org.apache.sshd.server.keyprovider.SimpleGeneratorHostKeyProvider;
22
+ import org.apache.sshd.server.scp.ScpCommandFactory;
23
+ import org.apache.sshd.server.session.ServerSession;
24
+ import org.apache.sshd.server.subsystem.sftp.SftpSubsystemFactory;
25
+ import org.embulk.EmbulkTestRuntime;
26
+ import org.embulk.config.ConfigDiff;
27
+ import org.embulk.config.ConfigException;
28
+ import org.embulk.config.ConfigSource;
29
+ import org.embulk.config.TaskReport;
30
+ import org.embulk.config.TaskSource;
31
+ import org.embulk.spi.Exec;
32
+ import org.embulk.spi.FileInputPlugin;
33
+ import org.embulk.spi.FileInputRunner;
34
+ import org.embulk.spi.InputPlugin;
35
+ import org.embulk.spi.Schema;
36
+ import org.embulk.spi.TestPageBuilderReader.MockPageOutput;
37
+ import org.embulk.spi.util.Pages;
38
+ import org.embulk.standards.CsvParserPlugin;
39
+ import org.hamcrest.CoreMatchers;
40
+ import org.junit.After;
41
+ import org.junit.Before;
42
+ import org.junit.Rule;
43
+ import org.junit.Test;
44
+ import org.junit.rules.ExpectedException;
45
+ import org.junit.rules.TemporaryFolder;
46
+ import org.littleshoot.proxy.HttpProxyServer;
47
+ import org.littleshoot.proxy.impl.DefaultHttpProxyServer;
48
+ import org.slf4j.Logger;
49
+
50
+ import java.io.IOException;
51
+ import java.lang.reflect.Method;
52
+ import java.security.PublicKey;
53
+ import java.util.ArrayList;
54
+ import java.util.Arrays;
55
+ import java.util.Collections;
56
+ import java.util.List;
57
+
58
+ import static org.junit.Assert.assertEquals;
59
+
3
60
  public class TestSftpFileInputPlugin
4
61
  {
62
+ @Rule
63
+ public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
64
+
65
+ @Rule
66
+ public ExpectedException exception = ExpectedException.none();
67
+
68
+ @Rule
69
+ public TemporaryFolder testFolder = new TemporaryFolder();
70
+
71
+ private Logger log = runtime.getExec().getLogger(TestSftpFileInputPlugin.class);
72
+ private ConfigSource config;
73
+ private SftpFileInputPlugin plugin;
74
+ private FileInputRunner runner;
75
+ private MockPageOutput output;
76
+ private SshServer sshServer;
77
+ private static final String HOST = "127.0.0.1";
78
+ private static final int PORT = 20022;
79
+ private static final String USERNAME = "username";
80
+ private static final String PASSWORD = "password";
81
+ private static final String REMOTE_DIRECTORY = "/home/username/unittest/";
82
+ private static final String SECRET_KEY_FILE = Resources.getResource("id_rsa").getPath();
83
+ private static final String SECRET_KEY_PASSPHRASE = "SECRET_KEY_PASSPHRASE";
84
+ private static final String PROXY_HOST = "127.0.0.1";
85
+ private static final int PROXY_PORT = 8080;
86
+
87
+ @Before
88
+ public void createResources() throws Exception
89
+ {
90
+ config = config();
91
+ plugin = new SftpFileInputPlugin();
92
+ runner = new FileInputRunner(runtime.getInstance(SftpFileInputPlugin.class));
93
+ output = new MockPageOutput();
94
+
95
+ if (!log.isDebugEnabled()) {
96
+ // TODO: change logging format: org.apache.commons.logging.Log
97
+ System.setProperty("org.apache.commons.logging.Log", "org.apache.commons.logging.impl.NoOpLog");
98
+ }
99
+
100
+ sshServer = createSshServer(HOST, PORT, USERNAME, PASSWORD);
101
+ }
102
+
103
+ @After
104
+ public void cleanup() throws InterruptedException
105
+ {
106
+ try {
107
+ sshServer.stop(true);
108
+ }
109
+ catch (Exception ex) {
110
+ log.debug(ex.getMessage(), ex);
111
+ }
112
+ }
113
+
114
+ @Test
115
+ public void checkDefaultValues()
116
+ {
117
+ ConfigSource config = Exec.newConfigSource()
118
+ .set("host", HOST)
119
+ .set("user", USERNAME)
120
+ .set("password", PASSWORD)
121
+ .set("path_prefix", "")
122
+ .set("last_path", "")
123
+ .set("parser", parserConfig(schemaConfig()));
124
+
125
+ PluginTask task = config.loadConfig(PluginTask.class);
126
+ assertEquals(22, task.getPort());
127
+ assertEquals(true, task.getUserDirIsRoot());
128
+ assertEquals(600, task.getSftpConnectionTimeout());
129
+ assertEquals(5, task.getMaxConnectionRetry());
130
+ }
131
+
132
+ @Test(expected = ConfigException.class)
133
+ public void checkDefaultValuesHostIsNull()
134
+ {
135
+ ConfigSource config = Exec.newConfigSource()
136
+ .set("host", null)
137
+ .set("user", USERNAME)
138
+ .set("password", PASSWORD)
139
+ .set("path_prefix", "")
140
+ .set("last_path", "")
141
+ .set("parser", parserConfig(schemaConfig()));
142
+
143
+ runner.transaction(config, new Control());
144
+ }
145
+
146
+ @Test(expected = ConfigException.class)
147
+ public void checkDefaultValuesUserIsNull()
148
+ {
149
+ ConfigSource config = Exec.newConfigSource()
150
+ .set("host", HOST)
151
+ .set("user", null)
152
+ .set("password", PASSWORD)
153
+ .set("path_prefix", "")
154
+ .set("last_path", "")
155
+ .set("parser", parserConfig(schemaConfig()));
156
+
157
+ runner.transaction(config, new Control());
158
+ }
159
+
160
+ @Test
161
+ public void testResume()
162
+ {
163
+ PluginTask task = config.loadConfig(PluginTask.class);
164
+ task.setFiles(createFileList(Arrays.asList("in/aa/a"), task));
165
+ ConfigDiff configDiff = plugin.resume(task.dump(), 0, new FileInputPlugin.Control()
166
+ {
167
+ @Override
168
+ public List<TaskReport> run(TaskSource taskSource, int taskCount)
169
+ {
170
+ return emptyTaskReports(taskCount);
171
+ }
172
+ });
173
+ assertEquals("in/aa/a", configDiff.get(String.class, "last_path"));
174
+ }
175
+
176
+ @Test
177
+ public void testCleanup()
178
+ {
179
+ PluginTask task = config.loadConfig(PluginTask.class);
180
+ plugin.cleanup(task.dump(), 0, Lists.<TaskReport>newArrayList()); // no errors happens
181
+ }
182
+
183
+ @Test
184
+ public void testListFiles() throws Exception
185
+ {
186
+ uploadFile(Resources.getResource("sample_01.csv").getPath(), REMOTE_DIRECTORY + "sample_01.csv");
187
+ uploadFile(Resources.getResource("sample_02.csv").getPath(), REMOTE_DIRECTORY + "sample_02.csv");
188
+
189
+ PluginTask task = config.loadConfig(PluginTask.class);
190
+
191
+ List<String> fileList = Arrays.asList(
192
+ SftpFileInput.getSftpFileUri(task, REMOTE_DIRECTORY + "sample_01.csv"),
193
+ SftpFileInput.getSftpFileUri(task, REMOTE_DIRECTORY + "sample_02.csv")
194
+ );
195
+ FileList expected = createFileList(fileList, task);
196
+
197
+ ConfigDiff configDiff = plugin.transaction(config, new FileInputPlugin.Control() {
198
+ @Override
199
+ public List<TaskReport> run(TaskSource taskSource, int taskCount)
200
+ {
201
+ assertEquals(2, taskCount);
202
+ return emptyTaskReports(taskCount);
203
+ }
204
+ });
205
+
206
+ Method listFilesByPrefix = SftpFileInput.class.getDeclaredMethod("listFilesByPrefix", PluginTask.class);
207
+ listFilesByPrefix.setAccessible(true);
208
+ FileList actual = (FileList) listFilesByPrefix.invoke(plugin, task);
209
+
210
+ assertEquals(expected.get(0), actual.get(0));
211
+ assertEquals(expected.get(1), actual.get(1));
212
+ assertEquals(SftpFileInput.getRelativePath(Optional.of(expected.get(1).get(0))), configDiff.get(String.class, "last_path"));
213
+ }
214
+
215
+ @Test
216
+ public void testSftpInputByOpen() throws Exception
217
+ {
218
+ uploadFile(Resources.getResource("sample_01.csv").getPath(), REMOTE_DIRECTORY + "sample_01.csv");
219
+ uploadFile(Resources.getResource("sample_02.csv").getPath(), REMOTE_DIRECTORY + "sample_02.csv");
220
+
221
+ PluginTask task = config.loadConfig(PluginTask.class);
222
+ runner.transaction(config, new Control());
223
+
224
+ Method listFilesByPrefix = SftpFileInput.class.getDeclaredMethod("listFilesByPrefix", PluginTask.class);
225
+ listFilesByPrefix.setAccessible(true);
226
+ task.setFiles((FileList) listFilesByPrefix.invoke(plugin, task));
227
+
228
+ assertRecords(config, output);
229
+ }
230
+
231
+ // @Test
232
+ // public void testSftpInputByOpenWithProxy() throws Exception
233
+ // {
234
+ // HttpProxyServer proxyServer = null;
235
+ // try {
236
+ // proxyServer = createProxyServer(PROXY_PORT);
237
+ //
238
+ // uploadFile(Resources.getResource("sample_01.csv").getPath(), REMOTE_DIRECTORY + "sample_01.csv");
239
+ // uploadFile(Resources.getResource("sample_02.csv").getPath(), REMOTE_DIRECTORY + "sample_02.csv");
240
+ //
241
+ // ConfigSource config = Exec.newConfigSource()
242
+ // .set("host", HOST)
243
+ // .set("port", PORT)
244
+ // .set("user", USERNAME)
245
+ // .set("password", PASSWORD)
246
+ // .set("path_prefix", REMOTE_DIRECTORY)
247
+ // .set("last_path", "")
248
+ // .set("proxy", proxyConfig())
249
+ // .set("parser", parserConfig(schemaConfig()));
250
+ //
251
+ // PluginTask task = config.loadConfig(PluginTask.class);
252
+ // runner.transaction(config, new Control());
253
+ //
254
+ // Method listFilesByPrefix = SftpFileInput.class.getDeclaredMethod("listFilesByPrefix", PluginTask.class);
255
+ // listFilesByPrefix.setAccessible(true);
256
+ // task.setFiles((FileList) listFilesByPrefix.invoke(plugin, task));
257
+ //
258
+ // assertRecords(config, output);
259
+ // log.info("config:", config);
260
+ // log.info("output:", output);
261
+ // }
262
+ // finally {
263
+ // if (proxyServer != null) {
264
+ // proxyServer.stop();
265
+ // }
266
+ // }
267
+ // }
268
+
269
+ @Test
270
+ public void testSftpInputByOpenTimeout() throws Exception
271
+ {
272
+ uploadFile(Resources.getResource("sample_01.csv").getPath(), REMOTE_DIRECTORY + "sample_01.csv");
273
+ uploadFile(Resources.getResource("sample_02.csv").getPath(), REMOTE_DIRECTORY + "sample_02.csv");
274
+
275
+ ConfigSource config = Exec.newConfigSource()
276
+ .set("host", HOST)
277
+ .set("port", PORT)
278
+ .set("user", "invalid-username")
279
+ .set("password", PASSWORD)
280
+ .set("path_prefix", REMOTE_DIRECTORY)
281
+ .set("max_connection_retry", 2)
282
+ .set("last_path", "")
283
+ .set("parser", parserConfig(schemaConfig()));
284
+
285
+ exception.expect(RuntimeException.class);
286
+ exception.expectCause(CoreMatchers.<Throwable>instanceOf(FileSystemException.class));
287
+ exception.expectMessage("Could not connect to SFTP server");
288
+
289
+ runner.transaction(config, new Control());
290
+ }
291
+
292
+ @Test
293
+ public void testProxyType()
294
+ {
295
+ // test valueOf()
296
+ assertEquals("http", ProxyTask.ProxyType.valueOf("HTTP").toString());
297
+ assertEquals("socks", ProxyTask.ProxyType.valueOf("SOCKS").toString());
298
+ assertEquals("stream", ProxyTask.ProxyType.valueOf("STREAM").toString());
299
+ try {
300
+ ProxyTask.ProxyType.valueOf("non-existing-type");
301
+ }
302
+ catch (Exception ex) {
303
+ assertEquals(IllegalArgumentException.class, ex.getClass());
304
+ }
305
+
306
+ // test fromString
307
+ assertEquals(ProxyTask.ProxyType.HTTP, ProxyTask.ProxyType.fromString("http"));
308
+ assertEquals(ProxyTask.ProxyType.SOCKS, ProxyTask.ProxyType.fromString("socks"));
309
+ assertEquals(ProxyTask.ProxyType.STREAM, ProxyTask.ProxyType.fromString("stream"));
310
+ try {
311
+ ProxyTask.ProxyType.fromString("non-existing-type");
312
+ }
313
+ catch (Exception ex) {
314
+ assertEquals(ConfigException.class, ex.getClass());
315
+ }
316
+ }
317
+
318
+ @Test
319
+ public void testSetProxyType() throws Exception
320
+ {
321
+ PluginTask task = config.loadConfig(PluginTask.class);
322
+ FileSystemOptions fsOptions = SftpFileInput.initializeFsOptions(task);
323
+ SftpFileSystemConfigBuilder builder = SftpFileSystemConfigBuilder.getInstance();
324
+
325
+ ProxyTask.ProxyType.setProxyType(builder, fsOptions, ProxyTask.ProxyType.HTTP);
326
+ assertEquals(SftpFileSystemConfigBuilder.PROXY_HTTP, builder.getProxyType(fsOptions));
327
+
328
+ ProxyTask.ProxyType.setProxyType(builder, fsOptions, ProxyTask.ProxyType.SOCKS);
329
+ assertEquals(SftpFileSystemConfigBuilder.PROXY_SOCKS5, builder.getProxyType(fsOptions));
330
+
331
+ ProxyTask.ProxyType.setProxyType(builder, fsOptions, ProxyTask.ProxyType.STREAM);
332
+ assertEquals(SftpFileSystemConfigBuilder.PROXY_STREAM, builder.getProxyType(fsOptions));
333
+ }
334
+
335
+ private SshServer createSshServer(String host, int port, final String sshUsername, final String sshPassword)
336
+ {
337
+ // setup a mock sftp server
338
+ SshServer sshServer = SshServer.setUpDefaultServer();
339
+ VirtualFileSystemFactory fsFactory = new VirtualFileSystemFactory();
340
+ fsFactory.setUserHomeDir(sshUsername, testFolder.getRoot().toPath());
341
+ sshServer.setFileSystemFactory(fsFactory);
342
+ sshServer.setHost(host);
343
+ sshServer.setPort(port);
344
+ sshServer.setSubsystemFactories(Collections.<NamedFactory<Command>>singletonList(new SftpSubsystemFactory()));
345
+ sshServer.setCommandFactory(new ScpCommandFactory());
346
+ sshServer.setKeyPairProvider(new SimpleGeneratorHostKeyProvider());
347
+ sshServer.setPasswordAuthenticator(new PasswordAuthenticator()
348
+ {
349
+ @Override
350
+ public boolean authenticate(final String username, final String password, final ServerSession session)
351
+ {
352
+ return sshUsername.contentEquals(username) && sshPassword.contentEquals(password);
353
+ }
354
+ });
355
+ sshServer.setPublickeyAuthenticator(new PublickeyAuthenticator()
356
+ {
357
+ @Override
358
+ public boolean authenticate(String username, PublicKey key, ServerSession session)
359
+ {
360
+ return true;
361
+ }
362
+ });
363
+
364
+ try {
365
+ sshServer.start();
366
+ }
367
+ catch (IOException ex) {
368
+ log.debug(ex.getMessage(), ex);
369
+ }
370
+ return sshServer;
371
+ }
372
+
373
+ private HttpProxyServer createProxyServer(int port)
374
+ {
375
+ return DefaultHttpProxyServer.bootstrap()
376
+ .withPort(port)
377
+ .start();
378
+ }
379
+
380
+ private void uploadFile(String localPath, String remotePath) throws Exception
381
+ {
382
+ PluginTask task = config.loadConfig(PluginTask.class);
383
+
384
+ FileSystemOptions fsOptions = SftpFileInput.initializeFsOptions(task);
385
+ String uri = SftpFileInput.getSftpFileUri(task, remotePath);
386
+
387
+ int count = 0;
388
+ while (true) {
389
+ try {
390
+ StandardFileSystemManager manager = new StandardFileSystemManager();
391
+ manager.init();
392
+
393
+ FileObject localFile = manager.resolveFile(localPath);
394
+ FileObject remoteFile = manager.resolveFile(uri, fsOptions);
395
+ remoteFile.copyFrom(localFile, Selectors.SELECT_SELF);
396
+
397
+ if (log.isDebugEnabled()) {
398
+ FileObject files = manager.resolveFile(SftpFileInput.getSftpFileUri(task, REMOTE_DIRECTORY));
399
+ for (FileObject f : files.getChildren()) {
400
+ if (f.isFile()) {
401
+ log.debug("remote file list:" + f.toString());
402
+ }
403
+ }
404
+ }
405
+ return;
406
+ }
407
+ catch (FileSystemException ex) {
408
+ if (++count == task.getMaxConnectionRetry()) {
409
+ Throwables.propagate(ex);
410
+ }
411
+ log.warn("failed to connect sftp server: " + ex.getMessage(), ex);
412
+
413
+ try {
414
+ long sleepTime = ((long) Math.pow(2, count) * 1000);
415
+ log.warn("sleep in next connection retry: {} milliseconds", sleepTime);
416
+ Thread.sleep(sleepTime); // milliseconds
417
+ }
418
+ catch (InterruptedException ex2) {
419
+ // Ignore this exception because this exception is just about `sleep`.
420
+ log.warn(ex2.getMessage(), ex2);
421
+ }
422
+ log.warn("retrying to connect sftp server: " + count + " times");
423
+ }
424
+ }
425
+ }
426
+
427
+ private FileList createFileList(List<String> fileList, PluginTask task)
428
+ {
429
+ FileList.Builder builder = new FileList.Builder(task);
430
+ for (String file : fileList) {
431
+ builder.add(file, 0);
432
+ }
433
+ return builder.build();
434
+ }
435
+
436
+ static List<TaskReport> emptyTaskReports(int taskCount)
437
+ {
438
+ ImmutableList.Builder<TaskReport> reports = new ImmutableList.Builder<>();
439
+ for (int i = 0; i < taskCount; i++) {
440
+ reports.add(Exec.newTaskReport());
441
+ }
442
+ return reports.build();
443
+ }
444
+
445
+ private class Control
446
+ implements InputPlugin.Control
447
+ {
448
+ @Override
449
+ public List<TaskReport> run(TaskSource taskSource, Schema schema, int taskCount)
450
+ {
451
+ List<TaskReport> reports = new ArrayList<>();
452
+ for (int i = 0; i < taskCount; i++) {
453
+ reports.add(runner.run(taskSource, schema, i, output));
454
+ }
455
+ return reports;
456
+ }
457
+ }
458
+
459
+ private ConfigSource config()
460
+ {
461
+ return Exec.newConfigSource()
462
+ .set("host", HOST)
463
+ .set("port", PORT)
464
+ .set("user", USERNAME)
465
+ .set("password", PASSWORD)
466
+ .set("path_prefix", REMOTE_DIRECTORY)
467
+ .set("last_path", "")
468
+ .set("parser", parserConfig(schemaConfig()));
469
+ }
470
+
471
+ private ImmutableMap<String, Object> proxyConfig()
472
+ {
473
+ ImmutableMap.Builder<String, Object> builder = new ImmutableMap.Builder<>();
474
+ builder.put("type", "http");
475
+ builder.put("host", PROXY_HOST);
476
+ builder.put("port", PROXY_PORT);
477
+ return builder.build();
478
+ }
479
+
480
+ private ImmutableMap<String, Object> parserConfig(ImmutableList<Object> schemaConfig)
481
+ {
482
+ ImmutableMap.Builder<String, Object> builder = new ImmutableMap.Builder<>();
483
+ builder.put("type", "csv");
484
+ builder.put("newline", "CRLF");
485
+ builder.put("delimiter", ",");
486
+ builder.put("quote", "\"");
487
+ builder.put("escape", "\"");
488
+ builder.put("trim_if_not_quoted", false);
489
+ builder.put("skip_header_lines", 1);
490
+ builder.put("allow_extra_columns", false);
491
+ builder.put("allow_optional_columns", false);
492
+ builder.put("columns", schemaConfig);
493
+ return builder.build();
494
+ }
495
+
496
+ private ImmutableList<Object> schemaConfig()
497
+ {
498
+ ImmutableList.Builder<Object> builder = new ImmutableList.Builder<>();
499
+ builder.add(ImmutableMap.of("name", "id", "type", "long"));
500
+ builder.add(ImmutableMap.of("name", "account", "type", "long"));
501
+ builder.add(ImmutableMap.of("name", "time", "type", "timestamp", "format", "%Y-%m-%d %H:%M:%S"));
502
+ builder.add(ImmutableMap.of("name", "purchase", "type", "timestamp", "format", "%Y%m%d"));
503
+ builder.add(ImmutableMap.of("name", "comment", "type", "string"));
504
+ builder.add(ImmutableMap.of("name", "json_column", "type", "json"));
505
+ return builder.build();
506
+ }
507
+
508
+ private void assertRecords(ConfigSource config, MockPageOutput output)
509
+ {
510
+ List<Object[]> records = getRecords(config, output);
511
+ assertEquals(10, records.size());
512
+ {
513
+ Object[] record = records.get(0);
514
+ assertEquals(1L, record[0]);
515
+ assertEquals(32864L, record[1]);
516
+ assertEquals("2015-01-27 19:23:49 UTC", record[2].toString());
517
+ assertEquals("2015-01-27 00:00:00 UTC", record[3].toString());
518
+ assertEquals("embulk", record[4]);
519
+ assertEquals("{\"k\":true}", record[5].toString());
520
+ }
521
+
522
+ {
523
+ Object[] record = records.get(1);
524
+ assertEquals(2L, record[0]);
525
+ assertEquals(14824L, record[1]);
526
+ assertEquals("2015-01-27 19:01:23 UTC", record[2].toString());
527
+ assertEquals("2015-01-27 00:00:00 UTC", record[3].toString());
528
+ assertEquals("embulk jruby", record[4]);
529
+ assertEquals("{\"k\":1}", record[5].toString());
530
+ }
531
+
532
+ {
533
+ Object[] record = records.get(2);
534
+ assertEquals("{\"k\":1.23}", record[5].toString());
535
+ }
536
+
537
+ {
538
+ Object[] record = records.get(3);
539
+ assertEquals("{\"k\":\"v\"}", record[5].toString());
540
+ }
541
+
542
+ {
543
+ Object[] record = records.get(4);
544
+ assertEquals("{\"k\":\"2015-02-03 08:13:45\"}", record[5].toString());
545
+ }
546
+ }
547
+
548
+ private List<Object[]> getRecords(ConfigSource config, MockPageOutput output)
549
+ {
550
+ Schema schema = config.getNested("parser").loadConfig(CsvParserPlugin.PluginTask.class).getSchemaConfig().toSchema();
551
+ return Pages.toObjects(schema, output.pages);
552
+ }
5
553
  }