embulk-input-ftp 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 0333d873277223fc5c1d369fa1417e2481df6ac0
4
+ data.tar.gz: c3b61c7abde8c581303b993613e788378d133372
5
+ SHA512:
6
+ metadata.gz: 790052dc1eafaaa5aa093fa6ba0d12fe7ccc64b21de203fcbdc8495807b420549176e19524096a4b02765e26470cc7a1d04358c47e3878f59a6288fedb4941c6
7
+ data.tar.gz: 5e1755a9388bbeefb9c6a4e6f2c9e96833568a7ab6861fde7985b1c88f6995bd439a65e059f2c73175c102e845a61e35391f20e7f239abe40051aa646ecf405e
data/.gitignore ADDED
@@ -0,0 +1,8 @@
1
+ *~
2
+ /pkg/
3
+ /tmp/
4
+ *.gemspec
5
+ .gradle/
6
+ /classpath/
7
+ build/
8
+ .idea
data/ChangeLog ADDED
@@ -0,0 +1,5 @@
1
+
2
+ Release 0.1.0 - 2015-04-29
3
+
4
+ * First release
5
+
data/README.md ADDED
@@ -0,0 +1,35 @@
1
+ # FTP file input plugin for Embulk
2
+
3
+ ## Overview
4
+
5
+ * **Plugin type**: file input
6
+ * **Resume supported**: yes
7
+ * **Cleanup supported**: yes
8
+
9
+ ## Configuration
10
+
11
+ - **host**: FTP server address (string, required)
12
+ - **port**: FTP server port number (integer, default: 21)
13
+ - **user**: user name to login (string, optional)
14
+ - **password**: password to login (string, default: `""`)
15
+ - **path_prefix**: prefix of target file paths (string, required)
16
+ - **passive_mode**: use passive mode (boolean, default: true)
17
+ - **ascii_mode**: use ASCII mode instead of binary mode (boolean, default: false)
18
+
19
+ ## Example
20
+
21
+ ```yaml
22
+ in:
23
+ type: ftp
24
+ host: ftp.example.net
25
+ port: 21
26
+ user: anonymous
27
+ password: "mypassword"
28
+ path_prefix: /ftp/file/path/prefix
29
+ ```
30
+
31
+ ## Build
32
+
33
+ ```
34
+ $ ./gradlew gem
35
+ ```
data/build.gradle ADDED
@@ -0,0 +1,73 @@
1
+ plugins {
2
+ id "com.jfrog.bintray" version "1.1"
3
+ id "com.github.jruby-gradle.base" version "0.1.5"
4
+ id "java"
5
+ }
6
+ import com.github.jrubygradle.JRubyExec
7
+ repositories {
8
+ mavenCentral()
9
+ jcenter()
10
+ }
11
+ configurations {
12
+ provided
13
+ }
14
+
15
+ version = "0.1.0"
16
+
17
+ dependencies {
18
+ compile "org.embulk:embulk-core:0.6.5"
19
+ provided "org.embulk:embulk-core:0.6.5"
20
+ compile files("libs/ftp4j-1.7.2.jar")
21
+ testCompile "junit:junit:4.+"
22
+ }
23
+
24
+ task classpath(type: Copy, dependsOn: ["jar"]) {
25
+ doFirst { file("classpath").deleteDir() }
26
+ from (configurations.runtime - configurations.provided + files(jar.archivePath))
27
+ into "classpath"
28
+ }
29
+ clean { delete "classpath" }
30
+
31
+ task gem(type: JRubyExec, dependsOn: ["gemspec", "classpath"]) {
32
+ jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "build"
33
+ script "${project.name}.gemspec"
34
+ doLast { ant.move(file: "${project.name}-${project.version}.gem", todir: "pkg") }
35
+ }
36
+
37
+ task gemPush(type: JRubyExec, dependsOn: ["gem"]) {
38
+ jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "push"
39
+ script "pkg/${project.name}-${project.version}.gem"
40
+ }
41
+
42
+ task "package"(dependsOn: ["gemspec", "classpath"]) << {
43
+ println "> Build succeeded."
44
+ println "> You can run embulk with '-L ${file(".").absolutePath}' argument."
45
+ }
46
+
47
+ task gemspec {
48
+ ext.gemspecFile = file("${project.name}.gemspec")
49
+ inputs.file "build.gradle"
50
+ outputs.file gemspecFile
51
+ doLast { gemspecFile.write($/
52
+ Gem::Specification.new do |spec|
53
+ spec.name = "${project.name}"
54
+ spec.version = "${project.version}"
55
+ spec.authors = ["Sadayuki Furuhashi"]
56
+ spec.summary = %[Ftp file input plugin for Embulk]
57
+ spec.description = %[Reads files stored on Ftp.]
58
+ spec.email = ["frsyuki@gmail.com"]
59
+ spec.licenses = ["Apache 2.0"]
60
+ spec.homepage = "https://github.com/embulk/embulk-input-ftp"
61
+
62
+ spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
63
+ spec.test_files = spec.files.grep(%r"^(test|spec)/")
64
+ spec.require_paths = ["lib"]
65
+
66
+ #spec.add_dependency 'YOUR_GEM_DEPENDENCY', ['~> YOUR_GEM_DEPENDENCY_VERSION']
67
+ spec.add_development_dependency 'bundler', ['~> 1.0']
68
+ spec.add_development_dependency 'rake', ['>= 10.0']
69
+ end
70
+ /$)
71
+ }
72
+ }
73
+ clean { delete "${project.name}.gemspec" }
Binary file
@@ -0,0 +1,6 @@
1
+ #Wed Feb 04 13:46:12 PST 2015
2
+ distributionBase=GRADLE_USER_HOME
3
+ distributionPath=wrapper/dists
4
+ zipStoreBase=GRADLE_USER_HOME
5
+ zipStorePath=wrapper/dists
6
+ distributionUrl=https\://services.gradle.org/distributions/gradle-2.2.1-bin.zip
data/gradlew ADDED
@@ -0,0 +1,164 @@
1
+ #!/usr/bin/env bash
2
+
3
+ ##############################################################################
4
+ ##
5
+ ## Gradle start up script for UN*X
6
+ ##
7
+ ##############################################################################
8
+
9
+ # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
10
+ DEFAULT_JVM_OPTS=""
11
+
12
+ APP_NAME="Gradle"
13
+ APP_BASE_NAME=`basename "$0"`
14
+
15
+ # Use the maximum available, or set MAX_FD != -1 to use that value.
16
+ MAX_FD="maximum"
17
+
18
+ warn ( ) {
19
+ echo "$*"
20
+ }
21
+
22
+ die ( ) {
23
+ echo
24
+ echo "$*"
25
+ echo
26
+ exit 1
27
+ }
28
+
29
+ # OS specific support (must be 'true' or 'false').
30
+ cygwin=false
31
+ msys=false
32
+ darwin=false
33
+ case "`uname`" in
34
+ CYGWIN* )
35
+ cygwin=true
36
+ ;;
37
+ Darwin* )
38
+ darwin=true
39
+ ;;
40
+ MINGW* )
41
+ msys=true
42
+ ;;
43
+ esac
44
+
45
+ # For Cygwin, ensure paths are in UNIX format before anything is touched.
46
+ if $cygwin ; then
47
+ [ -n "$JAVA_HOME" ] && JAVA_HOME=`cygpath --unix "$JAVA_HOME"`
48
+ fi
49
+
50
# Attempt to set APP_HOME
# Resolve links: $0 may be a link
PRG="$0"
# Need this for relative symlinks.
while [ -h "$PRG" ] ; do
    ls=`ls -ld "$PRG"`
    link=`expr "$ls" : '.*-> \(.*\)$'`
    if expr "$link" : '/.*' > /dev/null; then
        # Absolute link target: follow it directly.
        PRG="$link"
    else
        # Relative link target: resolve it against the link's own directory.
        PRG=`dirname "$PRG"`"/$link"
    fi
done
SAVED="`pwd`"
# Discard anything `cd` prints by redirecting to /dev/null. Closing stdout
# (">&-") makes `cd` fail with "write error: Bad file descriptor" whenever it
# has something to print, e.g. when CDPATH is set.
cd "`dirname \"$PRG\"`/" >/dev/null
APP_HOME="`pwd -P`"
cd "$SAVED" >/dev/null
67
+
68
+ CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
69
+
70
+ # Determine the Java command to use to start the JVM.
71
+ if [ -n "$JAVA_HOME" ] ; then
72
+ if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
73
+ # IBM's JDK on AIX uses strange locations for the executables
74
+ JAVACMD="$JAVA_HOME/jre/sh/java"
75
+ else
76
+ JAVACMD="$JAVA_HOME/bin/java"
77
+ fi
78
+ if [ ! -x "$JAVACMD" ] ; then
79
+ die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
80
+
81
+ Please set the JAVA_HOME variable in your environment to match the
82
+ location of your Java installation."
83
+ fi
84
+ else
85
+ JAVACMD="java"
86
+ which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
87
+
88
+ Please set the JAVA_HOME variable in your environment to match the
89
+ location of your Java installation."
90
+ fi
91
+
92
+ # Increase the maximum file descriptors if we can.
93
+ if [ "$cygwin" = "false" -a "$darwin" = "false" ] ; then
94
+ MAX_FD_LIMIT=`ulimit -H -n`
95
+ if [ $? -eq 0 ] ; then
96
+ if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
97
+ MAX_FD="$MAX_FD_LIMIT"
98
+ fi
99
+ ulimit -n $MAX_FD
100
+ if [ $? -ne 0 ] ; then
101
+ warn "Could not set maximum file descriptor limit: $MAX_FD"
102
+ fi
103
+ else
104
+ warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
105
+ fi
106
+ fi
107
+
108
+ # For Darwin, add options to specify how the application appears in the dock
109
+ if $darwin; then
110
+ GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
111
+ fi
112
+
113
+ # For Cygwin, switch paths to Windows format before running java
114
+ if $cygwin ; then
115
+ APP_HOME=`cygpath --path --mixed "$APP_HOME"`
116
+ CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
117
+
118
+ # We build the pattern for arguments to be converted via cygpath
119
+ ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
120
+ SEP=""
121
+ for dir in $ROOTDIRSRAW ; do
122
+ ROOTDIRS="$ROOTDIRS$SEP$dir"
123
+ SEP="|"
124
+ done
125
+ OURCYGPATTERN="(^($ROOTDIRS))"
126
+ # Add a user-defined pattern to the cygpath arguments
127
+ if [ "$GRADLE_CYGPATTERN" != "" ] ; then
128
+ OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
129
+ fi
130
+ # Now convert the arguments - kludge to limit ourselves to /bin/sh
131
+ i=0
132
+ for arg in "$@" ; do
133
+ CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
134
+ CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
135
+
136
+ if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
137
+ eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
138
+ else
139
+ eval `echo args$i`="\"$arg\""
140
+ fi
141
+ i=$((i+1))
142
+ done
143
+ case $i in
144
+ (0) set -- ;;
145
+ (1) set -- "$args0" ;;
146
+ (2) set -- "$args0" "$args1" ;;
147
+ (3) set -- "$args0" "$args1" "$args2" ;;
148
+ (4) set -- "$args0" "$args1" "$args2" "$args3" ;;
149
+ (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
150
+ (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
151
+ (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
152
+ (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
153
+ (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
154
+ esac
155
+ fi
156
+
157
+ # Split up the JVM_OPTS And GRADLE_OPTS values into an array, following the shell quoting and substitution rules
158
+ function splitJvmOpts() {
159
+ JVM_OPTS=("$@")
160
+ }
161
+ eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS
162
+ JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME"
163
+
164
+ exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@"
data/gradlew.bat ADDED
@@ -0,0 +1,90 @@
1
+ @if "%DEBUG%" == "" @echo off
2
+ @rem ##########################################################################
3
+ @rem
4
+ @rem Gradle startup script for Windows
5
+ @rem
6
+ @rem ##########################################################################
7
+
8
+ @rem Set local scope for the variables with windows NT shell
9
+ if "%OS%"=="Windows_NT" setlocal
10
+
11
+ @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
12
+ set DEFAULT_JVM_OPTS=
13
+
14
+ set DIRNAME=%~dp0
15
+ if "%DIRNAME%" == "" set DIRNAME=.
16
+ set APP_BASE_NAME=%~n0
17
+ set APP_HOME=%DIRNAME%
18
+
19
+ @rem Find java.exe
20
+ if defined JAVA_HOME goto findJavaFromJavaHome
21
+
22
+ set JAVA_EXE=java.exe
23
+ %JAVA_EXE% -version >NUL 2>&1
24
+ if "%ERRORLEVEL%" == "0" goto init
25
+
26
+ echo.
27
+ echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
28
+ echo.
29
+ echo Please set the JAVA_HOME variable in your environment to match the
30
+ echo location of your Java installation.
31
+
32
+ goto fail
33
+
34
+ :findJavaFromJavaHome
35
+ set JAVA_HOME=%JAVA_HOME:"=%
36
+ set JAVA_EXE=%JAVA_HOME%/bin/java.exe
37
+
38
+ if exist "%JAVA_EXE%" goto init
39
+
40
+ echo.
41
+ echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
42
+ echo.
43
+ echo Please set the JAVA_HOME variable in your environment to match the
44
+ echo location of your Java installation.
45
+
46
+ goto fail
47
+
48
+ :init
49
+ @rem Get command-line arguments, handling Windowz variants
50
+
51
+ if not "%OS%" == "Windows_NT" goto win9xME_args
52
+ if "%@eval[2+2]" == "4" goto 4NT_args
53
+
54
+ :win9xME_args
55
+ @rem Slurp the command line arguments.
56
+ set CMD_LINE_ARGS=
57
+ set _SKIP=2
58
+
59
+ :win9xME_args_slurp
60
+ if "x%~1" == "x" goto execute
61
+
62
+ set CMD_LINE_ARGS=%*
63
+ goto execute
64
+
65
+ :4NT_args
66
+ @rem Get arguments from the 4NT Shell from JP Software
67
+ set CMD_LINE_ARGS=%$
68
+
69
+ :execute
70
+ @rem Setup the command line
71
+
72
+ set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
73
+
74
+ @rem Execute Gradle
75
+ "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
76
+
77
+ :end
78
+ @rem End local scope for the variables with windows NT shell
79
+ if "%ERRORLEVEL%"=="0" goto mainEnd
80
+
81
+ :fail
82
+ rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
83
+ rem the _cmd.exe /c_ return code!
84
+ if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
85
+ exit /b 1
86
+
87
+ :mainEnd
88
+ if "%OS%"=="Windows_NT" endlocal
89
+
90
+ :omega
@@ -0,0 +1,3 @@
1
# Register the Java implementation of the "ftp" input plugin with Embulk.
# The classpath directory is located four levels above this file.
classpath_dir = File.expand_path('../../../../classpath', __FILE__)

Embulk::JavaPlugin.register_input("ftp", "org.embulk.input.FtpFileInputPlugin", classpath_dir)
Binary file
@@ -0,0 +1,583 @@
1
+ package org.embulk.input;
2
+
3
+ import java.util.List;
4
+ import java.util.ArrayList;
5
+ import java.util.Collections;
6
+ import java.util.concurrent.Executors;
7
+ import java.util.concurrent.ExecutorService;
8
+ import java.io.IOException;
9
+ import java.io.InterruptedIOException;
10
+ import java.io.InputStream;
11
+ import java.nio.channels.Channels;
12
+ import org.slf4j.Logger;
13
+ import com.google.common.util.concurrent.ThreadFactoryBuilder;
14
+ import com.google.common.collect.ImmutableList;
15
+ import com.google.common.base.Optional;
16
+ import com.google.common.base.Throwables;
17
+ import com.google.common.base.Function;
18
+ import it.sauronsoftware.ftp4j.FTPClient;
19
+ import it.sauronsoftware.ftp4j.FTPFile;
20
+ import it.sauronsoftware.ftp4j.FTPConnector;
21
+ import it.sauronsoftware.ftp4j.FTPCommunicationListener;
22
+ import it.sauronsoftware.ftp4j.FTPDataTransferListener;
23
+ import it.sauronsoftware.ftp4j.FTPException;
24
+ import it.sauronsoftware.ftp4j.FTPIllegalReplyException;
25
+ import it.sauronsoftware.ftp4j.FTPDataTransferException;
26
+ import it.sauronsoftware.ftp4j.FTPAbortedException;
27
+ import it.sauronsoftware.ftp4j.FTPListParseException;
28
+ import org.embulk.config.CommitReport;
29
+ import org.embulk.config.Config;
30
+ import org.embulk.config.ConfigInject;
31
+ import org.embulk.config.ConfigDefault;
32
+ import org.embulk.config.ConfigDiff;
33
+ import org.embulk.config.ConfigSource;
34
+ import org.embulk.config.Task;
35
+ import org.embulk.config.TaskSource;
36
+ import org.embulk.spi.BufferAllocator;
37
+ import org.embulk.spi.Exec;
38
+ import org.embulk.spi.FileInputPlugin;
39
+ import org.embulk.spi.TransactionalFileInput;
40
+ import org.embulk.spi.util.InputStreamFileInput;
41
+ import org.embulk.input.ftp.BlockingTransfer;
42
+ import org.embulk.input.ftp.RetryableInputStream;
43
+ import org.embulk.input.ftp.RetryExecutor.Retryable;
44
+ import org.embulk.input.ftp.RetryExecutor.RetryGiveupException;
45
+ import static org.embulk.input.ftp.RetryExecutor.retryExecutor;
46
+
47
+ public class FtpFileInputPlugin
48
+ implements FileInputPlugin
49
+ {
50
+ private final Logger log = Exec.getLogger(FtpFileInputPlugin.class);
51
+
52
+ public interface PluginTask
53
+ extends Task
54
+ {
55
+ @Config("path_prefix")
56
+ public String getPathPrefix();
57
+
58
+ @Config("last_path")
59
+ @ConfigDefault("null")
60
+ public Optional<String> getLastPath();
61
+
62
+ @Config("host")
63
+ public String getHost();
64
+
65
+ @Config("port")
66
+ @ConfigDefault("21")
67
+ public int getPort();
68
+
69
+ @Config("user")
70
+ @ConfigDefault("null")
71
+ public Optional<String> getUser();
72
+
73
+ @Config("password")
74
+ @ConfigDefault("null")
75
+ public Optional<String> getPassword();
76
+
77
+ @Config("passive_mode")
78
+ @ConfigDefault("true")
79
+ public boolean getPassiveMode();
80
+
81
+ @Config("ascii_mode")
82
+ @ConfigDefault("false")
83
+ public boolean getAsciiMode();
84
+
85
+ public List<String> getFiles();
86
+ public void setFiles(List<String> files);
87
+
88
+ @ConfigInject
89
+ public BufferAllocator getBufferAllocator();
90
+ }
91
+
92
+ @Override
93
+ public ConfigDiff transaction(ConfigSource config, FileInputPlugin.Control control)
94
+ {
95
+ PluginTask task = config.loadConfig(PluginTask.class);
96
+
97
+ // list files recursively
98
+ List<String> files = listFiles(log, task);
99
+ task.setFiles(files);
100
+ log.info("Using files {}", files);
101
+
102
+ // TODO what if task.getFiles().isEmpty()?
103
+
104
+ // number of processors is same with number of files
105
+ return resume(task.dump(), task.getFiles().size(), control);
106
+ }
107
+
108
+ @Override
109
+ public ConfigDiff resume(TaskSource taskSource,
110
+ int taskCount,
111
+ FileInputPlugin.Control control)
112
+ {
113
+ PluginTask task = taskSource.loadTask(PluginTask.class);
114
+
115
+ control.run(taskSource, taskCount);
116
+
117
+ // build next config
118
+ ConfigDiff configDiff = Exec.newConfigDiff();
119
+
120
+ // last_path
121
+ if (task.getFiles().isEmpty()) {
122
+ // keep the last value
123
+ if (task.getLastPath().isPresent()) {
124
+ configDiff.set("last_path", task.getLastPath().get());
125
+ }
126
+ } else {
127
+ List<String> files = new ArrayList<String>(task.getFiles());
128
+ Collections.sort(files);
129
+ configDiff.set("last_path", files.get(files.size() - 1));
130
+ }
131
+
132
+ return configDiff;
133
+ }
134
+
135
+ @Override
136
+ public void cleanup(TaskSource taskSource,
137
+ int taskCount,
138
+ List<CommitReport> successCommitReports)
139
+ {
140
+ // do nothing
141
+ }
142
+
143
+ private static FTPClient newFTPClient(Logger log, PluginTask task)
144
+ {
145
+ FTPClient client = new FTPClient();
146
+ try {
147
+ // TODO SSL
148
+
149
+ client.addCommunicationListener(new LoggingCommunicationListner(log));
150
+
151
+ // TODO configurable timeout parameters
152
+ client.setAutoNoopTimeout(3000);
153
+
154
+ FTPConnector con = client.getConnector();
155
+ con.setConnectionTimeout(30);
156
+ con.setReadTimeout(60);
157
+ con.setCloseTimeout(60);
158
+
159
+ // for commons-net client
160
+ //client.setControlKeepAliveTimeout
161
+ //client.setConnectTimeout
162
+ //client.setSoTimeout
163
+ //client.setDataTimeout
164
+ //client.setAutodetectUTF8
165
+
166
+ log.info("Connecting to "+task.getHost());
167
+ client.connect(task.getHost(), task.getPort());
168
+
169
+ if (task.getUser().isPresent()) {
170
+ log.info("Logging in with user "+task.getUser().get());
171
+ client.login(task.getUser().get(), task.getPassword().or(""));
172
+ }
173
+
174
+ log.info("Using passive mode");
175
+ client.setPassive(task.getPassiveMode());
176
+
177
+ if (task.getAsciiMode()) {
178
+ log.info("Using ASCII mode");
179
+ client.setType(FTPClient.TYPE_TEXTUAL);
180
+ } else {
181
+ log.info("Using binary mode");
182
+ client.setType(FTPClient.TYPE_BINARY);
183
+ }
184
+
185
+ if (client.isCompressionSupported()) {
186
+ log.info("Using MODE Z compression");
187
+ client.setCompressionEnabled(true);
188
+ }
189
+
190
+ FTPClient connected = client;
191
+ client = null;
192
+ return connected;
193
+
194
+ } catch (FTPException ex) {
195
+ log.info("FTP command failed: "+ex.getCode()+" "+ex.getMessage());
196
+ throw Throwables.propagate(ex);
197
+
198
+ } catch (FTPIllegalReplyException ex) {
199
+ log.info("FTP protocol error");
200
+ throw Throwables.propagate(ex);
201
+
202
+ } catch (IOException ex) {
203
+ log.info("FTP network error: "+ex);
204
+ throw Throwables.propagate(ex);
205
+
206
+ } finally {
207
+ if (client != null) {
208
+ disconnectClient(client);
209
+ }
210
+ }
211
+ }
212
+
213
+ static void disconnectClient(FTPClient client)
214
+ {
215
+ if (client.isConnected()) {
216
+ try {
217
+ client.disconnect(false);
218
+ } catch (FTPException ex) {
219
+ // do nothing
220
+ } catch (FTPIllegalReplyException ex) {
221
+ // do nothing
222
+ } catch (IOException ex) {
223
+ // do nothing
224
+ }
225
+ }
226
+ }
227
+
228
+ private List<String> listFiles(Logger log, PluginTask task)
229
+ {
230
+ FTPClient client = newFTPClient(log, task);
231
+ try {
232
+ return listFilesByPrefix(log, client, task.getPathPrefix(), task.getLastPath());
233
+ } finally {
234
+ disconnectClient(client);
235
+ }
236
+ }
237
+
238
+ public static List<String> listFilesByPrefix(Logger log, FTPClient client,
239
+ String prefix, Optional<String> lastPath)
240
+ {
241
+ String directory;
242
+ String fileNamePrefix;
243
+ if (prefix.isEmpty()) {
244
+ directory = "";
245
+ fileNamePrefix = "";
246
+ } else {
247
+ int pos = prefix.lastIndexOf("/");
248
+ if (pos < 0) {
249
+ directory = "";
250
+ fileNamePrefix = prefix;
251
+ } else {
252
+ directory = prefix.substring(0, pos + 1); // include last "/"
253
+ fileNamePrefix = prefix.substring(pos + 1);
254
+ }
255
+ }
256
+
257
+ ImmutableList.Builder<String> builder = ImmutableList.builder();
258
+
259
+ try {
260
+ String currentDirectory = client.currentDirectory();
261
+ log.info("Listing ftp files at directory '{}' filtering filename by prefix '{}'", directory.isEmpty() ? currentDirectory : directory, fileNamePrefix);
262
+
263
+ if (!directory.isEmpty()) {
264
+ client.changeDirectory(directory);
265
+ currentDirectory = directory;
266
+ }
267
+
268
+ for (FTPFile file : client.list()) {
269
+ if (file.getName().startsWith(fileNamePrefix)) {
270
+ listFilesRecursive(client, currentDirectory, file, builder);
271
+ }
272
+ }
273
+
274
+ } catch (FTPListParseException ex) {
275
+ log.info("FTP listing files failed");
276
+ throw Throwables.propagate(ex);
277
+
278
+ } catch (FTPAbortedException ex) {
279
+ log.info("FTP listing files failed");
280
+ throw Throwables.propagate(ex);
281
+
282
+ } catch (FTPDataTransferException ex) {
283
+ log.info("FTP data transfer failed");
284
+ throw Throwables.propagate(ex);
285
+
286
+ } catch (FTPException ex) {
287
+ log.info("FTP command failed: "+ex.getCode()+" "+ex.getMessage());
288
+ throw Throwables.propagate(ex);
289
+
290
+ } catch (FTPIllegalReplyException ex) {
291
+ log.info("FTP protocol error");
292
+ throw Throwables.propagate(ex);
293
+
294
+ } catch (IOException ex) {
295
+ log.info("FTP network error: "+ex);
296
+ throw Throwables.propagate(ex);
297
+ }
298
+
299
+ return builder.build();
300
+ }
301
+
302
+ private static void listFilesRecursive(FTPClient client,
303
+ String baseDirectoryPath, FTPFile file,
304
+ ImmutableList.Builder<String> builder)
305
+ throws IOException, FTPException, FTPIllegalReplyException, FTPDataTransferException, FTPAbortedException, FTPListParseException
306
+ {
307
+ if (!baseDirectoryPath.endsWith("/")) {
308
+ baseDirectoryPath = baseDirectoryPath + "/";
309
+ }
310
+ String path = baseDirectoryPath + file.getName();
311
+
312
+ switch (file.getType()) {
313
+ case FTPFile.TYPE_FILE:
314
+ builder.add(path);
315
+ break;
316
+ case FTPFile.TYPE_DIRECTORY:
317
+ client.changeDirectory(path);
318
+ for (FTPFile subFile : client.list()) {
319
+ listFilesRecursive(client, path, subFile, builder);
320
+ }
321
+ client.changeDirectory(baseDirectoryPath);
322
+ break;
323
+ case FTPFile.TYPE_LINK:
324
+ // TODO
325
+ }
326
+ }
327
+
328
+ @Override
329
+ public TransactionalFileInput open(TaskSource taskSource, int taskIndex)
330
+ {
331
+ PluginTask task = taskSource.loadTask(PluginTask.class);
332
+ return new FtpFileInput(log, task, taskIndex);
333
+ }
334
+
335
+ private static class LoggingCommunicationListner
336
+ implements FTPCommunicationListener
337
+ {
338
+ private final Logger log;
339
+
340
+ public LoggingCommunicationListner(Logger log)
341
+ {
342
+ this.log = log;
343
+ }
344
+
345
+ public void received(String statement)
346
+ {
347
+ log.info("< "+statement);
348
+ }
349
+
350
+ public void sent(String statement)
351
+ {
352
+ if (statement.startsWith("PASS")) {
353
+ // don't show password
354
+ return;
355
+ }
356
+ log.info("> "+statement);
357
+ }
358
+ }
359
+
360
+ private static class LoggingTransferListener
361
+ implements FTPDataTransferListener
362
+ {
363
+ private final Logger log;
364
+ private final long transferNoticeBytes;
365
+
366
+ private long totalTransfer;
367
+ private long nextTransferNotice;
368
+
369
+ public LoggingTransferListener(Logger log, long transferNoticeBytes)
370
+ {
371
+ this.log = log;
372
+ this.transferNoticeBytes = transferNoticeBytes;
373
+ this.nextTransferNotice = transferNoticeBytes;
374
+ }
375
+
376
+ public void started()
377
+ {
378
+ log.info("Transfer started");
379
+ }
380
+
381
+ public void transferred(int length)
382
+ {
383
+ totalTransfer += length;
384
+ if (totalTransfer > nextTransferNotice) {
385
+ log.info("Transferred "+totalTransfer+" bytes");
386
+ nextTransferNotice = ((totalTransfer / transferNoticeBytes)+1) * transferNoticeBytes;
387
+ }
388
+ }
389
+
390
+ public void completed()
391
+ {
392
+ log.info("Transfer completed "+totalTransfer+" bytes");
393
+ }
394
+
395
+ public void aborted()
396
+ {
397
+ log.info("Transfer aborted");
398
+ }
399
+
400
+ public void failed()
401
+ {
402
+ log.info("Transfer failed");
403
+ }
404
+ }
405
+
406
+ private static final long TRANSFER_NOTICE_BYTES = 100*1024*1024;
407
+
408
+ private static InputStream startDownload(final Logger log, final FTPClient client,
409
+ final String path, final long offset, ExecutorService executor)
410
+ {
411
+ BlockingTransfer t = BlockingTransfer.submit(executor,
412
+ new Function<BlockingTransfer, Runnable>()
413
+ {
414
+ public Runnable apply(final BlockingTransfer transfer)
415
+ {
416
+ return new Runnable() {
417
+ public void run()
418
+ {
419
+ try {
420
+ client.download(path, Channels.newOutputStream(transfer.getWriterChannel()), offset, new LoggingTransferListener(log, TRANSFER_NOTICE_BYTES));
421
+
422
+ } catch (FTPException ex) {
423
+ log.info("FTP command failed: "+ex.getCode()+" "+ex.getMessage());
424
+ throw Throwables.propagate(ex);
425
+
426
+ } catch (FTPDataTransferException ex) {
427
+ log.info("FTP data transfer failed");
428
+ throw Throwables.propagate(ex);
429
+
430
+ } catch (FTPAbortedException ex) {
431
+ log.info("FTP listing files failed");
432
+ throw Throwables.propagate(ex);
433
+
434
+ } catch (FTPIllegalReplyException ex) {
435
+ log.info("FTP protocol error");
436
+ throw Throwables.propagate(ex);
437
+
438
+ } catch (IOException ex) {
439
+ throw Throwables.propagate(ex);
440
+
441
+ } finally {
442
+ try {
443
+ transfer.getWriterChannel().close();
444
+ } catch (IOException ex) {
445
+ throw new RuntimeException(ex);
446
+ }
447
+ }
448
+ }
449
+ };
450
+ }
451
+ });
452
+ return Channels.newInputStream(t.getReaderChannel());
453
+ }
454
+
455
+ private static class FtpRetryableOpener
456
+ implements RetryableInputStream.Opener
457
+ {
458
+ private final Logger log;
459
+ private final FTPClient client;
460
+ private final ExecutorService executor;
461
+ private final String path;
462
+
463
+ public FtpRetryableOpener(Logger log, FTPClient client, ExecutorService executor, String path)
464
+ {
465
+ this.log = log;
466
+ this.client = client;
467
+ this.executor = executor;
468
+ this.path = path;
469
+ }
470
+
471
+ @Override
472
+ public InputStream open(final long offset, final Exception exception) throws IOException
473
+ {
474
+ try {
475
+ return retryExecutor()
476
+ .withRetryLimit(3)
477
+ .withInitialRetryWait(500)
478
+ .withMaxRetryWait(30*1000)
479
+ .runInterruptible(new Retryable<InputStream>() {
480
+ @Override
481
+ public InputStream call() throws InterruptedIOException
482
+ {
483
+ log.warn(String.format("FTP read failed. Retrying GET request with %,d bytes offset", offset), exception);
484
+ return startDownload(log, client, path, offset, executor);
485
+ }
486
+
487
+ @Override
488
+ public boolean isRetryableException(Exception exception)
489
+ {
490
+ return true; // TODO
491
+ }
492
+
493
+ @Override
494
+ public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait)
495
+ throws RetryGiveupException
496
+ {
497
+ String message = String.format("FTP GET request failed. Retrying %d/%d after %d seconds. Message: %s",
498
+ retryCount, retryLimit, retryWait/1000, exception.getMessage());
499
+ if (retryCount % 3 == 0) {
500
+ log.warn(message, exception);
501
+ } else {
502
+ log.warn(message);
503
+ }
504
+ }
505
+
506
+ @Override
507
+ public void onGiveup(Exception firstException, Exception lastException)
508
+ throws RetryGiveupException
509
+ {
510
+ }
511
+ });
512
+ } catch (RetryGiveupException ex) {
513
+ Throwables.propagateIfInstanceOf(ex.getCause(), IOException.class);
514
+ throw Throwables.propagate(ex.getCause());
515
+ } catch (InterruptedException ex) {
516
+ throw new InterruptedIOException();
517
+ }
518
+ }
519
+ }
520
+
521
+ // TODO create single-file InputStreamFileInput utility
522
+ private static class SingleFileProvider
523
+ implements InputStreamFileInput.Provider
524
+ {
525
+ private final Logger log;
526
+ private final FTPClient client;
527
+ private final ExecutorService executor;
528
+ private final String path;
529
+ private boolean opened = false;
530
+
531
+ public SingleFileProvider(Logger log, PluginTask task, int taskIndex)
532
+ {
533
+ this.log = log;
534
+ this.client = newFTPClient(log, task);
535
+ this.executor = Executors.newCachedThreadPool(
536
+ new ThreadFactoryBuilder()
537
+ .setNameFormat("embulk-input-ftp-%d")
538
+ .setDaemon(true)
539
+ .build());
540
+ this.path = task.getFiles().get(taskIndex);
541
+ }
542
+
543
+ @Override
544
+ public InputStream openNext() throws IOException
545
+ {
546
+ if (opened) {
547
+ return null;
548
+ }
549
+ opened = true;
550
+
551
+ return new RetryableInputStream(
552
+ startDownload(log, client, path, 0L, executor),
553
+ new FtpRetryableOpener(log, client, executor, path));
554
+ }
555
+
556
+ @Override
557
+ public void close()
558
+ {
559
+ try {
560
+ executor.shutdownNow();
561
+ } finally {
562
+ disconnectClient(client);
563
+ }
564
+ }
565
+ }
566
+
567
+ public static class FtpFileInput
568
+ extends InputStreamFileInput
569
+ implements TransactionalFileInput
570
+ {
571
+ public FtpFileInput(Logger log, PluginTask task, int taskIndex)
572
+ {
573
+ super(task.getBufferAllocator(), new SingleFileProvider(log, task, taskIndex));
574
+ }
575
+
576
+ public void abort() { }
577
+
578
+ public CommitReport commit()
579
+ {
580
+ return Exec.newCommitReport();
581
+ }
582
+ }
583
+ }