embulk-output-sftp 0.1.10 → 0.1.11
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +2 -3
- data/CHANGELOG.md +3 -0
- data/README.md +3 -0
- data/build.gradle +6 -5
- data/src/main/java/org/embulk/output/sftp/SftpFileOutputPlugin.java +20 -3
- data/src/main/java/org/embulk/output/sftp/SftpLocalFileOutput.java +223 -0
- data/src/main/java/org/embulk/output/sftp/SftpRemoteFileOutput.java +120 -0
- data/src/main/java/org/embulk/output/sftp/SftpUtils.java +146 -123
- data/src/main/java/org/embulk/output/sftp/utils/DefaultRetry.java +55 -0
- data/src/main/java/org/embulk/output/sftp/utils/TimedCallable.java +27 -0
- data/src/main/java/org/embulk/output/sftp/utils/TimeoutCloser.java +42 -0
- data/src/test/java/org/embulk/output/sftp/TestSftpFileOutputPlugin.java +377 -8
- data/src/test/java/org/embulk/output/sftp/utils/TestTimedCallable.java +36 -0
- data/src/test/java/org/embulk/output/sftp/utils/TestTimeoutCloser.java +45 -0
- metadata +11 -5
- data/src/main/java/org/embulk/output/sftp/SftpFileOutput.java +0 -140
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 98edf71b4a5d7998403671b7e181b250f7db9981
|
4
|
+
data.tar.gz: 940f0d1fea8ed83c957bdfca08a2bb89725b487b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c4e8426ac217df25329da8a514b1603158bc7d6ed2b630b2e28486fcd36fb81e24aee588e5b1b2f6dea2e0996789a947eaac617330cb6180ca3e6afc35a9f0ed
|
7
|
+
data.tar.gz: 99c6f36a658a92dd3710c9b305892e5961f9d154694570b5f05e7cbf6e4ad2794adfda169e175622f3bb7ff5aa5714e25368e41a0f6224d3dce8acc004b87d9d
|
data/.travis.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
0.1.11 (2018-08-27)
|
2
|
+
- Enhance: Add 2 new configs `local_buffering` (boolean) and `temp_file_threshold` (long)
|
3
|
+
- https://github.com/embulk/embulk-output-sftp/pull/50
|
1
4
|
0.1.10 (2018-05-07)
|
2
5
|
- Fix: Use java.util.regex.Pattern for host name validation
|
3
6
|
- https://github.com/embulk/embulk-output-sftp/pull/49
|
data/README.md
CHANGED
@@ -24,6 +24,8 @@ Stores files on a SFTP Server
|
|
24
24
|
- **file_ext**: Extension of output files (string, required)
|
25
25
|
- **sequence_format**: Format for sequence part of output files (string, default: `".%03d.%02d"`)
|
26
26
|
- **rename_file_after_upload**: Upload the file as `file_ext` + ".tmp" first, then rename it after the upload finishes (boolean, default: `false`)
|
27
|
+
- **local_buffering**: Use a local temp file to buffer records. If `false`, the plugin buffers records directly to the remote file, using ".tmp" as the filename suffix (boolean, default: `true`)
|
28
|
+
- **temp_file_threshold**: Maximum size of the local temp file. When the local temp file reaches this threshold, the plugin flushes (appends) it to the remote file (long, default: `5368709120`, i.e. 5GiB; min: 50MiB, max: 10GiB)
|
27
29
|
|
28
30
|
### Proxy configuration
|
29
31
|
|
@@ -53,6 +55,7 @@ out:
|
|
53
55
|
path_prefix: /data/sftp
|
54
56
|
file_ext: _20151020.tsv
|
55
57
|
sequence_format: ".%01d%01d"
|
58
|
+
temp_file_threshold: 10737418240 # 10GiB
|
56
59
|
```
|
57
60
|
|
58
61
|
With proxy
|
data/build.gradle
CHANGED
@@ -17,23 +17,24 @@ configurations {
|
|
17
17
|
}
|
18
18
|
|
19
19
|
group = "org.embulk.output.sftp"
|
20
|
-
version = "0.1.
|
20
|
+
version = "0.1.11"
|
21
21
|
sourceCompatibility = 1.7
|
22
22
|
targetCompatibility = 1.7
|
23
23
|
|
24
24
|
dependencies {
|
25
|
-
compile "org.embulk:embulk-core:0.
|
26
|
-
provided "org.embulk:embulk-core:0.
|
25
|
+
compile "org.embulk:embulk-core:0.9.7"
|
26
|
+
provided "org.embulk:embulk-core:0.9.7"
|
27
27
|
// compile "YOUR_JAR_DEPENDENCY_GROUP:YOUR_JAR_DEPENDENCY_MODULE:YOUR_JAR_DEPENDENCY_VERSION"
|
28
28
|
compile "org.apache.commons:commons-vfs2:2.2"
|
29
29
|
compile "commons-io:commons-io:2.6"
|
30
30
|
compile "com.jcraft:jsch:0.1.54"
|
31
31
|
testCompile "junit:junit:4.+"
|
32
|
-
testCompile "org.embulk:embulk-core:0.
|
33
|
-
testCompile "org.embulk:embulk-standards:0.
|
32
|
+
testCompile "org.embulk:embulk-core:0.9.7:tests"
|
33
|
+
testCompile "org.embulk:embulk-standards:0.9.7"
|
34
34
|
testCompile "org.apache.sshd:apache-sshd:1.1.0+"
|
35
35
|
testCompile "org.littleshoot:littleproxy:1.1.0-beta1"
|
36
36
|
testCompile "io.netty:netty-all:4.0.34.Final"
|
37
|
+
testCompile "org.mockito:mockito-core:2.+"
|
37
38
|
}
|
38
39
|
|
39
40
|
jacocoTestReport {
|
@@ -13,6 +13,9 @@ import org.embulk.spi.FileOutputPlugin;
|
|
13
13
|
import org.embulk.spi.TransactionalFileOutput;
|
14
14
|
import org.embulk.spi.unit.LocalFile;
|
15
15
|
|
16
|
+
import javax.validation.constraints.Max;
|
17
|
+
import javax.validation.constraints.Min;
|
18
|
+
|
16
19
|
import java.util.List;
|
17
20
|
import java.util.Map;
|
18
21
|
|
@@ -47,7 +50,7 @@ public class SftpFileOutputPlugin
|
|
47
50
|
|
48
51
|
@Config("user_directory_is_root")
|
49
52
|
@ConfigDefault("true")
|
50
|
-
public
|
53
|
+
public boolean getUserDirIsRoot();
|
51
54
|
|
52
55
|
@Config("timeout")
|
53
56
|
@ConfigDefault("600") // 10 minutes
|
@@ -73,7 +76,18 @@ public class SftpFileOutputPlugin
|
|
73
76
|
|
74
77
|
@Config("rename_file_after_upload")
|
75
78
|
@ConfigDefault("false")
|
76
|
-
public
|
79
|
+
public boolean getRenameFileAfterUpload();
|
80
|
+
|
81
|
+
// if `false`, plugin will use remote file as buffer
|
82
|
+
@Config("local_buffering")
|
83
|
+
@ConfigDefault("true")
|
84
|
+
public boolean getLocalBuffering();
|
85
|
+
|
86
|
+
@Min(50L * 1024 * 1024) // 50MiB
|
87
|
+
@Max(10L * 1024 * 1024 * 1024) // 10GiB
|
88
|
+
@Config("temp_file_threshold")
|
89
|
+
@ConfigDefault("5368709120") // 5GiB
|
90
|
+
public long getTempFileThreshold();
|
77
91
|
}
|
78
92
|
|
79
93
|
@Override
|
@@ -141,6 +155,9 @@ public class SftpFileOutputPlugin
|
|
141
155
|
public TransactionalFileOutput open(TaskSource taskSource, final int taskIndex)
|
142
156
|
{
|
143
157
|
final PluginTask task = taskSource.loadTask(PluginTask.class);
|
144
|
-
|
158
|
+
if (task.getLocalBuffering()) {
|
159
|
+
return new SftpLocalFileOutput(task, taskIndex);
|
160
|
+
}
|
161
|
+
return new SftpRemoteFileOutput(task, taskIndex);
|
145
162
|
}
|
146
163
|
}
|
@@ -0,0 +1,223 @@
|
|
1
|
+
package org.embulk.output.sftp;
|
2
|
+
|
3
|
+
import com.google.common.annotations.VisibleForTesting;
|
4
|
+
import com.google.common.base.Throwables;
|
5
|
+
import com.google.common.collect.ImmutableMap;
|
6
|
+
import org.apache.commons.vfs2.FileObject;
|
7
|
+
import org.embulk.config.TaskReport;
|
8
|
+
import org.embulk.output.sftp.utils.TimeoutCloser;
|
9
|
+
import org.embulk.spi.Buffer;
|
10
|
+
import org.embulk.spi.Exec;
|
11
|
+
import org.embulk.spi.FileOutput;
|
12
|
+
import org.embulk.spi.TransactionalFileOutput;
|
13
|
+
import org.slf4j.Logger;
|
14
|
+
|
15
|
+
import java.io.BufferedOutputStream;
|
16
|
+
import java.io.File;
|
17
|
+
import java.io.FileNotFoundException;
|
18
|
+
import java.io.FileOutputStream;
|
19
|
+
import java.io.IOException;
|
20
|
+
import java.io.OutputStream;
|
21
|
+
import java.util.ArrayList;
|
22
|
+
import java.util.List;
|
23
|
+
import java.util.Map;
|
24
|
+
|
25
|
+
import static org.embulk.output.sftp.SftpFileOutputPlugin.PluginTask;
|
26
|
+
|
27
|
+
/**
|
28
|
+
* Created by takahiro.nakayama on 10/20/15.
|
29
|
+
*/
|
30
|
+
public class SftpLocalFileOutput
|
31
|
+
implements FileOutput, TransactionalFileOutput
|
32
|
+
{
|
33
|
+
// to make it clear that it is a constant
|
34
|
+
static final String TMP_SUFFIX = ".tmp";
|
35
|
+
|
36
|
+
final Logger logger = Exec.getLogger(getClass());
|
37
|
+
private final String pathPrefix;
|
38
|
+
private final String sequenceFormat;
|
39
|
+
private final String fileNameExtension;
|
40
|
+
private boolean renameFileAfterUpload;
|
41
|
+
|
42
|
+
private final int taskIndex;
|
43
|
+
final SftpUtils sftpUtils;
|
44
|
+
int fileIndex = 0;
|
45
|
+
private File tempFile;
|
46
|
+
private BufferedOutputStream localOutput = null;
|
47
|
+
List<Map<String, String>> fileList = new ArrayList<>();
|
48
|
+
String curFilename;
|
49
|
+
String tempFilename;
|
50
|
+
|
51
|
+
/* for file splitting purpose */
|
52
|
+
private final long threshold; // local file size to flush (upload to server)
|
53
|
+
boolean appending = false; // when local file exceeds threshold, go to append mode
|
54
|
+
FileObject remoteFile;
|
55
|
+
BufferedOutputStream remoteOutput; // to keep output stream open during append mode
|
56
|
+
long bufLen = 0L; // local temp file size
|
57
|
+
|
58
|
+
SftpLocalFileOutput(PluginTask task, int taskIndex)
|
59
|
+
{
|
60
|
+
this.pathPrefix = task.getPathPrefix();
|
61
|
+
this.sequenceFormat = task.getSequenceFormat();
|
62
|
+
this.fileNameExtension = task.getFileNameExtension();
|
63
|
+
this.renameFileAfterUpload = task.getRenameFileAfterUpload();
|
64
|
+
this.taskIndex = taskIndex;
|
65
|
+
this.sftpUtils = new SftpUtils(task);
|
66
|
+
this.threshold = task.getTempFileThreshold();
|
67
|
+
}
|
68
|
+
|
69
|
+
@Override
|
70
|
+
public void nextFile()
|
71
|
+
{
|
72
|
+
closeCurrentFile();
|
73
|
+
|
74
|
+
try {
|
75
|
+
tempFile = Exec.getTempFileSpace().createTempFile();
|
76
|
+
localOutput = new BufferedOutputStream(new FileOutputStream(tempFile));
|
77
|
+
appending = false;
|
78
|
+
curFilename = getOutputFilePath();
|
79
|
+
tempFilename = curFilename + TMP_SUFFIX;
|
80
|
+
}
|
81
|
+
catch (FileNotFoundException e) {
|
82
|
+
logger.error(e.getMessage());
|
83
|
+
throw Throwables.propagate(e);
|
84
|
+
}
|
85
|
+
}
|
86
|
+
|
87
|
+
@Override
|
88
|
+
public void add(final Buffer buffer)
|
89
|
+
{
|
90
|
+
try {
|
91
|
+
final int len = buffer.limit();
|
92
|
+
if (bufLen + len > threshold) {
|
93
|
+
localOutput.close();
|
94
|
+
// into 'append' mode
|
95
|
+
appending = true;
|
96
|
+
flush();
|
97
|
+
|
98
|
+
// reset output stream (overwrite local temp file)
|
99
|
+
localOutput = new BufferedOutputStream(new FileOutputStream(tempFile));
|
100
|
+
bufLen = 0L;
|
101
|
+
}
|
102
|
+
localOutput.write(buffer.array(), buffer.offset(), len);
|
103
|
+
bufLen += len;
|
104
|
+
}
|
105
|
+
catch (IOException ex) {
|
106
|
+
throw Throwables.propagate(ex);
|
107
|
+
}
|
108
|
+
finally {
|
109
|
+
buffer.release();
|
110
|
+
}
|
111
|
+
}
|
112
|
+
|
113
|
+
@Override
|
114
|
+
public void finish()
|
115
|
+
{
|
116
|
+
closeCurrentFile();
|
117
|
+
try {
|
118
|
+
flush();
|
119
|
+
}
|
120
|
+
catch (IOException e) {
|
121
|
+
throw Throwables.propagate(e);
|
122
|
+
}
|
123
|
+
closeRemoteFile();
|
124
|
+
// if input config is not `renameFileAfterUpload`
|
125
|
+
// and file is being split, we have to rename it here
|
126
|
+
// otherwise, when it exits, it won't rename
|
127
|
+
if (!renameFileAfterUpload && appending) {
|
128
|
+
sftpUtils.renameFile(tempFilename, curFilename);
|
129
|
+
}
|
130
|
+
fileList.add(fileReport());
|
131
|
+
fileIndex++;
|
132
|
+
}
|
133
|
+
|
134
|
+
@Override
|
135
|
+
public void close()
|
136
|
+
{
|
137
|
+
closeCurrentFile();
|
138
|
+
// TODO
|
139
|
+
sftpUtils.close();
|
140
|
+
}
|
141
|
+
|
142
|
+
@Override
|
143
|
+
public void abort()
|
144
|
+
{
|
145
|
+
// delete incomplete files
|
146
|
+
if (renameFileAfterUpload) {
|
147
|
+
sftpUtils.deleteFile(tempFilename);
|
148
|
+
}
|
149
|
+
else {
|
150
|
+
sftpUtils.deleteFile(curFilename);
|
151
|
+
}
|
152
|
+
}
|
153
|
+
|
154
|
+
@Override
|
155
|
+
public TaskReport commit()
|
156
|
+
{
|
157
|
+
TaskReport report = Exec.newTaskReport();
|
158
|
+
report.set("file_list", fileList);
|
159
|
+
return report;
|
160
|
+
}
|
161
|
+
|
162
|
+
void closeCurrentFile()
|
163
|
+
{
|
164
|
+
try {
|
165
|
+
if (localOutput != null) {
|
166
|
+
localOutput.close();
|
167
|
+
localOutput = null;
|
168
|
+
}
|
169
|
+
}
|
170
|
+
catch (IOException ex) {
|
171
|
+
throw Throwables.propagate(ex);
|
172
|
+
}
|
173
|
+
}
|
174
|
+
|
175
|
+
void closeRemoteFile()
|
176
|
+
{
|
177
|
+
if (remoteOutput != null) {
|
178
|
+
new TimeoutCloser(remoteOutput).close();
|
179
|
+
remoteOutput = null;
|
180
|
+
remoteFile = null;
|
181
|
+
}
|
182
|
+
}
|
183
|
+
|
184
|
+
String getOutputFilePath()
|
185
|
+
{
|
186
|
+
return pathPrefix + String.format(sequenceFormat, taskIndex, fileIndex) + fileNameExtension;
|
187
|
+
}
|
188
|
+
|
189
|
+
Map<String, String> fileReport()
|
190
|
+
{
|
191
|
+
return ImmutableMap.of(
|
192
|
+
"temporary_filename", tempFilename,
|
193
|
+
"real_filename", curFilename
|
194
|
+
);
|
195
|
+
}
|
196
|
+
|
197
|
+
private void flush() throws IOException
|
198
|
+
{
|
199
|
+
if (appending) {
|
200
|
+
// open and keep stream open
|
201
|
+
if (remoteOutput == null) {
|
202
|
+
remoteFile = sftpUtils.resolve(tempFilename);
|
203
|
+
remoteOutput = sftpUtils.openStream(remoteFile);
|
204
|
+
}
|
205
|
+
sftpUtils.appendFile(tempFile, remoteFile, remoteOutput);
|
206
|
+
}
|
207
|
+
else {
|
208
|
+
sftpUtils.uploadFile(tempFile, renameFileAfterUpload ? tempFilename : curFilename);
|
209
|
+
}
|
210
|
+
}
|
211
|
+
|
212
|
+
@VisibleForTesting
|
213
|
+
OutputStream getLocalOutput()
|
214
|
+
{
|
215
|
+
return localOutput;
|
216
|
+
}
|
217
|
+
|
218
|
+
@VisibleForTesting
|
219
|
+
OutputStream getRemoteOutput()
|
220
|
+
{
|
221
|
+
return remoteOutput;
|
222
|
+
}
|
223
|
+
}
|
@@ -0,0 +1,120 @@
|
|
1
|
+
package org.embulk.output.sftp;
|
2
|
+
|
3
|
+
import com.google.common.base.Throwables;
|
4
|
+
import com.google.common.util.concurrent.AbstractScheduledService;
|
5
|
+
import com.google.common.util.concurrent.Service;
|
6
|
+
import org.apache.commons.vfs2.FileSystemException;
|
7
|
+
import org.embulk.output.sftp.utils.TimedCallable;
|
8
|
+
import org.embulk.spi.Buffer;
|
9
|
+
|
10
|
+
import java.io.BufferedOutputStream;
|
11
|
+
import java.util.concurrent.ExecutionException;
|
12
|
+
import java.util.concurrent.TimeUnit;
|
13
|
+
import java.util.concurrent.TimeoutException;
|
14
|
+
|
15
|
+
import static org.embulk.output.sftp.SftpFileOutputPlugin.PluginTask;
|
16
|
+
|
17
|
+
public class SftpRemoteFileOutput extends SftpLocalFileOutput
|
18
|
+
{
|
19
|
+
private static final int TIMEOUT = 60; // 1min
|
20
|
+
private Service watcher;
|
21
|
+
|
22
|
+
SftpRemoteFileOutput(PluginTask task, int taskIndex)
|
23
|
+
{
|
24
|
+
super(task, taskIndex);
|
25
|
+
appending = true;
|
26
|
+
}
|
27
|
+
|
28
|
+
@Override
|
29
|
+
public void add(final Buffer buffer)
|
30
|
+
{
|
31
|
+
try {
|
32
|
+
final int len = buffer.limit();
|
33
|
+
// time-out write
|
34
|
+
new TimedCallable<Void>()
|
35
|
+
{
|
36
|
+
@Override
|
37
|
+
public Void call() throws Exception
|
38
|
+
{
|
39
|
+
remoteOutput.write(buffer.array(), buffer.offset(), len);
|
40
|
+
return null;
|
41
|
+
}
|
42
|
+
}.call(TIMEOUT, TimeUnit.SECONDS);
|
43
|
+
bufLen += len;
|
44
|
+
}
|
45
|
+
catch (InterruptedException | ExecutionException | TimeoutException ex) {
|
46
|
+
logger.error("Failed to write buffer", ex);
|
47
|
+
stopWatcher();
|
48
|
+
throw Throwables.propagate(ex);
|
49
|
+
}
|
50
|
+
finally {
|
51
|
+
buffer.release();
|
52
|
+
}
|
53
|
+
}
|
54
|
+
|
55
|
+
@Override
|
56
|
+
void closeCurrentFile()
|
57
|
+
{
|
58
|
+
super.closeCurrentFile();
|
59
|
+
stopWatcher();
|
60
|
+
}
|
61
|
+
|
62
|
+
void stopWatcher()
|
63
|
+
{
|
64
|
+
if (watcher != null) {
|
65
|
+
watcher.stopAsync();
|
66
|
+
}
|
67
|
+
}
|
68
|
+
|
69
|
+
@Override
|
70
|
+
public void nextFile()
|
71
|
+
{
|
72
|
+
closeCurrentFile();
|
73
|
+
|
74
|
+
try {
|
75
|
+
curFilename = getOutputFilePath();
|
76
|
+
tempFilename = curFilename + TMP_SUFFIX;
|
77
|
+
// resolve remote file & open output stream
|
78
|
+
remoteFile = sftpUtils.newSftpFile(sftpUtils.getSftpFileUri(tempFilename));
|
79
|
+
// this is where it's different from |SftpLocalFileOutput|
|
80
|
+
remoteOutput = new BufferedOutputStream(remoteFile.getContent().getOutputStream());
|
81
|
+
watcher = newProgressWatcher().startAsync();
|
82
|
+
}
|
83
|
+
catch (FileSystemException e) {
|
84
|
+
stopWatcher();
|
85
|
+
throw Throwables.propagate(e);
|
86
|
+
}
|
87
|
+
}
|
88
|
+
|
89
|
+
@Override
|
90
|
+
public void finish()
|
91
|
+
{
|
92
|
+
closeCurrentFile();
|
93
|
+
closeRemoteFile();
|
94
|
+
fileList.add(fileReport());
|
95
|
+
fileIndex++;
|
96
|
+
stopWatcher();
|
97
|
+
}
|
98
|
+
|
99
|
+
private Service newProgressWatcher()
|
100
|
+
{
|
101
|
+
return new AbstractScheduledService()
|
102
|
+
{
|
103
|
+
private static final int PERIOD = 10; // seconds
|
104
|
+
private long prevLen = 0L;
|
105
|
+
|
106
|
+
@Override
|
107
|
+
protected void runOneIteration()
|
108
|
+
{
|
109
|
+
logger.info("Upload progress: {} KB - {} KB/s", bufLen / 1024, (bufLen - prevLen) / 1024 / PERIOD);
|
110
|
+
prevLen = bufLen;
|
111
|
+
}
|
112
|
+
|
113
|
+
@Override
|
114
|
+
protected Scheduler scheduler()
|
115
|
+
{
|
116
|
+
return Scheduler.newFixedRateSchedule(PERIOD, PERIOD, TimeUnit.SECONDS);
|
117
|
+
}
|
118
|
+
};
|
119
|
+
}
|
120
|
+
}
|