embulk-output-sftp 0.1.10 → 0.1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +2 -3
- data/CHANGELOG.md +3 -0
- data/README.md +3 -0
- data/build.gradle +6 -5
- data/src/main/java/org/embulk/output/sftp/SftpFileOutputPlugin.java +20 -3
- data/src/main/java/org/embulk/output/sftp/SftpLocalFileOutput.java +223 -0
- data/src/main/java/org/embulk/output/sftp/SftpRemoteFileOutput.java +120 -0
- data/src/main/java/org/embulk/output/sftp/SftpUtils.java +146 -123
- data/src/main/java/org/embulk/output/sftp/utils/DefaultRetry.java +55 -0
- data/src/main/java/org/embulk/output/sftp/utils/TimedCallable.java +27 -0
- data/src/main/java/org/embulk/output/sftp/utils/TimeoutCloser.java +42 -0
- data/src/test/java/org/embulk/output/sftp/TestSftpFileOutputPlugin.java +377 -8
- data/src/test/java/org/embulk/output/sftp/utils/TestTimedCallable.java +36 -0
- data/src/test/java/org/embulk/output/sftp/utils/TestTimeoutCloser.java +45 -0
- metadata +11 -5
- data/src/main/java/org/embulk/output/sftp/SftpFileOutput.java +0 -140
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 98edf71b4a5d7998403671b7e181b250f7db9981
|
4
|
+
data.tar.gz: 940f0d1fea8ed83c957bdfca08a2bb89725b487b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c4e8426ac217df25329da8a514b1603158bc7d6ed2b630b2e28486fcd36fb81e24aee588e5b1b2f6dea2e0996789a947eaac617330cb6180ca3e6afc35a9f0ed
|
7
|
+
data.tar.gz: 99c6f36a658a92dd3710c9b305892e5961f9d154694570b5f05e7cbf6e4ad2794adfda169e175622f3bb7ff5aa5714e25368e41a0f6224d3dce8acc004b87d9d
|
data/.travis.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
0.1.11 (2018-08-27)
|
2
|
+
- Enhance: Add 2 new configs `local_buffering` (boolean) and `temp_file_threshold` (long)
|
3
|
+
- https://github.com/embulk/embulk-output-sftp/pull/50
|
1
4
|
0.1.10 (2018-05-07)
|
2
5
|
- Fix: Use java.util.regex.Pattern for host name validation
|
3
6
|
- https://github.com/embulk/embulk-output-sftp/pull/49
|
data/README.md
CHANGED
@@ -24,6 +24,8 @@ Stores files on a SFTP Server
|
|
24
24
|
- **file_ext**: Extension of output files (string, required)
|
25
25
|
- **sequence_format**: Format for sequence part of output files (string, default: `".%03d.%02d"`)
|
26
26
|
- **rename_file_after_upload**: Upload `file_ext` + ".tmp" first, then rename it after upload finish (boolean, default: `false`)
|
27
|
+
- **local_buffering**: Use a local temp file to buffer records. If `false`, the plugin writes records directly to the remote file, using ".tmp" as the filename suffix (boolean, default: `true`)
|
28
|
+
- **temp_file_threshold**: Maximum size of the local temp file. When the local temp file reaches this threshold, the plugin flushes (appends) it to the remote file (long, default: `5368709120`, i.e. 5GiB; min: 50MiB, max: 10GiB)
|
27
29
|
|
28
30
|
### Proxy configuration
|
29
31
|
|
@@ -53,6 +55,7 @@ out:
|
|
53
55
|
path_prefix: /data/sftp
|
54
56
|
file_ext: _20151020.tsv
|
55
57
|
sequence_format: ".%01d%01d"
|
58
|
+
temp_file_threshold: 10737418240 # 10GiB
|
56
59
|
```
|
57
60
|
|
58
61
|
With proxy
|
data/build.gradle
CHANGED
@@ -17,23 +17,24 @@ configurations {
|
|
17
17
|
}
|
18
18
|
|
19
19
|
group = "org.embulk.output.sftp"
|
20
|
-
version = "0.1.10"
|
20
|
+
version = "0.1.11"
|
21
21
|
sourceCompatibility = 1.7
|
22
22
|
targetCompatibility = 1.7
|
23
23
|
|
24
24
|
dependencies {
|
25
|
-
compile "org.embulk:embulk-core:0.
|
26
|
-
provided "org.embulk:embulk-core:0.
|
25
|
+
compile "org.embulk:embulk-core:0.9.7"
|
26
|
+
provided "org.embulk:embulk-core:0.9.7"
|
27
27
|
// compile "YOUR_JAR_DEPENDENCY_GROUP:YOUR_JAR_DEPENDENCY_MODULE:YOUR_JAR_DEPENDENCY_VERSION"
|
28
28
|
compile "org.apache.commons:commons-vfs2:2.2"
|
29
29
|
compile "commons-io:commons-io:2.6"
|
30
30
|
compile "com.jcraft:jsch:0.1.54"
|
31
31
|
testCompile "junit:junit:4.+"
|
32
|
-
testCompile "org.embulk:embulk-core:0.
|
33
|
-
testCompile "org.embulk:embulk-standards:0.
|
32
|
+
testCompile "org.embulk:embulk-core:0.9.7:tests"
|
33
|
+
testCompile "org.embulk:embulk-standards:0.9.7"
|
34
34
|
testCompile "org.apache.sshd:apache-sshd:1.1.0+"
|
35
35
|
testCompile "org.littleshoot:littleproxy:1.1.0-beta1"
|
36
36
|
testCompile "io.netty:netty-all:4.0.34.Final"
|
37
|
+
testCompile "org.mockito:mockito-core:2.+"
|
37
38
|
}
|
38
39
|
|
39
40
|
jacocoTestReport {
|
@@ -13,6 +13,9 @@ import org.embulk.spi.FileOutputPlugin;
|
|
13
13
|
import org.embulk.spi.TransactionalFileOutput;
|
14
14
|
import org.embulk.spi.unit.LocalFile;
|
15
15
|
|
16
|
+
import javax.validation.constraints.Max;
|
17
|
+
import javax.validation.constraints.Min;
|
18
|
+
|
16
19
|
import java.util.List;
|
17
20
|
import java.util.Map;
|
18
21
|
|
@@ -47,7 +50,7 @@ public class SftpFileOutputPlugin
|
|
47
50
|
|
48
51
|
@Config("user_directory_is_root")
|
49
52
|
@ConfigDefault("true")
|
50
|
-
public
|
53
|
+
public boolean getUserDirIsRoot();
|
51
54
|
|
52
55
|
@Config("timeout")
|
53
56
|
@ConfigDefault("600") // 10 minutes
|
@@ -73,7 +76,18 @@ public class SftpFileOutputPlugin
|
|
73
76
|
|
74
77
|
@Config("rename_file_after_upload")
|
75
78
|
@ConfigDefault("false")
|
76
|
-
public
|
79
|
+
public boolean getRenameFileAfterUpload();
|
80
|
+
|
81
|
+
// if `false`, plugin will use remote file as buffer
|
82
|
+
@Config("local_buffering")
|
83
|
+
@ConfigDefault("true")
|
84
|
+
public boolean getLocalBuffering();
|
85
|
+
|
86
|
+
@Min(50L * 1024 * 1024) // 50MiB
|
87
|
+
@Max(10L * 1024 * 1024 * 1024) // 10GiB
|
88
|
+
@Config("temp_file_threshold")
|
89
|
+
@ConfigDefault("5368709120") // 5GiB
|
90
|
+
public long getTempFileThreshold();
|
77
91
|
}
|
78
92
|
|
79
93
|
@Override
|
@@ -141,6 +155,9 @@ public class SftpFileOutputPlugin
|
|
141
155
|
public TransactionalFileOutput open(TaskSource taskSource, final int taskIndex)
|
142
156
|
{
|
143
157
|
final PluginTask task = taskSource.loadTask(PluginTask.class);
|
144
|
-
|
158
|
+
if (task.getLocalBuffering()) {
|
159
|
+
return new SftpLocalFileOutput(task, taskIndex);
|
160
|
+
}
|
161
|
+
return new SftpRemoteFileOutput(task, taskIndex);
|
145
162
|
}
|
146
163
|
}
|
@@ -0,0 +1,223 @@
|
|
1
|
+
package org.embulk.output.sftp;
|
2
|
+
|
3
|
+
import com.google.common.annotations.VisibleForTesting;
|
4
|
+
import com.google.common.base.Throwables;
|
5
|
+
import com.google.common.collect.ImmutableMap;
|
6
|
+
import org.apache.commons.vfs2.FileObject;
|
7
|
+
import org.embulk.config.TaskReport;
|
8
|
+
import org.embulk.output.sftp.utils.TimeoutCloser;
|
9
|
+
import org.embulk.spi.Buffer;
|
10
|
+
import org.embulk.spi.Exec;
|
11
|
+
import org.embulk.spi.FileOutput;
|
12
|
+
import org.embulk.spi.TransactionalFileOutput;
|
13
|
+
import org.slf4j.Logger;
|
14
|
+
|
15
|
+
import java.io.BufferedOutputStream;
|
16
|
+
import java.io.File;
|
17
|
+
import java.io.FileNotFoundException;
|
18
|
+
import java.io.FileOutputStream;
|
19
|
+
import java.io.IOException;
|
20
|
+
import java.io.OutputStream;
|
21
|
+
import java.util.ArrayList;
|
22
|
+
import java.util.List;
|
23
|
+
import java.util.Map;
|
24
|
+
|
25
|
+
import static org.embulk.output.sftp.SftpFileOutputPlugin.PluginTask;
|
26
|
+
|
27
|
+
/**
|
28
|
+
* Created by takahiro.nakayama on 10/20/15.
|
29
|
+
*/
|
30
|
+
public class SftpLocalFileOutput
|
31
|
+
implements FileOutput, TransactionalFileOutput
|
32
|
+
{
|
33
|
+
// to make it clear that it is a constant
|
34
|
+
static final String TMP_SUFFIX = ".tmp";
|
35
|
+
|
36
|
+
final Logger logger = Exec.getLogger(getClass());
|
37
|
+
private final String pathPrefix;
|
38
|
+
private final String sequenceFormat;
|
39
|
+
private final String fileNameExtension;
|
40
|
+
private boolean renameFileAfterUpload;
|
41
|
+
|
42
|
+
private final int taskIndex;
|
43
|
+
final SftpUtils sftpUtils;
|
44
|
+
int fileIndex = 0;
|
45
|
+
private File tempFile;
|
46
|
+
private BufferedOutputStream localOutput = null;
|
47
|
+
List<Map<String, String>> fileList = new ArrayList<>();
|
48
|
+
String curFilename;
|
49
|
+
String tempFilename;
|
50
|
+
|
51
|
+
/* for file splitting purpose */
|
52
|
+
private final long threshold; // local file size to flush (upload to server)
|
53
|
+
boolean appending = false; // when local file exceeds threshold, go to append mode
|
54
|
+
FileObject remoteFile;
|
55
|
+
BufferedOutputStream remoteOutput; // to keep output stream open during append mode
|
56
|
+
long bufLen = 0L; // local temp file size
|
57
|
+
|
58
|
+
SftpLocalFileOutput(PluginTask task, int taskIndex)
|
59
|
+
{
|
60
|
+
this.pathPrefix = task.getPathPrefix();
|
61
|
+
this.sequenceFormat = task.getSequenceFormat();
|
62
|
+
this.fileNameExtension = task.getFileNameExtension();
|
63
|
+
this.renameFileAfterUpload = task.getRenameFileAfterUpload();
|
64
|
+
this.taskIndex = taskIndex;
|
65
|
+
this.sftpUtils = new SftpUtils(task);
|
66
|
+
this.threshold = task.getTempFileThreshold();
|
67
|
+
}
|
68
|
+
|
69
|
+
@Override
|
70
|
+
public void nextFile()
|
71
|
+
{
|
72
|
+
closeCurrentFile();
|
73
|
+
|
74
|
+
try {
|
75
|
+
tempFile = Exec.getTempFileSpace().createTempFile();
|
76
|
+
localOutput = new BufferedOutputStream(new FileOutputStream(tempFile));
|
77
|
+
appending = false;
|
78
|
+
curFilename = getOutputFilePath();
|
79
|
+
tempFilename = curFilename + TMP_SUFFIX;
|
80
|
+
}
|
81
|
+
catch (FileNotFoundException e) {
|
82
|
+
logger.error(e.getMessage());
|
83
|
+
throw Throwables.propagate(e);
|
84
|
+
}
|
85
|
+
}
|
86
|
+
|
87
|
+
@Override
|
88
|
+
public void add(final Buffer buffer)
|
89
|
+
{
|
90
|
+
try {
|
91
|
+
final int len = buffer.limit();
|
92
|
+
if (bufLen + len > threshold) {
|
93
|
+
localOutput.close();
|
94
|
+
// into 'append' mode
|
95
|
+
appending = true;
|
96
|
+
flush();
|
97
|
+
|
98
|
+
// reset output stream (overwrite local temp file)
|
99
|
+
localOutput = new BufferedOutputStream(new FileOutputStream(tempFile));
|
100
|
+
bufLen = 0L;
|
101
|
+
}
|
102
|
+
localOutput.write(buffer.array(), buffer.offset(), len);
|
103
|
+
bufLen += len;
|
104
|
+
}
|
105
|
+
catch (IOException ex) {
|
106
|
+
throw Throwables.propagate(ex);
|
107
|
+
}
|
108
|
+
finally {
|
109
|
+
buffer.release();
|
110
|
+
}
|
111
|
+
}
|
112
|
+
|
113
|
+
@Override
|
114
|
+
public void finish()
|
115
|
+
{
|
116
|
+
closeCurrentFile();
|
117
|
+
try {
|
118
|
+
flush();
|
119
|
+
}
|
120
|
+
catch (IOException e) {
|
121
|
+
throw Throwables.propagate(e);
|
122
|
+
}
|
123
|
+
closeRemoteFile();
|
124
|
+
// if input config is not `renameFileAfterUpload`
|
125
|
+
// and file is being split, we have to rename it here
|
126
|
+
// otherwise, when it exits, it won't rename
|
127
|
+
if (!renameFileAfterUpload && appending) {
|
128
|
+
sftpUtils.renameFile(tempFilename, curFilename);
|
129
|
+
}
|
130
|
+
fileList.add(fileReport());
|
131
|
+
fileIndex++;
|
132
|
+
}
|
133
|
+
|
134
|
+
@Override
|
135
|
+
public void close()
|
136
|
+
{
|
137
|
+
closeCurrentFile();
|
138
|
+
// TODO
|
139
|
+
sftpUtils.close();
|
140
|
+
}
|
141
|
+
|
142
|
+
@Override
|
143
|
+
public void abort()
|
144
|
+
{
|
145
|
+
// delete incomplete files
|
146
|
+
if (renameFileAfterUpload) {
|
147
|
+
sftpUtils.deleteFile(tempFilename);
|
148
|
+
}
|
149
|
+
else {
|
150
|
+
sftpUtils.deleteFile(curFilename);
|
151
|
+
}
|
152
|
+
}
|
153
|
+
|
154
|
+
@Override
|
155
|
+
public TaskReport commit()
|
156
|
+
{
|
157
|
+
TaskReport report = Exec.newTaskReport();
|
158
|
+
report.set("file_list", fileList);
|
159
|
+
return report;
|
160
|
+
}
|
161
|
+
|
162
|
+
void closeCurrentFile()
|
163
|
+
{
|
164
|
+
try {
|
165
|
+
if (localOutput != null) {
|
166
|
+
localOutput.close();
|
167
|
+
localOutput = null;
|
168
|
+
}
|
169
|
+
}
|
170
|
+
catch (IOException ex) {
|
171
|
+
throw Throwables.propagate(ex);
|
172
|
+
}
|
173
|
+
}
|
174
|
+
|
175
|
+
void closeRemoteFile()
|
176
|
+
{
|
177
|
+
if (remoteOutput != null) {
|
178
|
+
new TimeoutCloser(remoteOutput).close();
|
179
|
+
remoteOutput = null;
|
180
|
+
remoteFile = null;
|
181
|
+
}
|
182
|
+
}
|
183
|
+
|
184
|
+
String getOutputFilePath()
|
185
|
+
{
|
186
|
+
return pathPrefix + String.format(sequenceFormat, taskIndex, fileIndex) + fileNameExtension;
|
187
|
+
}
|
188
|
+
|
189
|
+
Map<String, String> fileReport()
|
190
|
+
{
|
191
|
+
return ImmutableMap.of(
|
192
|
+
"temporary_filename", tempFilename,
|
193
|
+
"real_filename", curFilename
|
194
|
+
);
|
195
|
+
}
|
196
|
+
|
197
|
+
private void flush() throws IOException
|
198
|
+
{
|
199
|
+
if (appending) {
|
200
|
+
// open and keep stream open
|
201
|
+
if (remoteOutput == null) {
|
202
|
+
remoteFile = sftpUtils.resolve(tempFilename);
|
203
|
+
remoteOutput = sftpUtils.openStream(remoteFile);
|
204
|
+
}
|
205
|
+
sftpUtils.appendFile(tempFile, remoteFile, remoteOutput);
|
206
|
+
}
|
207
|
+
else {
|
208
|
+
sftpUtils.uploadFile(tempFile, renameFileAfterUpload ? tempFilename : curFilename);
|
209
|
+
}
|
210
|
+
}
|
211
|
+
|
212
|
+
@VisibleForTesting
|
213
|
+
OutputStream getLocalOutput()
|
214
|
+
{
|
215
|
+
return localOutput;
|
216
|
+
}
|
217
|
+
|
218
|
+
@VisibleForTesting
|
219
|
+
OutputStream getRemoteOutput()
|
220
|
+
{
|
221
|
+
return remoteOutput;
|
222
|
+
}
|
223
|
+
}
|
@@ -0,0 +1,120 @@
|
|
1
|
+
package org.embulk.output.sftp;
|
2
|
+
|
3
|
+
import com.google.common.base.Throwables;
|
4
|
+
import com.google.common.util.concurrent.AbstractScheduledService;
|
5
|
+
import com.google.common.util.concurrent.Service;
|
6
|
+
import org.apache.commons.vfs2.FileSystemException;
|
7
|
+
import org.embulk.output.sftp.utils.TimedCallable;
|
8
|
+
import org.embulk.spi.Buffer;
|
9
|
+
|
10
|
+
import java.io.BufferedOutputStream;
|
11
|
+
import java.util.concurrent.ExecutionException;
|
12
|
+
import java.util.concurrent.TimeUnit;
|
13
|
+
import java.util.concurrent.TimeoutException;
|
14
|
+
|
15
|
+
import static org.embulk.output.sftp.SftpFileOutputPlugin.PluginTask;
|
16
|
+
|
17
|
+
public class SftpRemoteFileOutput extends SftpLocalFileOutput
|
18
|
+
{
|
19
|
+
private static final int TIMEOUT = 60; // 1min
|
20
|
+
private Service watcher;
|
21
|
+
|
22
|
+
SftpRemoteFileOutput(PluginTask task, int taskIndex)
|
23
|
+
{
|
24
|
+
super(task, taskIndex);
|
25
|
+
appending = true;
|
26
|
+
}
|
27
|
+
|
28
|
+
@Override
|
29
|
+
public void add(final Buffer buffer)
|
30
|
+
{
|
31
|
+
try {
|
32
|
+
final int len = buffer.limit();
|
33
|
+
// time-out write
|
34
|
+
new TimedCallable<Void>()
|
35
|
+
{
|
36
|
+
@Override
|
37
|
+
public Void call() throws Exception
|
38
|
+
{
|
39
|
+
remoteOutput.write(buffer.array(), buffer.offset(), len);
|
40
|
+
return null;
|
41
|
+
}
|
42
|
+
}.call(TIMEOUT, TimeUnit.SECONDS);
|
43
|
+
bufLen += len;
|
44
|
+
}
|
45
|
+
catch (InterruptedException | ExecutionException | TimeoutException ex) {
|
46
|
+
logger.error("Failed to write buffer", ex);
|
47
|
+
stopWatcher();
|
48
|
+
throw Throwables.propagate(ex);
|
49
|
+
}
|
50
|
+
finally {
|
51
|
+
buffer.release();
|
52
|
+
}
|
53
|
+
}
|
54
|
+
|
55
|
+
@Override
|
56
|
+
void closeCurrentFile()
|
57
|
+
{
|
58
|
+
super.closeCurrentFile();
|
59
|
+
stopWatcher();
|
60
|
+
}
|
61
|
+
|
62
|
+
void stopWatcher()
|
63
|
+
{
|
64
|
+
if (watcher != null) {
|
65
|
+
watcher.stopAsync();
|
66
|
+
}
|
67
|
+
}
|
68
|
+
|
69
|
+
@Override
|
70
|
+
public void nextFile()
|
71
|
+
{
|
72
|
+
closeCurrentFile();
|
73
|
+
|
74
|
+
try {
|
75
|
+
curFilename = getOutputFilePath();
|
76
|
+
tempFilename = curFilename + TMP_SUFFIX;
|
77
|
+
// resolve remote file & open output stream
|
78
|
+
remoteFile = sftpUtils.newSftpFile(sftpUtils.getSftpFileUri(tempFilename));
|
79
|
+
// this is where it's different from |SftpLocalFileOutput|
|
80
|
+
remoteOutput = new BufferedOutputStream(remoteFile.getContent().getOutputStream());
|
81
|
+
watcher = newProgressWatcher().startAsync();
|
82
|
+
}
|
83
|
+
catch (FileSystemException e) {
|
84
|
+
stopWatcher();
|
85
|
+
throw Throwables.propagate(e);
|
86
|
+
}
|
87
|
+
}
|
88
|
+
|
89
|
+
@Override
|
90
|
+
public void finish()
|
91
|
+
{
|
92
|
+
closeCurrentFile();
|
93
|
+
closeRemoteFile();
|
94
|
+
fileList.add(fileReport());
|
95
|
+
fileIndex++;
|
96
|
+
stopWatcher();
|
97
|
+
}
|
98
|
+
|
99
|
+
private Service newProgressWatcher()
|
100
|
+
{
|
101
|
+
return new AbstractScheduledService()
|
102
|
+
{
|
103
|
+
private static final int PERIOD = 10; // seconds
|
104
|
+
private long prevLen = 0L;
|
105
|
+
|
106
|
+
@Override
|
107
|
+
protected void runOneIteration()
|
108
|
+
{
|
109
|
+
logger.info("Upload progress: {} KB - {} KB/s", bufLen / 1024, (bufLen - prevLen) / 1024 / PERIOD);
|
110
|
+
prevLen = bufLen;
|
111
|
+
}
|
112
|
+
|
113
|
+
@Override
|
114
|
+
protected Scheduler scheduler()
|
115
|
+
{
|
116
|
+
return Scheduler.newFixedRateSchedule(PERIOD, PERIOD, TimeUnit.SECONDS);
|
117
|
+
}
|
118
|
+
};
|
119
|
+
}
|
120
|
+
}
|