embulk-output-sftp 0.1.10 → 0.1.11

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c56a0584cf221cab6a3e4caf8f5f6b7fd8c73750
4
- data.tar.gz: 9a54d758d53715b2dd6b13ecebbea1306e92ea1d
3
+ metadata.gz: 98edf71b4a5d7998403671b7e181b250f7db9981
4
+ data.tar.gz: 940f0d1fea8ed83c957bdfca08a2bb89725b487b
5
5
  SHA512:
6
- metadata.gz: 10749e577ff285cc6cce4254517f712925e637e959cddcefce74dd086cb54605c675bc272270b50d5291cd8dc36a307dab205dbcba65aaeb1a29814b9b3ad63b
7
- data.tar.gz: 9991eb85333675b381725365232947f628862a6e9430e912f3eeeee0d3e6d76e31db40a92fd7b7856706378c049c5b99ace8617980181fa6ec3bd1fc5b6963e9
6
+ metadata.gz: c4e8426ac217df25329da8a514b1603158bc7d6ed2b630b2e28486fcd36fb81e24aee588e5b1b2f6dea2e0996789a947eaac617330cb6180ca3e6afc35a9f0ed
7
+ data.tar.gz: 99c6f36a658a92dd3710c9b305892e5961f9d154694570b5f05e7cbf6e4ad2794adfda169e175622f3bb7ff5aa5714e25368e41a0f6224d3dce8acc004b87d9d
data/.travis.yml CHANGED
@@ -1,8 +1,7 @@
1
- dist: precise
1
+ dist: trusty
2
2
  language: java
3
3
  jdk:
4
- - openjdk7
5
- - oraclejdk7
4
+ - openjdk8
6
5
  - oraclejdk8
7
6
  script:
8
7
  - ./gradlew test
data/CHANGELOG.md CHANGED
@@ -1,3 +1,6 @@
1
+ 0.1.11 (2018-08-27)
2
+ - Enhance: Add 2 new configs `local_buffering` (boolean) and `temp_file_threshold` (long)
3
+ - https://github.com/embulk/embulk-output-sftp/pull/50
1
4
  0.1.10 (2018-05-07)
2
5
  - Fix: Use java.util.regex.Pattern for host name validation
3
6
  - https://github.com/embulk/embulk-output-sftp/pull/49
data/README.md CHANGED
@@ -24,6 +24,8 @@ Stores files on a SFTP Server
24
24
  - **file_ext**: Extension of output files (string, required)
25
25
  - **sequence_format**: Format for sequence part of output files (string, default: `".%03d.%02d"`)
26
26
  - **rename_file_after_upload**: Upload `file_ext` + ".tmp" first, then rename it after upload finish (boolean, default: `false`)
27
+ - **local_buffering**: Buffer records in a local temp file before uploading. If `false`, the plugin writes records directly to the remote file, using ".tmp" as the filename suffix (boolean, default: `true`)
28
+ - **temp_file_threshold**: Maximum size of the local temp file in bytes. When the local temp file reaches this threshold, the plugin flushes (appends) its contents to the remote file (long, default: `5368709120`, i.e. 5GiB, min: 50MiB, max: 10GiB)
27
29
 
28
30
  ### Proxy configuration
29
31
 
@@ -53,6 +55,7 @@ out:
53
55
  path_prefix: /data/sftp
54
56
  file_ext: _20151020.tsv
55
57
  sequence_format: ".%01d%01d"
58
+ temp_file_threshold: 10737418240 # 10GiB
56
59
  ```
57
60
 
58
61
  With proxy
data/build.gradle CHANGED
@@ -17,23 +17,24 @@ configurations {
17
17
  }
18
18
 
19
19
  group = "org.embulk.output.sftp"
20
- version = "0.1.10"
20
+ version = "0.1.11"
21
21
  sourceCompatibility = 1.7
22
22
  targetCompatibility = 1.7
23
23
 
24
24
  dependencies {
25
- compile "org.embulk:embulk-core:0.8.6"
26
- provided "org.embulk:embulk-core:0.8.6"
25
+ compile "org.embulk:embulk-core:0.9.7"
26
+ provided "org.embulk:embulk-core:0.9.7"
27
27
  // compile "YOUR_JAR_DEPENDENCY_GROUP:YOUR_JAR_DEPENDENCY_MODULE:YOUR_JAR_DEPENDENCY_VERSION"
28
28
  compile "org.apache.commons:commons-vfs2:2.2"
29
29
  compile "commons-io:commons-io:2.6"
30
30
  compile "com.jcraft:jsch:0.1.54"
31
31
  testCompile "junit:junit:4.+"
32
- testCompile "org.embulk:embulk-core:0.8.6:tests"
33
- testCompile "org.embulk:embulk-standards:0.8.6"
32
+ testCompile "org.embulk:embulk-core:0.9.7:tests"
33
+ testCompile "org.embulk:embulk-standards:0.9.7"
34
34
  testCompile "org.apache.sshd:apache-sshd:1.1.0+"
35
35
  testCompile "org.littleshoot:littleproxy:1.1.0-beta1"
36
36
  testCompile "io.netty:netty-all:4.0.34.Final"
37
+ testCompile "org.mockito:mockito-core:2.+"
37
38
  }
38
39
 
39
40
  jacocoTestReport {
@@ -13,6 +13,9 @@ import org.embulk.spi.FileOutputPlugin;
13
13
  import org.embulk.spi.TransactionalFileOutput;
14
14
  import org.embulk.spi.unit.LocalFile;
15
15
 
16
+ import javax.validation.constraints.Max;
17
+ import javax.validation.constraints.Min;
18
+
16
19
  import java.util.List;
17
20
  import java.util.Map;
18
21
 
@@ -47,7 +50,7 @@ public class SftpFileOutputPlugin
47
50
 
48
51
  @Config("user_directory_is_root")
49
52
  @ConfigDefault("true")
50
- public Boolean getUserDirIsRoot();
53
+ public boolean getUserDirIsRoot();
51
54
 
52
55
  @Config("timeout")
53
56
  @ConfigDefault("600") // 10 minutes
@@ -73,7 +76,18 @@ public class SftpFileOutputPlugin
73
76
 
74
77
  @Config("rename_file_after_upload")
75
78
  @ConfigDefault("false")
76
- public Boolean getRenameFileAfterUpload();
79
+ public boolean getRenameFileAfterUpload();
80
+
81
+ // if `false`, plugin will use remote file as buffer
82
+ @Config("local_buffering")
83
+ @ConfigDefault("true")
84
+ public boolean getLocalBuffering();
85
+
86
+ @Min(50L * 1024 * 1024) // 50MiB
87
+ @Max(10L * 1024 * 1024 * 1024) // 10GiB
88
+ @Config("temp_file_threshold")
89
+ @ConfigDefault("5368709120") // 5GiB
90
+ public long getTempFileThreshold();
77
91
  }
78
92
 
79
93
  @Override
@@ -141,6 +155,9 @@ public class SftpFileOutputPlugin
141
155
  public TransactionalFileOutput open(TaskSource taskSource, final int taskIndex)
142
156
  {
143
157
  final PluginTask task = taskSource.loadTask(PluginTask.class);
144
- return new SftpFileOutput(task, taskIndex);
158
+ if (task.getLocalBuffering()) {
159
+ return new SftpLocalFileOutput(task, taskIndex);
160
+ }
161
+ return new SftpRemoteFileOutput(task, taskIndex);
145
162
  }
146
163
  }
@@ -0,0 +1,223 @@
1
+ package org.embulk.output.sftp;
2
+
3
+ import com.google.common.annotations.VisibleForTesting;
4
+ import com.google.common.base.Throwables;
5
+ import com.google.common.collect.ImmutableMap;
6
+ import org.apache.commons.vfs2.FileObject;
7
+ import org.embulk.config.TaskReport;
8
+ import org.embulk.output.sftp.utils.TimeoutCloser;
9
+ import org.embulk.spi.Buffer;
10
+ import org.embulk.spi.Exec;
11
+ import org.embulk.spi.FileOutput;
12
+ import org.embulk.spi.TransactionalFileOutput;
13
+ import org.slf4j.Logger;
14
+
15
+ import java.io.BufferedOutputStream;
16
+ import java.io.File;
17
+ import java.io.FileNotFoundException;
18
+ import java.io.FileOutputStream;
19
+ import java.io.IOException;
20
+ import java.io.OutputStream;
21
+ import java.util.ArrayList;
22
+ import java.util.List;
23
+ import java.util.Map;
24
+
25
+ import static org.embulk.output.sftp.SftpFileOutputPlugin.PluginTask;
26
+
27
+ /**
28
+ * Created by takahiro.nakayama on 10/20/15.
29
+ */
30
+ public class SftpLocalFileOutput
31
+ implements FileOutput, TransactionalFileOutput
32
+ {
33
+ // suffix appended to the output file path to form the temporary remote filename
34
+ static final String TMP_SUFFIX = ".tmp";
35
+
36
+ final Logger logger = Exec.getLogger(getClass());
37
+ private final String pathPrefix;
38
+ private final String sequenceFormat;
39
+ private final String fileNameExtension;
40
+ private boolean renameFileAfterUpload;
41
+
42
+ private final int taskIndex;
43
+ final SftpUtils sftpUtils;
44
+ int fileIndex = 0;
45
+ private File tempFile;
46
+ private BufferedOutputStream localOutput = null;
47
+ List<Map<String, String>> fileList = new ArrayList<>();
48
+ String curFilename;
49
+ String tempFilename;
50
+
51
+ /* for file splitting purpose */
52
+ private final long threshold; // local file size to flush (upload to server)
53
+ boolean appending = false; // when local file exceeds threshold, go to append mode
54
+ FileObject remoteFile;
55
+ BufferedOutputStream remoteOutput; // to keep output stream open during append mode
56
+ long bufLen = 0L; // local temp file size
57
+
58
+ SftpLocalFileOutput(PluginTask task, int taskIndex)
59
+ {
60
+ this.pathPrefix = task.getPathPrefix();
61
+ this.sequenceFormat = task.getSequenceFormat();
62
+ this.fileNameExtension = task.getFileNameExtension();
63
+ this.renameFileAfterUpload = task.getRenameFileAfterUpload();
64
+ this.taskIndex = taskIndex;
65
+ this.sftpUtils = new SftpUtils(task);
66
+ this.threshold = task.getTempFileThreshold();
67
+ }
68
+
69
+ @Override
70
+ public void nextFile()
71
+ {
72
+ closeCurrentFile();
73
+
74
+ try {
75
+ tempFile = Exec.getTempFileSpace().createTempFile();
76
+ localOutput = new BufferedOutputStream(new FileOutputStream(tempFile));
77
+ appending = false;
78
+ curFilename = getOutputFilePath();
79
+ tempFilename = curFilename + TMP_SUFFIX;
80
+ }
81
+ catch (FileNotFoundException e) {
82
+ logger.error(e.getMessage());
83
+ throw Throwables.propagate(e);
84
+ }
85
+ }
86
+
87
+ @Override
88
+ public void add(final Buffer buffer)
89
+ {
90
+ try {
91
+ final int len = buffer.limit();
92
+ if (bufLen + len > threshold) {
93
+ localOutput.close();
94
+ // into 'append' mode
95
+ appending = true;
96
+ flush();
97
+
98
+ // reset output stream (overwrite local temp file)
99
+ localOutput = new BufferedOutputStream(new FileOutputStream(tempFile));
100
+ bufLen = 0L;
101
+ }
102
+ localOutput.write(buffer.array(), buffer.offset(), len);
103
+ bufLen += len;
104
+ }
105
+ catch (IOException ex) {
106
+ throw Throwables.propagate(ex);
107
+ }
108
+ finally {
109
+ buffer.release();
110
+ }
111
+ }
112
+
113
+ @Override
114
+ public void finish()
115
+ {
116
+ closeCurrentFile();
117
+ try {
118
+ flush();
119
+ }
120
+ catch (IOException e) {
121
+ throw Throwables.propagate(e);
122
+ }
123
+ closeRemoteFile();
124
+ // If `renameFileAfterUpload` is disabled but the file was split (append mode),
125
+ // the data was written to the remote ".tmp" file, so we have to rename it here;
126
+ // otherwise it would not be renamed when the plugin exits.
127
+ if (!renameFileAfterUpload && appending) {
128
+ sftpUtils.renameFile(tempFilename, curFilename);
129
+ }
130
+ fileList.add(fileReport());
131
+ fileIndex++;
132
+ }
133
+
134
+ @Override
135
+ public void close()
136
+ {
137
+ closeCurrentFile();
138
+ // TODO
139
+ sftpUtils.close();
140
+ }
141
+
142
+ @Override
143
+ public void abort()
144
+ {
145
+ // delete incomplete files
146
+ if (renameFileAfterUpload) {
147
+ sftpUtils.deleteFile(tempFilename);
148
+ }
149
+ else {
150
+ sftpUtils.deleteFile(curFilename);
151
+ }
152
+ }
153
+
154
+ @Override
155
+ public TaskReport commit()
156
+ {
157
+ TaskReport report = Exec.newTaskReport();
158
+ report.set("file_list", fileList);
159
+ return report;
160
+ }
161
+
162
+ void closeCurrentFile()
163
+ {
164
+ try {
165
+ if (localOutput != null) {
166
+ localOutput.close();
167
+ localOutput = null;
168
+ }
169
+ }
170
+ catch (IOException ex) {
171
+ throw Throwables.propagate(ex);
172
+ }
173
+ }
174
+
175
+ void closeRemoteFile()
176
+ {
177
+ if (remoteOutput != null) {
178
+ new TimeoutCloser(remoteOutput).close();
179
+ remoteOutput = null;
180
+ remoteFile = null;
181
+ }
182
+ }
183
+
184
+ String getOutputFilePath()
185
+ {
186
+ return pathPrefix + String.format(sequenceFormat, taskIndex, fileIndex) + fileNameExtension;
187
+ }
188
+
189
+ Map<String, String> fileReport()
190
+ {
191
+ return ImmutableMap.of(
192
+ "temporary_filename", tempFilename,
193
+ "real_filename", curFilename
194
+ );
195
+ }
196
+
197
+ private void flush() throws IOException
198
+ {
199
+ if (appending) {
200
+ // open and keep stream open
201
+ if (remoteOutput == null) {
202
+ remoteFile = sftpUtils.resolve(tempFilename);
203
+ remoteOutput = sftpUtils.openStream(remoteFile);
204
+ }
205
+ sftpUtils.appendFile(tempFile, remoteFile, remoteOutput);
206
+ }
207
+ else {
208
+ sftpUtils.uploadFile(tempFile, renameFileAfterUpload ? tempFilename : curFilename);
209
+ }
210
+ }
211
+
212
+ @VisibleForTesting
213
+ OutputStream getLocalOutput()
214
+ {
215
+ return localOutput;
216
+ }
217
+
218
+ @VisibleForTesting
219
+ OutputStream getRemoteOutput()
220
+ {
221
+ return remoteOutput;
222
+ }
223
+ }
@@ -0,0 +1,120 @@
1
+ package org.embulk.output.sftp;
2
+
3
+ import com.google.common.base.Throwables;
4
+ import com.google.common.util.concurrent.AbstractScheduledService;
5
+ import com.google.common.util.concurrent.Service;
6
+ import org.apache.commons.vfs2.FileSystemException;
7
+ import org.embulk.output.sftp.utils.TimedCallable;
8
+ import org.embulk.spi.Buffer;
9
+
10
+ import java.io.BufferedOutputStream;
11
+ import java.util.concurrent.ExecutionException;
12
+ import java.util.concurrent.TimeUnit;
13
+ import java.util.concurrent.TimeoutException;
14
+
15
+ import static org.embulk.output.sftp.SftpFileOutputPlugin.PluginTask;
16
+
17
+ public class SftpRemoteFileOutput extends SftpLocalFileOutput
18
+ {
19
+ private static final int TIMEOUT = 60; // 1min
20
+ private Service watcher;
21
+
22
+ SftpRemoteFileOutput(PluginTask task, int taskIndex)
23
+ {
24
+ super(task, taskIndex);
25
+ appending = true;
26
+ }
27
+
28
+ @Override
29
+ public void add(final Buffer buffer)
30
+ {
31
+ try {
32
+ final int len = buffer.limit();
33
+ // time-out write
34
+ new TimedCallable<Void>()
35
+ {
36
+ @Override
37
+ public Void call() throws Exception
38
+ {
39
+ remoteOutput.write(buffer.array(), buffer.offset(), len);
40
+ return null;
41
+ }
42
+ }.call(TIMEOUT, TimeUnit.SECONDS);
43
+ bufLen += len;
44
+ }
45
+ catch (InterruptedException | ExecutionException | TimeoutException ex) {
46
+ logger.error("Failed to write buffer", ex);
47
+ stopWatcher();
48
+ throw Throwables.propagate(ex);
49
+ }
50
+ finally {
51
+ buffer.release();
52
+ }
53
+ }
54
+
55
+ @Override
56
+ void closeCurrentFile()
57
+ {
58
+ super.closeCurrentFile();
59
+ stopWatcher();
60
+ }
61
+
62
+ void stopWatcher()
63
+ {
64
+ if (watcher != null) {
65
+ watcher.stopAsync();
66
+ }
67
+ }
68
+
69
+ @Override
70
+ public void nextFile()
71
+ {
72
+ closeCurrentFile();
73
+
74
+ try {
75
+ curFilename = getOutputFilePath();
76
+ tempFilename = curFilename + TMP_SUFFIX;
77
+ // resolve remote file & open output stream
78
+ remoteFile = sftpUtils.newSftpFile(sftpUtils.getSftpFileUri(tempFilename));
79
+ // this is where it's different from |SftpLocalFileOutput|
80
+ remoteOutput = new BufferedOutputStream(remoteFile.getContent().getOutputStream());
81
+ watcher = newProgressWatcher().startAsync();
82
+ }
83
+ catch (FileSystemException e) {
84
+ stopWatcher();
85
+ throw Throwables.propagate(e);
86
+ }
87
+ }
88
+
89
+ @Override
90
+ public void finish()
91
+ {
92
+ closeCurrentFile();
93
+ closeRemoteFile();
94
+ fileList.add(fileReport());
95
+ fileIndex++;
96
+ stopWatcher();
97
+ }
98
+
99
+ private Service newProgressWatcher()
100
+ {
101
+ return new AbstractScheduledService()
102
+ {
103
+ private static final int PERIOD = 10; // seconds
104
+ private long prevLen = 0L;
105
+
106
+ @Override
107
+ protected void runOneIteration()
108
+ {
109
+ logger.info("Upload progress: {} KB - {} KB/s", bufLen / 1024, (bufLen - prevLen) / 1024 / PERIOD);
110
+ prevLen = bufLen;
111
+ }
112
+
113
+ @Override
114
+ protected Scheduler scheduler()
115
+ {
116
+ return Scheduler.newFixedRateSchedule(PERIOD, PERIOD, TimeUnit.SECONDS);
117
+ }
118
+ };
119
+ }
120
+ }