embulk-output-sftp 0.1.10 → 0.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c56a0584cf221cab6a3e4caf8f5f6b7fd8c73750
4
- data.tar.gz: 9a54d758d53715b2dd6b13ecebbea1306e92ea1d
3
+ metadata.gz: 98edf71b4a5d7998403671b7e181b250f7db9981
4
+ data.tar.gz: 940f0d1fea8ed83c957bdfca08a2bb89725b487b
5
5
  SHA512:
6
- metadata.gz: 10749e577ff285cc6cce4254517f712925e637e959cddcefce74dd086cb54605c675bc272270b50d5291cd8dc36a307dab205dbcba65aaeb1a29814b9b3ad63b
7
- data.tar.gz: 9991eb85333675b381725365232947f628862a6e9430e912f3eeeee0d3e6d76e31db40a92fd7b7856706378c049c5b99ace8617980181fa6ec3bd1fc5b6963e9
6
+ metadata.gz: c4e8426ac217df25329da8a514b1603158bc7d6ed2b630b2e28486fcd36fb81e24aee588e5b1b2f6dea2e0996789a947eaac617330cb6180ca3e6afc35a9f0ed
7
+ data.tar.gz: 99c6f36a658a92dd3710c9b305892e5961f9d154694570b5f05e7cbf6e4ad2794adfda169e175622f3bb7ff5aa5714e25368e41a0f6224d3dce8acc004b87d9d
data/.travis.yml CHANGED
@@ -1,8 +1,7 @@
1
- dist: precise
1
+ dist: trusty
2
2
  language: java
3
3
  jdk:
4
- - openjdk7
5
- - oraclejdk7
4
+ - openjdk8
6
5
  - oraclejdk8
7
6
  script:
8
7
  - ./gradlew test
data/CHANGELOG.md CHANGED
@@ -1,3 +1,6 @@
1
+ 0.1.11 (2018-08-27)
2
+ - Enhance: Add 2 new configs `local_buffering` (boolean) and `temp_file_threshold` (long)
3
+ - https://github.com/embulk/embulk-output-sftp/pull/50
1
4
  0.1.10 (2018-05-07)
2
5
  - Fix: Use java.util.regex.Pattern for host name validation
3
6
  - https://github.com/embulk/embulk-output-sftp/pull/49
data/README.md CHANGED
@@ -24,6 +24,8 @@ Stores files on a SFTP Server
24
24
  - **file_ext**: Extension of output files (string, required)
25
25
  - **sequence_format**: Format for sequence part of output files (string, default: `".%03d.%02d"`)
26
26
  - **rename_file_after_upload**: Upload `file_ext` + ".tmp" first, then rename it after upload finish (boolean, default: `false`)
27
+ - **local_buffering**: Buffer records in a local temp file before uploading. If `false`, the plugin buffers records directly to the remote file, using ".tmp" as the filename suffix (boolean, default: `true`)
28
+ - **temp_file_threshold**: Maximum size of the local temp file; the plugin flushes (appends) it to the remote file when the local temp file reaches this threshold (long, default: `5368709120`, i.e. 5GiB, min: 50MiB, max: 10GiB)
27
29
 
28
30
  ### Proxy configuration
29
31
 
@@ -53,6 +55,7 @@ out:
53
55
  path_prefix: /data/sftp
54
56
  file_ext: _20151020.tsv
55
57
  sequence_format: ".%01d%01d"
58
+ temp_file_threshold: 10737418240 # 10GiB
56
59
  ```
57
60
 
58
61
  With proxy
data/build.gradle CHANGED
@@ -17,23 +17,24 @@ configurations {
17
17
  }
18
18
 
19
19
  group = "org.embulk.output.sftp"
20
- version = "0.1.10"
20
+ version = "0.1.11"
21
21
  sourceCompatibility = 1.7
22
22
  targetCompatibility = 1.7
23
23
 
24
24
  dependencies {
25
- compile "org.embulk:embulk-core:0.8.6"
26
- provided "org.embulk:embulk-core:0.8.6"
25
+ compile "org.embulk:embulk-core:0.9.7"
26
+ provided "org.embulk:embulk-core:0.9.7"
27
27
  // compile "YOUR_JAR_DEPENDENCY_GROUP:YOUR_JAR_DEPENDENCY_MODULE:YOUR_JAR_DEPENDENCY_VERSION"
28
28
  compile "org.apache.commons:commons-vfs2:2.2"
29
29
  compile "commons-io:commons-io:2.6"
30
30
  compile "com.jcraft:jsch:0.1.54"
31
31
  testCompile "junit:junit:4.+"
32
- testCompile "org.embulk:embulk-core:0.8.6:tests"
33
- testCompile "org.embulk:embulk-standards:0.8.6"
32
+ testCompile "org.embulk:embulk-core:0.9.7:tests"
33
+ testCompile "org.embulk:embulk-standards:0.9.7"
34
34
  testCompile "org.apache.sshd:apache-sshd:1.1.0+"
35
35
  testCompile "org.littleshoot:littleproxy:1.1.0-beta1"
36
36
  testCompile "io.netty:netty-all:4.0.34.Final"
37
+ testCompile "org.mockito:mockito-core:2.+"
37
38
  }
38
39
 
39
40
  jacocoTestReport {
@@ -13,6 +13,9 @@ import org.embulk.spi.FileOutputPlugin;
13
13
  import org.embulk.spi.TransactionalFileOutput;
14
14
  import org.embulk.spi.unit.LocalFile;
15
15
 
16
+ import javax.validation.constraints.Max;
17
+ import javax.validation.constraints.Min;
18
+
16
19
  import java.util.List;
17
20
  import java.util.Map;
18
21
 
@@ -47,7 +50,7 @@ public class SftpFileOutputPlugin
47
50
 
48
51
  @Config("user_directory_is_root")
49
52
  @ConfigDefault("true")
50
- public Boolean getUserDirIsRoot();
53
+ public boolean getUserDirIsRoot();
51
54
 
52
55
  @Config("timeout")
53
56
  @ConfigDefault("600") // 10 minutes
@@ -73,7 +76,18 @@ public class SftpFileOutputPlugin
73
76
 
74
77
  @Config("rename_file_after_upload")
75
78
  @ConfigDefault("false")
76
- public Boolean getRenameFileAfterUpload();
79
+ public boolean getRenameFileAfterUpload();
80
+
81
+ // if `false`, plugin will use remote file as buffer
82
+ @Config("local_buffering")
83
+ @ConfigDefault("true")
84
+ public boolean getLocalBuffering();
85
+
86
+ @Min(50L * 1024 * 1024) // 50MiB
87
+ @Max(10L * 1024 * 1024 * 1024) // 10GiB
88
+ @Config("temp_file_threshold")
89
+ @ConfigDefault("5368709120") // 5GiB
90
+ public long getTempFileThreshold();
77
91
  }
78
92
 
79
93
  @Override
@@ -141,6 +155,9 @@ public class SftpFileOutputPlugin
141
155
  public TransactionalFileOutput open(TaskSource taskSource, final int taskIndex)
142
156
  {
143
157
  final PluginTask task = taskSource.loadTask(PluginTask.class);
144
- return new SftpFileOutput(task, taskIndex);
158
+ if (task.getLocalBuffering()) {
159
+ return new SftpLocalFileOutput(task, taskIndex);
160
+ }
161
+ return new SftpRemoteFileOutput(task, taskIndex);
145
162
  }
146
163
  }
@@ -0,0 +1,223 @@
1
+ package org.embulk.output.sftp;
2
+
3
+ import com.google.common.annotations.VisibleForTesting;
4
+ import com.google.common.base.Throwables;
5
+ import com.google.common.collect.ImmutableMap;
6
+ import org.apache.commons.vfs2.FileObject;
7
+ import org.embulk.config.TaskReport;
8
+ import org.embulk.output.sftp.utils.TimeoutCloser;
9
+ import org.embulk.spi.Buffer;
10
+ import org.embulk.spi.Exec;
11
+ import org.embulk.spi.FileOutput;
12
+ import org.embulk.spi.TransactionalFileOutput;
13
+ import org.slf4j.Logger;
14
+
15
+ import java.io.BufferedOutputStream;
16
+ import java.io.File;
17
+ import java.io.FileNotFoundException;
18
+ import java.io.FileOutputStream;
19
+ import java.io.IOException;
20
+ import java.io.OutputStream;
21
+ import java.util.ArrayList;
22
+ import java.util.List;
23
+ import java.util.Map;
24
+
25
+ import static org.embulk.output.sftp.SftpFileOutputPlugin.PluginTask;
26
+
27
+ /**
28
+ * Created by takahiro.nakayama on 10/20/15.
29
+ */
30
+ public class SftpLocalFileOutput
31
+ implements FileOutput, TransactionalFileOutput
32
+ {
33
+ // to make it clear that it is a constant
34
+ static final String TMP_SUFFIX = ".tmp";
35
+
36
+ final Logger logger = Exec.getLogger(getClass());
37
+ private final String pathPrefix;
38
+ private final String sequenceFormat;
39
+ private final String fileNameExtension;
40
+ private boolean renameFileAfterUpload;
41
+
42
+ private final int taskIndex;
43
+ final SftpUtils sftpUtils;
44
+ int fileIndex = 0;
45
+ private File tempFile;
46
+ private BufferedOutputStream localOutput = null;
47
+ List<Map<String, String>> fileList = new ArrayList<>();
48
+ String curFilename;
49
+ String tempFilename;
50
+
51
+ /* for file splitting purpose */
52
+ private final long threshold; // local file size to flush (upload to server)
53
+ boolean appending = false; // when local file exceeds threshold, go to append mode
54
+ FileObject remoteFile;
55
+ BufferedOutputStream remoteOutput; // to keep output stream open during append mode
56
+ long bufLen = 0L; // local temp file size
57
+
58
+ SftpLocalFileOutput(PluginTask task, int taskIndex)
59
+ {
60
+ this.pathPrefix = task.getPathPrefix();
61
+ this.sequenceFormat = task.getSequenceFormat();
62
+ this.fileNameExtension = task.getFileNameExtension();
63
+ this.renameFileAfterUpload = task.getRenameFileAfterUpload();
64
+ this.taskIndex = taskIndex;
65
+ this.sftpUtils = new SftpUtils(task);
66
+ this.threshold = task.getTempFileThreshold();
67
+ }
68
+
69
+ @Override
70
+ public void nextFile()
71
+ {
72
+ closeCurrentFile();
73
+
74
+ try {
75
+ tempFile = Exec.getTempFileSpace().createTempFile();
76
+ localOutput = new BufferedOutputStream(new FileOutputStream(tempFile));
77
+ appending = false;
78
+ curFilename = getOutputFilePath();
79
+ tempFilename = curFilename + TMP_SUFFIX;
80
+ }
81
+ catch (FileNotFoundException e) {
82
+ logger.error(e.getMessage());
83
+ throw Throwables.propagate(e);
84
+ }
85
+ }
86
+
87
+ @Override
88
+ public void add(final Buffer buffer)
89
+ {
90
+ try {
91
+ final int len = buffer.limit();
92
+ if (bufLen + len > threshold) {
93
+ localOutput.close();
94
+ // into 'append' mode
95
+ appending = true;
96
+ flush();
97
+
98
+ // reset output stream (overwrite local temp file)
99
+ localOutput = new BufferedOutputStream(new FileOutputStream(tempFile));
100
+ bufLen = 0L;
101
+ }
102
+ localOutput.write(buffer.array(), buffer.offset(), len);
103
+ bufLen += len;
104
+ }
105
+ catch (IOException ex) {
106
+ throw Throwables.propagate(ex);
107
+ }
108
+ finally {
109
+ buffer.release();
110
+ }
111
+ }
112
+
113
+ @Override
114
+ public void finish()
115
+ {
116
+ closeCurrentFile();
117
+ try {
118
+ flush();
119
+ }
120
+ catch (IOException e) {
121
+ throw Throwables.propagate(e);
122
+ }
123
+ closeRemoteFile();
124
+ // if input config is not `renameFileAfterUpload`
125
+ // and file is being split, we have to rename it here
126
+ // otherwise, when it exits, it won't rename
127
+ if (!renameFileAfterUpload && appending) {
128
+ sftpUtils.renameFile(tempFilename, curFilename);
129
+ }
130
+ fileList.add(fileReport());
131
+ fileIndex++;
132
+ }
133
+
134
+ @Override
135
+ public void close()
136
+ {
137
+ closeCurrentFile();
138
+ // TODO
139
+ sftpUtils.close();
140
+ }
141
+
142
+ @Override
143
+ public void abort()
144
+ {
145
+ // delete incomplete files
146
+ if (renameFileAfterUpload) {
147
+ sftpUtils.deleteFile(tempFilename);
148
+ }
149
+ else {
150
+ sftpUtils.deleteFile(curFilename);
151
+ }
152
+ }
153
+
154
+ @Override
155
+ public TaskReport commit()
156
+ {
157
+ TaskReport report = Exec.newTaskReport();
158
+ report.set("file_list", fileList);
159
+ return report;
160
+ }
161
+
162
+ void closeCurrentFile()
163
+ {
164
+ try {
165
+ if (localOutput != null) {
166
+ localOutput.close();
167
+ localOutput = null;
168
+ }
169
+ }
170
+ catch (IOException ex) {
171
+ throw Throwables.propagate(ex);
172
+ }
173
+ }
174
+
175
+ void closeRemoteFile()
176
+ {
177
+ if (remoteOutput != null) {
178
+ new TimeoutCloser(remoteOutput).close();
179
+ remoteOutput = null;
180
+ remoteFile = null;
181
+ }
182
+ }
183
+
184
+ String getOutputFilePath()
185
+ {
186
+ return pathPrefix + String.format(sequenceFormat, taskIndex, fileIndex) + fileNameExtension;
187
+ }
188
+
189
+ Map<String, String> fileReport()
190
+ {
191
+ return ImmutableMap.of(
192
+ "temporary_filename", tempFilename,
193
+ "real_filename", curFilename
194
+ );
195
+ }
196
+
197
+ private void flush() throws IOException
198
+ {
199
+ if (appending) {
200
+ // open and keep stream open
201
+ if (remoteOutput == null) {
202
+ remoteFile = sftpUtils.resolve(tempFilename);
203
+ remoteOutput = sftpUtils.openStream(remoteFile);
204
+ }
205
+ sftpUtils.appendFile(tempFile, remoteFile, remoteOutput);
206
+ }
207
+ else {
208
+ sftpUtils.uploadFile(tempFile, renameFileAfterUpload ? tempFilename : curFilename);
209
+ }
210
+ }
211
+
212
+ @VisibleForTesting
213
+ OutputStream getLocalOutput()
214
+ {
215
+ return localOutput;
216
+ }
217
+
218
+ @VisibleForTesting
219
+ OutputStream getRemoteOutput()
220
+ {
221
+ return remoteOutput;
222
+ }
223
+ }
@@ -0,0 +1,120 @@
1
+ package org.embulk.output.sftp;
2
+
3
+ import com.google.common.base.Throwables;
4
+ import com.google.common.util.concurrent.AbstractScheduledService;
5
+ import com.google.common.util.concurrent.Service;
6
+ import org.apache.commons.vfs2.FileSystemException;
7
+ import org.embulk.output.sftp.utils.TimedCallable;
8
+ import org.embulk.spi.Buffer;
9
+
10
+ import java.io.BufferedOutputStream;
11
+ import java.util.concurrent.ExecutionException;
12
+ import java.util.concurrent.TimeUnit;
13
+ import java.util.concurrent.TimeoutException;
14
+
15
+ import static org.embulk.output.sftp.SftpFileOutputPlugin.PluginTask;
16
+
17
+ public class SftpRemoteFileOutput extends SftpLocalFileOutput
18
+ {
19
+ private static final int TIMEOUT = 60; // 1min
20
+ private Service watcher;
21
+
22
+ SftpRemoteFileOutput(PluginTask task, int taskIndex)
23
+ {
24
+ super(task, taskIndex);
25
+ appending = true;
26
+ }
27
+
28
+ @Override
29
+ public void add(final Buffer buffer)
30
+ {
31
+ try {
32
+ final int len = buffer.limit();
33
+ // time-out write
34
+ new TimedCallable<Void>()
35
+ {
36
+ @Override
37
+ public Void call() throws Exception
38
+ {
39
+ remoteOutput.write(buffer.array(), buffer.offset(), len);
40
+ return null;
41
+ }
42
+ }.call(TIMEOUT, TimeUnit.SECONDS);
43
+ bufLen += len;
44
+ }
45
+ catch (InterruptedException | ExecutionException | TimeoutException ex) {
46
+ logger.error("Failed to write buffer", ex);
47
+ stopWatcher();
48
+ throw Throwables.propagate(ex);
49
+ }
50
+ finally {
51
+ buffer.release();
52
+ }
53
+ }
54
+
55
+ @Override
56
+ void closeCurrentFile()
57
+ {
58
+ super.closeCurrentFile();
59
+ stopWatcher();
60
+ }
61
+
62
+ void stopWatcher()
63
+ {
64
+ if (watcher != null) {
65
+ watcher.stopAsync();
66
+ }
67
+ }
68
+
69
+ @Override
70
+ public void nextFile()
71
+ {
72
+ closeCurrentFile();
73
+
74
+ try {
75
+ curFilename = getOutputFilePath();
76
+ tempFilename = curFilename + TMP_SUFFIX;
77
+ // resolve remote file & open output stream
78
+ remoteFile = sftpUtils.newSftpFile(sftpUtils.getSftpFileUri(tempFilename));
79
+ // this is where it's different from |SftpLocalFileOutput|
80
+ remoteOutput = new BufferedOutputStream(remoteFile.getContent().getOutputStream());
81
+ watcher = newProgressWatcher().startAsync();
82
+ }
83
+ catch (FileSystemException e) {
84
+ stopWatcher();
85
+ throw Throwables.propagate(e);
86
+ }
87
+ }
88
+
89
+ @Override
90
+ public void finish()
91
+ {
92
+ closeCurrentFile();
93
+ closeRemoteFile();
94
+ fileList.add(fileReport());
95
+ fileIndex++;
96
+ stopWatcher();
97
+ }
98
+
99
+ private Service newProgressWatcher()
100
+ {
101
+ return new AbstractScheduledService()
102
+ {
103
+ private static final int PERIOD = 10; // seconds
104
+ private long prevLen = 0L;
105
+
106
+ @Override
107
+ protected void runOneIteration()
108
+ {
109
+ logger.info("Upload progress: {} KB - {} KB/s", bufLen / 1024, (bufLen - prevLen) / 1024 / PERIOD);
110
+ prevLen = bufLen;
111
+ }
112
+
113
+ @Override
114
+ protected Scheduler scheduler()
115
+ {
116
+ return Scheduler.newFixedRateSchedule(PERIOD, PERIOD, TimeUnit.SECONDS);
117
+ }
118
+ };
119
+ }
120
+ }