embulk-input-swift 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +12 -0
- data/LICENSE.txt +21 -0
- data/README.md +48 -0
- data/build.gradle +98 -0
- data/config/checkstyle/checkstyle.xml +128 -0
- data/config/checkstyle/default.xml +108 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +6 -0
- data/gradlew +160 -0
- data/gradlew.bat +90 -0
- data/lib/embulk/input/swift.rb +3 -0
- data/src/main/java/org/embulk/input/swift/FileList.java +300 -0
- data/src/main/java/org/embulk/input/swift/SwiftFileInputPlugin.java +307 -0
- data/src/test/java/org/embulk/input/swift/TestSwiftFileInputPlugin.java +9 -0
- metadata +96 -0
data/gradlew.bat
ADDED
@@ -0,0 +1,90 @@
|
|
1
|
+
@if "%DEBUG%" == "" @echo off
|
2
|
+
@rem ##########################################################################
|
3
|
+
@rem
|
4
|
+
@rem Gradle startup script for Windows
|
5
|
+
@rem
|
6
|
+
@rem ##########################################################################
|
7
|
+
|
8
|
+
@rem Set local scope for the variables with windows NT shell
|
9
|
+
if "%OS%"=="Windows_NT" setlocal
|
10
|
+
|
11
|
+
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
12
|
+
set DEFAULT_JVM_OPTS=
|
13
|
+
|
14
|
+
set DIRNAME=%~dp0
|
15
|
+
if "%DIRNAME%" == "" set DIRNAME=.
|
16
|
+
set APP_BASE_NAME=%~n0
|
17
|
+
set APP_HOME=%DIRNAME%
|
18
|
+
|
19
|
+
@rem Find java.exe
|
20
|
+
if defined JAVA_HOME goto findJavaFromJavaHome
|
21
|
+
|
22
|
+
set JAVA_EXE=java.exe
|
23
|
+
%JAVA_EXE% -version >NUL 2>&1
|
24
|
+
if "%ERRORLEVEL%" == "0" goto init
|
25
|
+
|
26
|
+
echo.
|
27
|
+
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
|
28
|
+
echo.
|
29
|
+
echo Please set the JAVA_HOME variable in your environment to match the
|
30
|
+
echo location of your Java installation.
|
31
|
+
|
32
|
+
goto fail
|
33
|
+
|
34
|
+
:findJavaFromJavaHome
|
35
|
+
set JAVA_HOME=%JAVA_HOME:"=%
|
36
|
+
set JAVA_EXE=%JAVA_HOME%/bin/java.exe
|
37
|
+
|
38
|
+
if exist "%JAVA_EXE%" goto init
|
39
|
+
|
40
|
+
echo.
|
41
|
+
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
|
42
|
+
echo.
|
43
|
+
echo Please set the JAVA_HOME variable in your environment to match the
|
44
|
+
echo location of your Java installation.
|
45
|
+
|
46
|
+
goto fail
|
47
|
+
|
48
|
+
:init
|
49
|
+
@rem Get command-line arguments, handling Windowz variants
|
50
|
+
|
51
|
+
if not "%OS%" == "Windows_NT" goto win9xME_args
|
52
|
+
if "%@eval[2+2]" == "4" goto 4NT_args
|
53
|
+
|
54
|
+
:win9xME_args
|
55
|
+
@rem Slurp the command line arguments.
|
56
|
+
set CMD_LINE_ARGS=
|
57
|
+
set _SKIP=2
|
58
|
+
|
59
|
+
:win9xME_args_slurp
|
60
|
+
if "x%~1" == "x" goto execute
|
61
|
+
|
62
|
+
set CMD_LINE_ARGS=%*
|
63
|
+
goto execute
|
64
|
+
|
65
|
+
:4NT_args
|
66
|
+
@rem Get arguments from the 4NT Shell from JP Software
|
67
|
+
set CMD_LINE_ARGS=%$
|
68
|
+
|
69
|
+
:execute
|
70
|
+
@rem Setup the command line
|
71
|
+
|
72
|
+
set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
|
73
|
+
|
74
|
+
@rem Execute Gradle
|
75
|
+
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
|
76
|
+
|
77
|
+
:end
|
78
|
+
@rem End local scope for the variables with windows NT shell
|
79
|
+
if "%ERRORLEVEL%"=="0" goto mainEnd
|
80
|
+
|
81
|
+
:fail
|
82
|
+
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
|
83
|
+
rem the _cmd.exe /c_ return code!
|
84
|
+
if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
|
85
|
+
exit /b 1
|
86
|
+
|
87
|
+
:mainEnd
|
88
|
+
if "%OS%"=="Windows_NT" endlocal
|
89
|
+
|
90
|
+
:omega
|
@@ -0,0 +1,300 @@
|
|
1
|
+
package org.embulk.input.swift;
|
2
|
+
|
3
|
+
import java.util.List;
|
4
|
+
import java.util.AbstractList;
|
5
|
+
import java.util.ArrayList;
|
6
|
+
import java.util.zip.GZIPInputStream;
|
7
|
+
import java.util.zip.GZIPOutputStream;
|
8
|
+
import java.util.regex.Pattern;
|
9
|
+
import java.util.regex.Matcher;
|
10
|
+
import java.io.InputStream;
|
11
|
+
import java.io.OutputStream;
|
12
|
+
import java.io.BufferedOutputStream;
|
13
|
+
import java.io.BufferedInputStream;
|
14
|
+
import java.io.ByteArrayInputStream;
|
15
|
+
import java.io.ByteArrayOutputStream;
|
16
|
+
import java.io.IOException;
|
17
|
+
import java.nio.ByteBuffer;
|
18
|
+
import java.nio.charset.StandardCharsets;
|
19
|
+
import org.embulk.config.Config;
|
20
|
+
import org.embulk.config.ConfigDefault;
|
21
|
+
import org.embulk.config.ConfigSource;
|
22
|
+
import com.google.common.base.Throwables;
|
23
|
+
import com.google.common.base.Optional;
|
24
|
+
import com.google.common.collect.ImmutableList;
|
25
|
+
import com.fasterxml.jackson.annotation.JsonProperty;
|
26
|
+
import com.fasterxml.jackson.annotation.JsonIgnore;
|
27
|
+
import com.fasterxml.jackson.annotation.JsonCreator;
|
28
|
+
|
29
|
+
// This class was reused from the embulk-input-s3 plugin (https://github.com/embulk/embulk-input-s3).
|
30
|
+
public class FileList {
|
31
|
+
public interface Task {
|
32
|
+
@Config("path_match_pattern")
|
33
|
+
@ConfigDefault("\".*\"")
|
34
|
+
String getPathMatchPattern();
|
35
|
+
|
36
|
+
@Config("total_file_count_limit")
|
37
|
+
@ConfigDefault("2147483647")
|
38
|
+
int getTotalFileCountLimit();
|
39
|
+
|
40
|
+
// TODO support more algorithms to combine tasks
|
41
|
+
@Config("min_task_size")
|
42
|
+
@ConfigDefault("0")
|
43
|
+
long getMinTaskSize();
|
44
|
+
}
|
45
|
+
|
46
|
+
public static class Entry {
|
47
|
+
private int index;
|
48
|
+
private long size;
|
49
|
+
|
50
|
+
@JsonCreator
|
51
|
+
public Entry(
|
52
|
+
@JsonProperty("index") int index,
|
53
|
+
@JsonProperty("size") long size) {
|
54
|
+
this.index = index;
|
55
|
+
this.size = size;
|
56
|
+
}
|
57
|
+
|
58
|
+
@JsonProperty("index")
|
59
|
+
public int getIndex() {
|
60
|
+
return index;
|
61
|
+
}
|
62
|
+
|
63
|
+
@JsonProperty("size")
|
64
|
+
public long getSize() {
|
65
|
+
return size;
|
66
|
+
}
|
67
|
+
}
|
68
|
+
|
69
|
+
public static class Builder {
|
70
|
+
private final ByteArrayOutputStream binary;
|
71
|
+
private final OutputStream stream;
|
72
|
+
private final List<Entry> entries = new ArrayList<>();
|
73
|
+
private String last = null;
|
74
|
+
|
75
|
+
private int limitCount = Integer.MAX_VALUE;
|
76
|
+
private long minTaskSize = 1;
|
77
|
+
private Pattern pathMatchPattern;
|
78
|
+
|
79
|
+
private final ByteBuffer castBuffer = ByteBuffer.allocate(4);
|
80
|
+
|
81
|
+
public Builder(Task task) {
|
82
|
+
this();
|
83
|
+
this.pathMatchPattern = Pattern.compile(task.getPathMatchPattern());
|
84
|
+
this.limitCount = task.getTotalFileCountLimit();
|
85
|
+
this.minTaskSize = task.getMinTaskSize();
|
86
|
+
}
|
87
|
+
|
88
|
+
public Builder(ConfigSource config) {
|
89
|
+
this();
|
90
|
+
this.pathMatchPattern = Pattern.compile(config.get(String.class, "path_match_pattern", ".*"));
|
91
|
+
this.limitCount = config.get(int.class, "total_file_count_limit", Integer.MAX_VALUE);
|
92
|
+
this.minTaskSize = config.get(long.class, "min_task_size", 0L);
|
93
|
+
}
|
94
|
+
|
95
|
+
public Builder() {
|
96
|
+
binary = new ByteArrayOutputStream();
|
97
|
+
try {
|
98
|
+
stream = new BufferedOutputStream(new GZIPOutputStream(binary));
|
99
|
+
} catch (IOException ex) {
|
100
|
+
throw Throwables.propagate(ex);
|
101
|
+
}
|
102
|
+
}
|
103
|
+
|
104
|
+
public Builder limitTotalFileCount(int limitCount) {
|
105
|
+
this.limitCount = limitCount;
|
106
|
+
return this;
|
107
|
+
}
|
108
|
+
|
109
|
+
public Builder minTaskSize(long bytes) {
|
110
|
+
this.minTaskSize = bytes;
|
111
|
+
return this;
|
112
|
+
}
|
113
|
+
|
114
|
+
public Builder pathMatchPattern(String pattern) {
|
115
|
+
this.pathMatchPattern = Pattern.compile(pattern);
|
116
|
+
return this;
|
117
|
+
}
|
118
|
+
|
119
|
+
public int size() {
|
120
|
+
return entries.size();
|
121
|
+
}
|
122
|
+
|
123
|
+
public boolean needsMore() {
|
124
|
+
return size() < limitCount;
|
125
|
+
}
|
126
|
+
|
127
|
+
// returns true if this file is used
|
128
|
+
public synchronized boolean add(String path, long size) {
|
129
|
+
// TODO throw IllegalStateException if stream is already closed
|
130
|
+
|
131
|
+
if (!needsMore()) {
|
132
|
+
return false;
|
133
|
+
}
|
134
|
+
|
135
|
+
if (!pathMatchPattern.matcher(path).find()) {
|
136
|
+
return false;
|
137
|
+
}
|
138
|
+
|
139
|
+
int index = entries.size();
|
140
|
+
entries.add(new Entry(index, size));
|
141
|
+
|
142
|
+
byte[] data = path.getBytes(StandardCharsets.UTF_8);
|
143
|
+
castBuffer.putInt(0, data.length);
|
144
|
+
try {
|
145
|
+
stream.write(castBuffer.array());
|
146
|
+
stream.write(data);
|
147
|
+
} catch (IOException ex) {
|
148
|
+
throw Throwables.propagate(ex);
|
149
|
+
}
|
150
|
+
|
151
|
+
last = path;
|
152
|
+
return true;
|
153
|
+
}
|
154
|
+
|
155
|
+
public FileList build() {
|
156
|
+
try {
|
157
|
+
stream.close();
|
158
|
+
} catch (IOException ex) {
|
159
|
+
throw Throwables.propagate(ex);
|
160
|
+
}
|
161
|
+
return new FileList(binary.toByteArray(), getSplits(entries), Optional.fromNullable(last));
|
162
|
+
}
|
163
|
+
|
164
|
+
private List<List<Entry>> getSplits(List<Entry> all) {
|
165
|
+
List<List<Entry>> tasks = new ArrayList<>();
|
166
|
+
long currentTaskSize = 0;
|
167
|
+
List<Entry> currentTask = new ArrayList<>();
|
168
|
+
for (Entry entry : all) {
|
169
|
+
currentTask.add(entry);
|
170
|
+
currentTaskSize += entry.getSize(); // TODO consider to multiply the size by cost_per_byte, and add cost_per_file
|
171
|
+
if (currentTaskSize >= minTaskSize) {
|
172
|
+
tasks.add(currentTask);
|
173
|
+
currentTask = new ArrayList<>();
|
174
|
+
currentTaskSize = 0;
|
175
|
+
}
|
176
|
+
}
|
177
|
+
if (!currentTask.isEmpty()) {
|
178
|
+
tasks.add(currentTask);
|
179
|
+
}
|
180
|
+
return tasks;
|
181
|
+
}
|
182
|
+
}
|
183
|
+
|
184
|
+
private final byte[] data;
|
185
|
+
private final List<List<Entry>> tasks;
|
186
|
+
private final Optional<String> last;
|
187
|
+
|
188
|
+
/**
 * Rebuilds a file list from its serialized properties. Deprecated for direct
 * use; instances are otherwise produced by {@code Builder#build()}.
 *
 * @param data  compressed path stream
 * @param tasks entries grouped per task
 * @param last  last accepted path, if any
 */
@JsonCreator
@Deprecated
public FileList(
        @JsonProperty("data") byte[] data,
        @JsonProperty("tasks") List<List<Entry>> tasks,
        @JsonProperty("last") Optional<String> last) {
    this.last = last;
    this.tasks = tasks;
    this.data = data;
}
|
198
|
+
|
199
|
+
@JsonIgnore
|
200
|
+
public Optional<String> getLastPath(Optional<String> lastLastPath) {
|
201
|
+
if (last.isPresent()) {
|
202
|
+
return last;
|
203
|
+
}
|
204
|
+
return lastLastPath;
|
205
|
+
}
|
206
|
+
|
207
|
+
@JsonIgnore
|
208
|
+
public int getTaskCount() {
|
209
|
+
return tasks.size();
|
210
|
+
}
|
211
|
+
|
212
|
+
@JsonIgnore
|
213
|
+
public List<String> get(int i) {
|
214
|
+
return new EntryList(data, tasks.get(i));
|
215
|
+
}
|
216
|
+
|
217
|
+
@JsonProperty("data")
|
218
|
+
@Deprecated
|
219
|
+
public byte[] getData() {
|
220
|
+
return data;
|
221
|
+
}
|
222
|
+
|
223
|
+
@JsonProperty("tasks")
|
224
|
+
@Deprecated
|
225
|
+
public List<List<Entry>> getTasks() {
|
226
|
+
return tasks;
|
227
|
+
}
|
228
|
+
|
229
|
+
@JsonProperty("last")
|
230
|
+
@Deprecated
|
231
|
+
public Optional<String> getLast() {
|
232
|
+
return last;
|
233
|
+
}
|
234
|
+
|
235
|
+
private class EntryList
|
236
|
+
extends AbstractList<String> {
|
237
|
+
private final byte[] data;
|
238
|
+
private final List<Entry> entries;
|
239
|
+
private InputStream stream;
|
240
|
+
private int current;
|
241
|
+
|
242
|
+
private final ByteBuffer castBuffer = ByteBuffer.allocate(4);
|
243
|
+
|
244
|
+
public EntryList(byte[] data, List<Entry> entries) {
|
245
|
+
this.data = data;
|
246
|
+
this.entries = entries;
|
247
|
+
try {
|
248
|
+
this.stream = new BufferedInputStream(new GZIPInputStream(new ByteArrayInputStream(data)));
|
249
|
+
} catch (IOException ex) {
|
250
|
+
throw Throwables.propagate(ex);
|
251
|
+
}
|
252
|
+
this.current = 0;
|
253
|
+
}
|
254
|
+
|
255
|
+
@Override
|
256
|
+
public synchronized String get(int i) {
|
257
|
+
Entry e = entries.get(i);
|
258
|
+
if (e.getIndex() < current) {
|
259
|
+
// rewind to the head
|
260
|
+
try {
|
261
|
+
stream.close();
|
262
|
+
stream = new BufferedInputStream(new GZIPInputStream(new ByteArrayInputStream(data)));
|
263
|
+
} catch (IOException ex) {
|
264
|
+
throw Throwables.propagate(ex);
|
265
|
+
}
|
266
|
+
current = 0;
|
267
|
+
}
|
268
|
+
|
269
|
+
while (current < e.getIndex()) {
|
270
|
+
readNext();
|
271
|
+
}
|
272
|
+
// now current == e.getIndex()
|
273
|
+
return readNextString();
|
274
|
+
}
|
275
|
+
|
276
|
+
@Override
|
277
|
+
public int size() {
|
278
|
+
return entries.size();
|
279
|
+
}
|
280
|
+
|
281
|
+
private byte[] readNext() {
|
282
|
+
try {
|
283
|
+
stream.read(castBuffer.array());
|
284
|
+
int n = castBuffer.getInt(0);
|
285
|
+
byte[] b = new byte[n]; // here should be able to use a pooled buffer because read data is ignored if readNextString doesn't call this method
|
286
|
+
stream.read(b);
|
287
|
+
|
288
|
+
current++;
|
289
|
+
|
290
|
+
return b;
|
291
|
+
} catch (IOException ex) {
|
292
|
+
throw Throwables.propagate(ex);
|
293
|
+
}
|
294
|
+
}
|
295
|
+
|
296
|
+
private String readNextString() {
|
297
|
+
return new String(readNext(), StandardCharsets.UTF_8);
|
298
|
+
}
|
299
|
+
}
|
300
|
+
}
|
@@ -0,0 +1,307 @@
|
|
1
|
+
package org.embulk.input.swift;
|
2
|
+
|
3
|
+
import java.io.IOException;
|
4
|
+
import java.io.InputStream;
|
5
|
+
import java.io.InterruptedIOException;
|
6
|
+
import java.util.Collection;
|
7
|
+
import java.util.Iterator;
|
8
|
+
import java.util.List;
|
9
|
+
|
10
|
+
import com.google.common.annotations.VisibleForTesting;
|
11
|
+
import com.google.common.base.Optional;
|
12
|
+
import com.google.common.base.Throwables;
|
13
|
+
import org.embulk.config.*;
|
14
|
+
import org.embulk.spi.Exec;
|
15
|
+
import org.embulk.spi.FileInputPlugin;
|
16
|
+
import org.embulk.spi.BufferAllocator;
|
17
|
+
import org.embulk.spi.TransactionalFileInput;
|
18
|
+
import org.embulk.spi.util.InputStreamFileInput;
|
19
|
+
import org.embulk.spi.util.ResumableInputStream;
|
20
|
+
import org.embulk.spi.util.RetryExecutor;
|
21
|
+
import org.javaswift.joss.client.factory.AccountConfig;
|
22
|
+
import org.javaswift.joss.client.factory.AccountFactory;
|
23
|
+
import org.javaswift.joss.client.factory.AuthenticationMethod;
|
24
|
+
import org.javaswift.joss.headers.object.range.ExcludeStartRange;
|
25
|
+
import org.javaswift.joss.headers.object.range.LastPartRange;
|
26
|
+
import org.javaswift.joss.headers.object.range.MidPartRange;
|
27
|
+
import org.javaswift.joss.instructions.DownloadInstructions;
|
28
|
+
import org.javaswift.joss.model.Account;
|
29
|
+
import org.javaswift.joss.model.Container;
|
30
|
+
import org.javaswift.joss.model.StoredObject;
|
31
|
+
import org.slf4j.Logger;
|
32
|
+
|
33
|
+
import static org.embulk.spi.util.RetryExecutor.retryExecutor;
|
34
|
+
|
35
|
+
public class SwiftFileInputPlugin
|
36
|
+
implements FileInputPlugin {
|
37
|
+
public interface PluginTask
|
38
|
+
extends FileList.Task, Task {
|
39
|
+
@Config("username")
|
40
|
+
public String getUsername();
|
41
|
+
|
42
|
+
@Config("password")
|
43
|
+
public String getPassword();
|
44
|
+
|
45
|
+
@Config("auth_url")
|
46
|
+
public String getAuthUrl();
|
47
|
+
|
48
|
+
@Config("auth_type")
|
49
|
+
public String getAuthType();
|
50
|
+
|
51
|
+
@Config("tenant_id")
|
52
|
+
@ConfigDefault("null")
|
53
|
+
public Optional<String> getTenantId();
|
54
|
+
|
55
|
+
@Config("tenant_name")
|
56
|
+
@ConfigDefault("null")
|
57
|
+
public Optional<String> getTenantName();
|
58
|
+
|
59
|
+
@Config("container")
|
60
|
+
public String getContainer();
|
61
|
+
|
62
|
+
@Config("path_prefix")
|
63
|
+
public String getPathPrefix();
|
64
|
+
|
65
|
+
@Config("last_path")
|
66
|
+
@ConfigDefault("null")
|
67
|
+
public Optional<String> getLastPath();
|
68
|
+
|
69
|
+
@Config("incremental")
|
70
|
+
@ConfigDefault("true")
|
71
|
+
public boolean getIncremental();
|
72
|
+
|
73
|
+
public FileList getFiles();
|
74
|
+
|
75
|
+
public void setFiles(FileList files);
|
76
|
+
|
77
|
+
@ConfigInject
|
78
|
+
public BufferAllocator getBufferAllocator();
|
79
|
+
}
|
80
|
+
|
81
|
+
/**
|
82
|
+
* Logger
|
83
|
+
*/
|
84
|
+
private static final Logger LOGGER = Exec.getLogger(SwiftFileInputPlugin.class);
|
85
|
+
|
86
|
+
private Account getAccount(PluginTask task) {
|
87
|
+
AccountConfig accountConfig = new AccountConfig();
|
88
|
+
|
89
|
+
String auth_type = task.getAuthType();
|
90
|
+
accountConfig.setAuthUrl(task.getAuthUrl());
|
91
|
+
accountConfig.setUsername(task.getUsername());
|
92
|
+
accountConfig.setPassword(task.getPassword());
|
93
|
+
|
94
|
+
Optional<String> tenant_id = task.getTenantId();
|
95
|
+
if (tenant_id.isPresent()) {
|
96
|
+
accountConfig.setTenantId(tenant_id.get());
|
97
|
+
}
|
98
|
+
Optional<String> tenant_name = task.getTenantName();
|
99
|
+
if (tenant_name.isPresent()) {
|
100
|
+
accountConfig.setTenantName(tenant_name.get());
|
101
|
+
}
|
102
|
+
|
103
|
+
if (auth_type.equals("keystone")) {
|
104
|
+
if (!tenant_id.isPresent() && !tenant_name.isPresent()) {
|
105
|
+
throw new ConfigException("if you choose keystone auth, you must specify to either tenant_id or tenant_name.");
|
106
|
+
}
|
107
|
+
accountConfig.setAuthenticationMethod(AuthenticationMethod.KEYSTONE);
|
108
|
+
} else if (auth_type.equals("tempauth")) {
|
109
|
+
accountConfig.setAuthenticationMethod(AuthenticationMethod.TEMPAUTH);
|
110
|
+
} else if (auth_type.equals("basic")) {
|
111
|
+
accountConfig.setAuthenticationMethod(AuthenticationMethod.BASIC);
|
112
|
+
} else {
|
113
|
+
throw new ConfigException("auth_type has to be either keystone, tempauth or basic.");
|
114
|
+
}
|
115
|
+
|
116
|
+
return new AccountFactory(accountConfig).createAccount();
|
117
|
+
}
|
118
|
+
|
119
|
+
/**
|
120
|
+
* retrieve target objects with specified prefix
|
121
|
+
* @param task PluginTsak
|
122
|
+
* @return List of Target Objects
|
123
|
+
*/
|
124
|
+
private FileList listFiles(PluginTask task) {
|
125
|
+
FileList.Builder builder = new FileList.Builder(task);
|
126
|
+
Account account = this.getAccount(task);
|
127
|
+
Container container = account.getContainer(task.getContainer());
|
128
|
+
|
129
|
+
// if the container is not exist, cannot input.
|
130
|
+
if (container.exists() == false) {
|
131
|
+
throw new ConfigException("Container not found");
|
132
|
+
}
|
133
|
+
|
134
|
+
String marker = task.getLastPath().orNull();
|
135
|
+
|
136
|
+
do {
|
137
|
+
Collection<StoredObject> objectList = container.list(task.getPathPrefix(), marker, 1024);
|
138
|
+
if (objectList.size() > 0) {
|
139
|
+
for (StoredObject obj : objectList) {
|
140
|
+
if (obj.getContentLength() > 0) {
|
141
|
+
LOGGER.info("add {}", obj.getName());
|
142
|
+
builder.add(obj.getName(), obj.getContentLength());
|
143
|
+
if (!builder.needsMore()) {
|
144
|
+
return builder.build();
|
145
|
+
}
|
146
|
+
}
|
147
|
+
marker = obj.getName();
|
148
|
+
}
|
149
|
+
} else {
|
150
|
+
break;
|
151
|
+
}
|
152
|
+
} while (marker != null);
|
153
|
+
|
154
|
+
return builder.build();
|
155
|
+
}
|
156
|
+
|
157
|
+
@Override
|
158
|
+
public ConfigDiff transaction(ConfigSource config, FileInputPlugin.Control control) {
|
159
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
160
|
+
|
161
|
+
//set input files
|
162
|
+
task.setFiles(this.listFiles(task));
|
163
|
+
int taskCount = task.getFiles().getTaskCount();
|
164
|
+
|
165
|
+
return resume(task.dump(), taskCount, control);
|
166
|
+
}
|
167
|
+
|
168
|
+
@Override
|
169
|
+
public ConfigDiff resume(TaskSource taskSource,
|
170
|
+
int taskCount,
|
171
|
+
FileInputPlugin.Control control) {
|
172
|
+
PluginTask task = taskSource.loadTask(PluginTask.class);
|
173
|
+
|
174
|
+
//validate
|
175
|
+
this.getAccount(task);
|
176
|
+
|
177
|
+
control.run(taskSource, taskCount);
|
178
|
+
|
179
|
+
ConfigDiff configDiff = Exec.newConfigDiff();
|
180
|
+
|
181
|
+
if (task.getIncremental()) {
|
182
|
+
configDiff.set("last_path", task.getFiles().getLastPath(task.getLastPath()));
|
183
|
+
}
|
184
|
+
|
185
|
+
return configDiff;
|
186
|
+
}
|
187
|
+
|
188
|
+
@Override
|
189
|
+
public void cleanup(TaskSource taskSource,
|
190
|
+
int taskCount,
|
191
|
+
List<TaskReport> successTaskReports) {
|
192
|
+
}
|
193
|
+
|
194
|
+
@Override
|
195
|
+
public TransactionalFileInput open(TaskSource taskSource, int taskIndex) {
|
196
|
+
final PluginTask task = taskSource.loadTask(PluginTask.class);
|
197
|
+
|
198
|
+
return new SwiftFileInput(task, taskIndex);
|
199
|
+
}
|
200
|
+
|
201
|
+
//private static InputStream openInputStream(PluginTask task, String path)
|
202
|
+
//{
|
203
|
+
// return new MyInputStream(file);
|
204
|
+
//}
|
205
|
+
|
206
|
+
|
207
|
+
@VisibleForTesting
|
208
|
+
static class SwiftInputStreamReopener
|
209
|
+
implements ResumableInputStream.Reopener {
|
210
|
+
private final Logger LOGGER = Exec.getLogger(SwiftInputStreamReopener.class);
|
211
|
+
|
212
|
+
private final StoredObject obj;
|
213
|
+
|
214
|
+
public SwiftInputStreamReopener(StoredObject obj) {
|
215
|
+
this.obj = obj;
|
216
|
+
}
|
217
|
+
|
218
|
+
@Override
|
219
|
+
public InputStream reopen(final long offset, final Exception closedCause) throws IOException {
|
220
|
+
try {
|
221
|
+
return retryExecutor()
|
222
|
+
.withRetryLimit(3)
|
223
|
+
.withInitialRetryWait(500)
|
224
|
+
.withMaxRetryWait(30 * 1000)
|
225
|
+
.runInterruptible(new RetryExecutor.Retryable<InputStream>() {
|
226
|
+
@Override
|
227
|
+
public InputStream call() throws InterruptedIOException {
|
228
|
+
LOGGER.warn(String.format("Swift read failed. Retrying GET request with %,d bytes offset", offset), closedCause);
|
229
|
+
return obj.downloadObjectAsInputStream(new DownloadInstructions().setRange(new ExcludeStartRange((int) offset)));
|
230
|
+
}
|
231
|
+
|
232
|
+
@Override
|
233
|
+
public boolean isRetryableException(Exception exception) {
|
234
|
+
return true; // TODO
|
235
|
+
}
|
236
|
+
|
237
|
+
@Override
|
238
|
+
public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait)
|
239
|
+
throws RetryExecutor.RetryGiveupException {
|
240
|
+
String message = String.format("Swift GET request failed. Retrying %d/%d after %d seconds. Message: %s",
|
241
|
+
retryCount, retryLimit, retryWait / 1000, exception.getMessage());
|
242
|
+
if (retryCount % 3 == 0) {
|
243
|
+
LOGGER.warn(message, exception);
|
244
|
+
} else {
|
245
|
+
LOGGER.warn(message);
|
246
|
+
}
|
247
|
+
}
|
248
|
+
|
249
|
+
@Override
|
250
|
+
public void onGiveup(Exception firstException, Exception lastException)
|
251
|
+
throws RetryExecutor.RetryGiveupException {
|
252
|
+
}
|
253
|
+
});
|
254
|
+
} catch (RetryExecutor.RetryGiveupException ex) {
|
255
|
+
Throwables.propagateIfInstanceOf(ex.getCause(), IOException.class);
|
256
|
+
throw Throwables.propagate(ex.getCause());
|
257
|
+
} catch (InterruptedException ex) {
|
258
|
+
throw new InterruptedIOException();
|
259
|
+
}
|
260
|
+
}
|
261
|
+
}
|
262
|
+
|
263
|
+
public class SwiftFileInput
|
264
|
+
extends InputStreamFileInput
|
265
|
+
implements TransactionalFileInput {
|
266
|
+
public SwiftFileInput(PluginTask task, int taskIndex) {
|
267
|
+
super(task.getBufferAllocator(), new SingleFileProvider(task, taskIndex));
|
268
|
+
}
|
269
|
+
|
270
|
+
public void abort() {
|
271
|
+
}
|
272
|
+
|
273
|
+
public TaskReport commit() {
|
274
|
+
return Exec.newTaskReport();
|
275
|
+
}
|
276
|
+
|
277
|
+
@Override
|
278
|
+
public void close() {
|
279
|
+
}
|
280
|
+
}
|
281
|
+
|
282
|
+
private class SingleFileProvider
|
283
|
+
implements InputStreamFileInput.Provider {
|
284
|
+
private Account account;
|
285
|
+
private final String containerName;
|
286
|
+
private final Iterator<String> iterator;
|
287
|
+
|
288
|
+
public SingleFileProvider(PluginTask task, int taskIndex) {
|
289
|
+
this.account = getAccount(task);
|
290
|
+
this.containerName = task.getContainer();
|
291
|
+
this.iterator = task.getFiles().get(taskIndex).iterator();
|
292
|
+
}
|
293
|
+
|
294
|
+
@Override
|
295
|
+
public InputStream openNext() throws IOException {
|
296
|
+
if (!iterator.hasNext()) {
|
297
|
+
return null;
|
298
|
+
}
|
299
|
+
StoredObject obj = this.account.getContainer(this.containerName).getObject(iterator.next());
|
300
|
+
return new ResumableInputStream(obj.downloadObjectAsInputStream(), new SwiftInputStreamReopener(obj));
|
301
|
+
}
|
302
|
+
|
303
|
+
@Override
|
304
|
+
public void close() {
|
305
|
+
}
|
306
|
+
}
|
307
|
+
}
|