embulk-output-hdfs 0.2.4 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +6 -1
- data/CHANGELOG.md +9 -0
- data/README.md +38 -9
- data/build.gradle +10 -8
- data/example/config.yml +3 -1
- data/example/config_deprecated_option.yml +52 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +1 -2
- data/gradlew +43 -35
- data/gradlew.bat +4 -10
- data/settings.gradle +1 -0
- data/src/main/java/org/embulk/output/hdfs/HdfsFileOutput.java +160 -0
- data/src/main/java/org/embulk/output/hdfs/HdfsFileOutputPlugin.java +55 -175
- data/src/main/java/org/embulk/output/hdfs/ModeTask.java +111 -0
- data/src/main/java/org/embulk/output/hdfs/client/HdfsClient.java +269 -0
- data/src/main/java/org/embulk/output/hdfs/compat/ModeCompat.java +76 -0
- data/src/main/java/org/embulk/output/hdfs/transaction/AbortIfExistTx.java +6 -0
- data/src/main/java/org/embulk/output/hdfs/transaction/AbstractTx.java +53 -0
- data/src/main/java/org/embulk/output/hdfs/transaction/ControlRun.java +10 -0
- data/src/main/java/org/embulk/output/hdfs/transaction/DeleteFilesInAdvanceTx.java +22 -0
- data/src/main/java/org/embulk/output/hdfs/transaction/DeleteRecursiveInAdvanceTx.java +22 -0
- data/src/main/java/org/embulk/output/hdfs/transaction/OverwriteTx.java +11 -0
- data/src/main/java/org/embulk/output/hdfs/transaction/ReplaceTx.java +62 -0
- data/src/main/java/org/embulk/output/hdfs/transaction/Tx.java +13 -0
- data/src/main/java/org/embulk/output/hdfs/util/SafeWorkspaceName.java +21 -0
- data/src/main/java/org/embulk/output/hdfs/util/SamplePath.java +21 -0
- data/src/main/java/org/embulk/output/hdfs/util/StrftimeUtil.java +23 -0
- data/src/test/java/org/embulk/output/hdfs/TestHdfsFileOutputPlugin.java +153 -22
- metadata +87 -70
@@ -1,43 +1,32 @@
|
|
1
1
|
package org.embulk.output.hdfs;
|
2
2
|
|
3
3
|
import com.google.common.base.Optional;
|
4
|
-
import com.google.common.base.Throwables;
|
5
|
-
import org.apache.hadoop.conf.Configuration;
|
6
|
-
import org.apache.hadoop.fs.FileStatus;
|
7
|
-
import org.apache.hadoop.fs.FileSystem;
|
8
|
-
import org.apache.hadoop.fs.Path;
|
9
4
|
import org.embulk.config.Config;
|
10
5
|
import org.embulk.config.ConfigDefault;
|
11
6
|
import org.embulk.config.ConfigDiff;
|
12
|
-
import org.embulk.config.ConfigException;
|
13
7
|
import org.embulk.config.ConfigSource;
|
14
8
|
import org.embulk.config.Task;
|
15
9
|
import org.embulk.config.TaskReport;
|
16
10
|
import org.embulk.config.TaskSource;
|
17
|
-
import org.embulk.
|
11
|
+
import org.embulk.output.hdfs.ModeTask.Mode;
|
12
|
+
import org.embulk.output.hdfs.compat.ModeCompat;
|
13
|
+
import org.embulk.output.hdfs.transaction.ControlRun;
|
14
|
+
import org.embulk.output.hdfs.transaction.Tx;
|
18
15
|
import org.embulk.spi.Exec;
|
19
16
|
import org.embulk.spi.FileOutputPlugin;
|
20
17
|
import org.embulk.spi.TransactionalFileOutput;
|
21
|
-
import org.jruby.embed.ScriptingContainer;
|
22
18
|
import org.slf4j.Logger;
|
23
19
|
|
24
|
-
import java.io.File;
|
25
|
-
import java.io.IOException;
|
26
|
-
import java.io.OutputStream;
|
27
|
-
import java.net.URI;
|
28
|
-
import java.util.ArrayList;
|
29
20
|
import java.util.List;
|
30
21
|
import java.util.Map;
|
31
22
|
|
32
|
-
import static org.embulk.output.hdfs.HdfsFileOutputPlugin.PluginTask.*;
|
33
|
-
|
34
23
|
public class HdfsFileOutputPlugin
|
35
24
|
implements FileOutputPlugin
|
36
25
|
{
|
37
26
|
private static final Logger logger = Exec.getLogger(HdfsFileOutputPlugin.class);
|
38
27
|
|
39
28
|
public interface PluginTask
|
40
|
-
extends Task
|
29
|
+
extends Task, ModeTask
|
41
30
|
{
|
42
31
|
@Config("config_files")
|
43
32
|
@ConfigDefault("[]")
|
@@ -61,197 +50,88 @@ public class HdfsFileOutputPlugin
|
|
61
50
|
@ConfigDefault("0")
|
62
51
|
int getRewindSeconds();
|
63
52
|
|
53
|
+
@Deprecated // Please Use `mode` option
|
64
54
|
@Config("overwrite")
|
65
|
-
@ConfigDefault("
|
66
|
-
|
55
|
+
@ConfigDefault("null")
|
56
|
+
Optional<Boolean> getOverwrite();
|
67
57
|
|
68
58
|
@Config("doas")
|
69
59
|
@ConfigDefault("null")
|
70
60
|
Optional<String> getDoas();
|
71
61
|
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
62
|
+
@Deprecated
|
63
|
+
enum DeleteInAdvancePolicy
|
64
|
+
{
|
65
|
+
NONE, FILE_ONLY, RECURSIVE
|
66
|
+
}
|
77
67
|
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
PluginTask task = config.loadConfig(PluginTask.class);
|
68
|
+
@Deprecated // Please Use `mode` option
|
69
|
+
@Config("delete_in_advance")
|
70
|
+
@ConfigDefault("null")
|
71
|
+
Optional<DeleteInAdvancePolicy> getDeleteInAdvance();
|
83
72
|
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
deleteInAdvance(fs, pathPrefix, task.getDeleteInAdvance());
|
88
|
-
}
|
89
|
-
catch (IOException e) {
|
90
|
-
throw Throwables.propagate(e);
|
91
|
-
}
|
73
|
+
@Config("workspace")
|
74
|
+
@ConfigDefault("\"/tmp\"")
|
75
|
+
String getWorkspace();
|
92
76
|
|
93
|
-
|
94
|
-
|
77
|
+
String getSafeWorkspace();
|
78
|
+
void setSafeWorkspace(String safeWorkspace);
|
95
79
|
}
|
96
80
|
|
97
|
-
|
98
|
-
public ConfigDiff resume(TaskSource taskSource,
|
99
|
-
int taskCount,
|
100
|
-
FileOutputPlugin.Control control)
|
81
|
+
private void compat(PluginTask task)
|
101
82
|
{
|
102
|
-
|
83
|
+
Mode modeCompat = ModeCompat.getMode(task, task.getOverwrite(), task.getDeleteInAdvance());
|
84
|
+
task.setMode(modeCompat);
|
103
85
|
}
|
104
86
|
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
87
|
+
// NOTE: This is to avoid the following error.
|
88
|
+
// Error: java.lang.RuntimeException: com.fasterxml.jackson.databind.JsonMappingException: Field 'SafeWorkspace' is required but not set
|
89
|
+
// at [Source: N/A; line: -1, column: -1]
|
90
|
+
private void avoidDatabindError(PluginTask task)
|
109
91
|
{
|
92
|
+
// Set default value
|
93
|
+
task.setSafeWorkspace("");
|
110
94
|
}
|
111
95
|
|
112
96
|
@Override
|
113
|
-
public
|
97
|
+
public ConfigDiff transaction(ConfigSource config, int taskCount,
|
98
|
+
final FileOutputPlugin.Control control)
|
114
99
|
{
|
115
|
-
final PluginTask task =
|
100
|
+
final PluginTask task = config.loadConfig(PluginTask.class);
|
101
|
+
compat(task);
|
102
|
+
avoidDatabindError(task);
|
116
103
|
|
117
|
-
|
118
|
-
|
119
|
-
final String sequenceFormat = task.getSequenceFormat();
|
120
|
-
|
121
|
-
return new TransactionalFileOutput()
|
104
|
+
Tx tx = task.getMode().newTx();
|
105
|
+
return tx.transaction(task, new ControlRun()
|
122
106
|
{
|
123
|
-
private final List<String> hdfsFileNames = new ArrayList<>();
|
124
|
-
private int fileIndex = 0;
|
125
|
-
private Path currentPath = null;
|
126
|
-
private OutputStream output = null;
|
127
|
-
|
128
|
-
@Override
|
129
|
-
public void nextFile()
|
130
|
-
{
|
131
|
-
closeCurrentStream();
|
132
|
-
currentPath = new Path(pathPrefix + String.format(sequenceFormat, taskIndex, fileIndex) + pathSuffix);
|
133
|
-
fileIndex++;
|
134
|
-
}
|
135
|
-
|
136
|
-
@Override
|
137
|
-
public void add(Buffer buffer)
|
138
|
-
{
|
139
|
-
try {
|
140
|
-
// this implementation is for creating file when there is data.
|
141
|
-
if (output == null) {
|
142
|
-
FileSystem fs = getFs(task);
|
143
|
-
output = fs.create(currentPath, task.getOverwrite());
|
144
|
-
logger.info("Uploading '{}'", currentPath);
|
145
|
-
hdfsFileNames.add(currentPath.toString());
|
146
|
-
}
|
147
|
-
output.write(buffer.array(), buffer.offset(), buffer.limit());
|
148
|
-
}
|
149
|
-
catch (IOException e) {
|
150
|
-
throw new RuntimeException(e);
|
151
|
-
}
|
152
|
-
finally {
|
153
|
-
buffer.release();
|
154
|
-
}
|
155
|
-
}
|
156
|
-
|
157
107
|
@Override
|
158
|
-
public
|
108
|
+
public List<TaskReport> run()
|
159
109
|
{
|
160
|
-
|
110
|
+
return control.run(task.dump());
|
161
111
|
}
|
162
|
-
|
163
|
-
@Override
|
164
|
-
public void close()
|
165
|
-
{
|
166
|
-
closeCurrentStream();
|
167
|
-
}
|
168
|
-
|
169
|
-
@Override
|
170
|
-
public void abort()
|
171
|
-
{
|
172
|
-
}
|
173
|
-
|
174
|
-
@Override
|
175
|
-
public TaskReport commit()
|
176
|
-
{
|
177
|
-
TaskReport report = Exec.newTaskReport();
|
178
|
-
report.set("hdfs_file_names", hdfsFileNames);
|
179
|
-
return report;
|
180
|
-
}
|
181
|
-
|
182
|
-
private void closeCurrentStream()
|
183
|
-
{
|
184
|
-
if (output != null) {
|
185
|
-
try {
|
186
|
-
output.close();
|
187
|
-
output = null;
|
188
|
-
}
|
189
|
-
catch (IOException e) {
|
190
|
-
throw new RuntimeException(e);
|
191
|
-
}
|
192
|
-
}
|
193
|
-
}
|
194
|
-
};
|
112
|
+
});
|
195
113
|
}
|
196
114
|
|
197
|
-
|
198
|
-
|
115
|
+
@Override
|
116
|
+
public ConfigDiff resume(TaskSource taskSource,
|
117
|
+
int taskCount,
|
118
|
+
FileOutputPlugin.Control control)
|
199
119
|
{
|
200
|
-
|
201
|
-
|
202
|
-
for (String configFile : task.getConfigFiles()) {
|
203
|
-
File file = new File(configFile);
|
204
|
-
configuration.addResource(file.toURI().toURL());
|
205
|
-
}
|
206
|
-
|
207
|
-
for (Map.Entry<String, String> entry: task.getConfig().entrySet()) {
|
208
|
-
configuration.set(entry.getKey(), entry.getValue());
|
209
|
-
}
|
210
|
-
|
211
|
-
if (task.getDoas().isPresent()) {
|
212
|
-
URI uri = FileSystem.getDefaultUri(configuration);
|
213
|
-
try {
|
214
|
-
return FileSystem.get(uri, configuration, task.getDoas().get());
|
215
|
-
}
|
216
|
-
catch (InterruptedException e) {
|
217
|
-
throw Throwables.propagate(e);
|
218
|
-
}
|
219
|
-
}
|
220
|
-
return FileSystem.get(configuration);
|
120
|
+
throw new UnsupportedOperationException("hdfs output plugin does not support resuming");
|
221
121
|
}
|
222
122
|
|
223
|
-
|
123
|
+
@Override
|
124
|
+
public void cleanup(TaskSource taskSource,
|
125
|
+
int taskCount,
|
126
|
+
List<TaskReport> successTaskReports)
|
224
127
|
{
|
225
|
-
ScriptingContainer jruby = new ScriptingContainer();
|
226
|
-
Object resolved = jruby.runScriptlet(
|
227
|
-
String.format("(Time.now - %s).strftime('%s')", String.valueOf(rewind_seconds), raw));
|
228
|
-
return resolved.toString();
|
229
128
|
}
|
230
129
|
|
231
|
-
|
232
|
-
|
130
|
+
@Override
|
131
|
+
public TransactionalFileOutput open(TaskSource taskSource, int taskIndex)
|
233
132
|
{
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
// do nothing
|
238
|
-
break;
|
239
|
-
case FILE_ONLY:
|
240
|
-
for (FileStatus status : fs.globStatus(globPath)) {
|
241
|
-
if (status.isFile()) {
|
242
|
-
logger.debug("delete in advance: {}", status.getPath());
|
243
|
-
fs.delete(status.getPath(), false);
|
244
|
-
}
|
245
|
-
}
|
246
|
-
break;
|
247
|
-
case RECURSIVE:
|
248
|
-
for (FileStatus status : fs.globStatus(globPath)) {
|
249
|
-
logger.debug("delete in advance: {}", status.getPath());
|
250
|
-
fs.delete(status.getPath(), true);
|
251
|
-
}
|
252
|
-
break;
|
253
|
-
default:
|
254
|
-
throw new ConfigException("`delete_in_advance` must not null.");
|
255
|
-
}
|
133
|
+
PluginTask task = taskSource.loadTask(PluginTask.class);
|
134
|
+
Tx tx = task.getMode().newTx();
|
135
|
+
return tx.newOutput(task, taskSource, taskIndex);
|
256
136
|
}
|
257
137
|
}
|
@@ -0,0 +1,111 @@
|
|
1
|
+
package org.embulk.output.hdfs;
|
2
|
+
|
3
|
+
import com.fasterxml.jackson.annotation.JsonCreator;
|
4
|
+
import com.fasterxml.jackson.annotation.JsonValue;
|
5
|
+
import com.google.common.base.Function;
|
6
|
+
import com.google.common.base.Joiner;
|
7
|
+
import com.google.common.collect.Lists;
|
8
|
+
import org.embulk.config.Config;
|
9
|
+
import org.embulk.config.ConfigDefault;
|
10
|
+
import org.embulk.config.ConfigException;
|
11
|
+
import org.embulk.config.Task;
|
12
|
+
import org.embulk.output.hdfs.transaction.AbortIfExistTx;
|
13
|
+
import org.embulk.output.hdfs.transaction.DeleteFilesInAdvanceTx;
|
14
|
+
import org.embulk.output.hdfs.transaction.DeleteRecursiveInAdvanceTx;
|
15
|
+
import org.embulk.output.hdfs.transaction.OverwriteTx;
|
16
|
+
import org.embulk.output.hdfs.transaction.ReplaceTx;
|
17
|
+
import org.embulk.output.hdfs.transaction.Tx;
|
18
|
+
import org.embulk.spi.Exec;
|
19
|
+
import org.slf4j.Logger;
|
20
|
+
|
21
|
+
import javax.annotation.Nullable;
|
22
|
+
|
23
|
+
import java.util.Locale;
|
24
|
+
|
25
|
+
public interface ModeTask
|
26
|
+
extends Task
|
27
|
+
{
|
28
|
+
@Config("mode")
|
29
|
+
@ConfigDefault("\"abort_if_exist\"")
|
30
|
+
Mode getMode();
|
31
|
+
|
32
|
+
void setMode(Mode mode);
|
33
|
+
|
34
|
+
enum Mode
|
35
|
+
{
|
36
|
+
ABORT_IF_EXIST,
|
37
|
+
OVERWRITE,
|
38
|
+
DELETE_FILES_IN_ADVANCE,
|
39
|
+
DELETE_RECURSIVE_IN_ADVANCE,
|
40
|
+
REPLACE;
|
41
|
+
|
42
|
+
private static final Logger logger = Exec.getLogger(Mode.class);
|
43
|
+
|
44
|
+
@Deprecated // For compat
|
45
|
+
public boolean isDefaultMode()
|
46
|
+
{
|
47
|
+
return this.equals(ABORT_IF_EXIST);
|
48
|
+
}
|
49
|
+
|
50
|
+
@JsonValue
|
51
|
+
@Override
|
52
|
+
public String toString()
|
53
|
+
{
|
54
|
+
return name().toLowerCase(Locale.ENGLISH);
|
55
|
+
}
|
56
|
+
|
57
|
+
@JsonCreator
|
58
|
+
@SuppressWarnings("unused")
|
59
|
+
public static Mode fromString(String value)
|
60
|
+
{
|
61
|
+
switch (value) {
|
62
|
+
case "abort_if_exist":
|
63
|
+
return ABORT_IF_EXIST;
|
64
|
+
case "overwrite":
|
65
|
+
return OVERWRITE;
|
66
|
+
case "delete_files_in_advance":
|
67
|
+
return DELETE_FILES_IN_ADVANCE;
|
68
|
+
case "delete_recursive_in_advance":
|
69
|
+
return DELETE_RECURSIVE_IN_ADVANCE;
|
70
|
+
case "replace":
|
71
|
+
return REPLACE;
|
72
|
+
default:
|
73
|
+
throw new ConfigException(String.format(
|
74
|
+
"Unknown mode `%s`. Supported mode is %s",
|
75
|
+
value,
|
76
|
+
Joiner.on(", ").join(
|
77
|
+
Lists.transform(Lists.newArrayList(Mode.values()), new Function<Mode, String>()
|
78
|
+
{
|
79
|
+
@Nullable
|
80
|
+
@Override
|
81
|
+
public String apply(@Nullable Mode input)
|
82
|
+
{
|
83
|
+
assert input != null;
|
84
|
+
return String.format("`%s`", input.toString());
|
85
|
+
}
|
86
|
+
})
|
87
|
+
)
|
88
|
+
));
|
89
|
+
}
|
90
|
+
}
|
91
|
+
|
92
|
+
public Tx newTx()
|
93
|
+
{
|
94
|
+
switch (this) {
|
95
|
+
case ABORT_IF_EXIST:
|
96
|
+
return new AbortIfExistTx();
|
97
|
+
case DELETE_FILES_IN_ADVANCE:
|
98
|
+
return new DeleteFilesInAdvanceTx();
|
99
|
+
case DELETE_RECURSIVE_IN_ADVANCE:
|
100
|
+
return new DeleteRecursiveInAdvanceTx();
|
101
|
+
case OVERWRITE:
|
102
|
+
return new OverwriteTx();
|
103
|
+
case REPLACE:
|
104
|
+
return new ReplaceTx();
|
105
|
+
default:
|
106
|
+
throw new IllegalStateException();
|
107
|
+
}
|
108
|
+
}
|
109
|
+
|
110
|
+
}
|
111
|
+
}
|
@@ -0,0 +1,269 @@
|
|
1
|
+
package org.embulk.output.hdfs.client;
|
2
|
+
|
3
|
+
import com.google.common.base.Optional;
|
4
|
+
import org.apache.hadoop.conf.Configuration;
|
5
|
+
import org.apache.hadoop.fs.FileContext;
|
6
|
+
import org.apache.hadoop.fs.FileStatus;
|
7
|
+
import org.apache.hadoop.fs.FileSystem;
|
8
|
+
import org.apache.hadoop.fs.Options;
|
9
|
+
import org.apache.hadoop.fs.Path;
|
10
|
+
import org.apache.hadoop.fs.Trash;
|
11
|
+
import org.embulk.config.ConfigException;
|
12
|
+
import org.embulk.output.hdfs.HdfsFileOutputPlugin;
|
13
|
+
import org.embulk.spi.DataException;
|
14
|
+
import org.embulk.spi.Exec;
|
15
|
+
import org.embulk.spi.util.RetryExecutor;
|
16
|
+
import org.slf4j.Logger;
|
17
|
+
|
18
|
+
import java.io.File;
|
19
|
+
import java.io.OutputStream;
|
20
|
+
import java.net.MalformedURLException;
|
21
|
+
import java.net.URI;
|
22
|
+
import java.util.List;
|
23
|
+
import java.util.Map;
|
24
|
+
|
25
|
+
public class HdfsClient
|
26
|
+
{
|
27
|
+
public static HdfsClient build(HdfsFileOutputPlugin.PluginTask task)
|
28
|
+
{
|
29
|
+
Configuration conf = buildConfiguration(task.getConfigFiles(), task.getConfig());
|
30
|
+
return new HdfsClient(conf, task.getDoas());
|
31
|
+
}
|
32
|
+
|
33
|
+
;
|
34
|
+
|
35
|
+
public static Configuration buildConfiguration(List<String> configFiles, Map<String, String> configs)
|
36
|
+
{
|
37
|
+
Configuration c = new Configuration();
|
38
|
+
for (String configFile : configFiles) {
|
39
|
+
File file = new File(configFile);
|
40
|
+
try {
|
41
|
+
c.addResource(file.toURI().toURL());
|
42
|
+
}
|
43
|
+
catch (MalformedURLException e) {
|
44
|
+
throw new ConfigException(e);
|
45
|
+
}
|
46
|
+
}
|
47
|
+
for (Map.Entry<String, String> config : configs.entrySet()) {
|
48
|
+
c.set(config.getKey(), config.getValue());
|
49
|
+
}
|
50
|
+
return c;
|
51
|
+
}
|
52
|
+
|
53
|
+
private static final Logger logger = Exec.getLogger(HdfsClient.class); // shared class logger; never reassigned
|
54
|
+
private final Configuration conf;
|
55
|
+
private final FileSystem fs;
|
56
|
+
private final Optional<String> user;
|
57
|
+
private final RetryExecutor re = RetryExecutor.retryExecutor()
|
58
|
+
.withRetryLimit(3)
|
59
|
+
.withInitialRetryWait(500) // ms, initial wait (was a duplicate withMaxRetryWait that got overwritten below)
|
60
|
+
.withMaxRetryWait(10 * 60 * 1000); // ms, max wait
|
61
|
+
|
62
|
+
private HdfsClient(Configuration conf, Optional<String> user)
|
63
|
+
{
|
64
|
+
this.conf = conf;
|
65
|
+
this.user = user;
|
66
|
+
this.fs = getFs(conf, user);
|
67
|
+
}
|
68
|
+
|
69
|
+
private abstract static class Retryable<T>
|
70
|
+
implements RetryExecutor.Retryable<T>
|
71
|
+
{
|
72
|
+
@Override
|
73
|
+
public boolean isRetryableException(Exception exception)
|
74
|
+
{
|
75
|
+
return true; // TODO: which Exception is retryable?
|
76
|
+
}
|
77
|
+
|
78
|
+
@Override
|
79
|
+
public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait)
|
80
|
+
throws RetryExecutor.RetryGiveupException
|
81
|
+
{
|
82
|
+
String m = String.format(
|
83
|
+
"%s. (Retry: Count: %d, Limit: %d, Wait: %d ms)",
|
84
|
+
exception.getMessage(),
|
85
|
+
retryCount,
|
86
|
+
retryLimit,
|
87
|
+
retryWait);
|
88
|
+
logger.warn(m, exception);
|
89
|
+
}
|
90
|
+
|
91
|
+
@Override
|
92
|
+
public void onGiveup(Exception firstException, Exception lastException)
|
93
|
+
throws RetryExecutor.RetryGiveupException
|
94
|
+
{
|
95
|
+
}
|
96
|
+
}
|
97
|
+
|
98
|
+
private <T> T run(Retryable<T> retryable)
|
99
|
+
{
|
100
|
+
try {
|
101
|
+
return re.run(retryable);
|
102
|
+
}
|
103
|
+
catch (RetryExecutor.RetryGiveupException e) {
|
104
|
+
throw new RuntimeException(e);
|
105
|
+
}
|
106
|
+
}
|
107
|
+
|
108
|
+
private FileSystem getFs(Configuration conf, Optional<String> user)
|
109
|
+
{
|
110
|
+
if (user.isPresent()) {
|
111
|
+
return getFs(conf, user.get());
|
112
|
+
}
|
113
|
+
else {
|
114
|
+
return getFs(conf);
|
115
|
+
}
|
116
|
+
}
|
117
|
+
|
118
|
+
private FileSystem getFs(final Configuration conf, final String user)
|
119
|
+
{
|
120
|
+
return run(new Retryable<FileSystem>()
|
121
|
+
{
|
122
|
+
@Override
|
123
|
+
public FileSystem call()
|
124
|
+
throws Exception
|
125
|
+
{
|
126
|
+
URI uri = FileSystem.getDefaultUri(conf);
|
127
|
+
return FileSystem.get(uri, conf, user);
|
128
|
+
}
|
129
|
+
});
|
130
|
+
}
|
131
|
+
|
132
|
+
private FileSystem getFs(final Configuration conf)
|
133
|
+
{
|
134
|
+
return run(new Retryable<FileSystem>()
|
135
|
+
{
|
136
|
+
@Override
|
137
|
+
public FileSystem call()
|
138
|
+
throws Exception
|
139
|
+
{
|
140
|
+
return FileSystem.get(conf);
|
141
|
+
}
|
142
|
+
});
|
143
|
+
}
|
144
|
+
|
145
|
+
public FileStatus[] glob(final Path globPath)
|
146
|
+
{
|
147
|
+
return run(new Retryable<FileStatus[]>()
|
148
|
+
{
|
149
|
+
@Override
|
150
|
+
public FileStatus[] call()
|
151
|
+
throws Exception
|
152
|
+
{
|
153
|
+
return fs.globStatus(globPath);
|
154
|
+
}
|
155
|
+
});
|
156
|
+
}
|
157
|
+
|
158
|
+
public boolean trash(final Path path)
|
159
|
+
{
|
160
|
+
return run(new Retryable<Boolean>()
|
161
|
+
{
|
162
|
+
@Override
|
163
|
+
public Boolean call()
|
164
|
+
throws Exception
|
165
|
+
{
|
166
|
+
return Trash.moveToAppropriateTrash(fs, path, conf);
|
167
|
+
}
|
168
|
+
});
|
169
|
+
}
|
170
|
+
|
171
|
+
public void globFilesAndTrash(final Path globPath)
|
172
|
+
{
|
173
|
+
for (final FileStatus fileStatus : glob(globPath)) {
|
174
|
+
if (fileStatus.isDirectory()) {
|
175
|
+
logger.debug("Skip {} because {} is a directory.",
|
176
|
+
fileStatus.getPath(), fileStatus.getPath());
|
177
|
+
continue;
|
178
|
+
}
|
179
|
+
logger.debug("Move To Trash: {}", fileStatus.getPath());
|
180
|
+
if (!trash(fileStatus.getPath())) {
|
181
|
+
throw new RuntimeException(String.format("Failed to Move To Trash: %s", fileStatus.getPath()));
|
182
|
+
}
|
183
|
+
}
|
184
|
+
}
|
185
|
+
|
186
|
+
public void globAndTrash(final Path globPath)
|
187
|
+
{
|
188
|
+
for (final FileStatus fileStatus : glob(globPath)) {
|
189
|
+
logger.debug("Move To Trash: {}", fileStatus.getPath());
|
190
|
+
if (!trash(fileStatus.getPath())) {
|
191
|
+
throw new RuntimeException(String.format("Failed to Move To Trash: %s", fileStatus.getPath()));
|
192
|
+
}
|
193
|
+
}
|
194
|
+
}
|
195
|
+
|
196
|
+
public OutputStream create(final Path path, final boolean overwrite)
|
197
|
+
{
|
198
|
+
return run(new Retryable<OutputStream>()
|
199
|
+
{
|
200
|
+
@Override
|
201
|
+
public OutputStream call()
|
202
|
+
throws Exception
|
203
|
+
{
|
204
|
+
return fs.create(path, overwrite);
|
205
|
+
}
|
206
|
+
});
|
207
|
+
}
|
208
|
+
|
209
|
+
public boolean mkdirs(String path)
|
210
|
+
{
|
211
|
+
return mkdirs(new Path(path));
|
212
|
+
}
|
213
|
+
|
214
|
+
public boolean mkdirs(final Path path)
|
215
|
+
{
|
216
|
+
return run(new Retryable<Boolean>()
|
217
|
+
{
|
218
|
+
@Override
|
219
|
+
public Boolean call()
|
220
|
+
throws Exception
|
221
|
+
{
|
222
|
+
return fs.mkdirs(path);
|
223
|
+
}
|
224
|
+
});
|
225
|
+
}
|
226
|
+
|
227
|
+
public void close()
|
228
|
+
{
|
229
|
+
run(new Retryable<Void>()
|
230
|
+
{
|
231
|
+
@Override
|
232
|
+
public Void call()
|
233
|
+
throws Exception
|
234
|
+
{
|
235
|
+
fs.close();
|
236
|
+
return null;
|
237
|
+
}
|
238
|
+
});
|
239
|
+
}
|
240
|
+
|
241
|
+
public void renameDirectory(String src, String dst, boolean trashIfExists)
|
242
|
+
{
|
243
|
+
renameDirectory(new Path(src), new Path(dst), trashIfExists);
|
244
|
+
}
|
245
|
+
|
246
|
+
public void renameDirectory(final Path src, final Path dst, final boolean trashIfExists)
|
247
|
+
{
|
248
|
+
run(new Retryable<Void>()
|
249
|
+
{
|
250
|
+
@Override
|
251
|
+
public Void call()
|
252
|
+
throws Exception
|
253
|
+
{
|
254
|
+
if (fs.exists(dst)) {
|
255
|
+
if (!trashIfExists) {
|
256
|
+
throw new DataException(String.format("Directory Exists: %s", dst.toString()));
|
257
|
+
}
|
258
|
+
logger.info("Move To Trash: {}", dst);
|
259
|
+
if (!trash(dst)) {
|
260
|
+
throw new IllegalStateException(String.format("Failed to Move To Trash: %s", dst.toString()));
|
261
|
+
}
|
262
|
+
}
|
263
|
+
FileContext.getFileContext(conf).rename(src, dst, Options.Rename.NONE);
|
264
|
+
logger.debug("Rename: {} >>> {}", src, dst);
|
265
|
+
return null;
|
266
|
+
}
|
267
|
+
});
|
268
|
+
}
|
269
|
+
}
|