embulk-output-hdfs 0.2.4 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +6 -1
- data/CHANGELOG.md +9 -0
- data/README.md +38 -9
- data/build.gradle +10 -8
- data/example/config.yml +3 -1
- data/example/config_deprecated_option.yml +52 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +1 -2
- data/gradlew +43 -35
- data/gradlew.bat +4 -10
- data/settings.gradle +1 -0
- data/src/main/java/org/embulk/output/hdfs/HdfsFileOutput.java +160 -0
- data/src/main/java/org/embulk/output/hdfs/HdfsFileOutputPlugin.java +55 -175
- data/src/main/java/org/embulk/output/hdfs/ModeTask.java +111 -0
- data/src/main/java/org/embulk/output/hdfs/client/HdfsClient.java +269 -0
- data/src/main/java/org/embulk/output/hdfs/compat/ModeCompat.java +76 -0
- data/src/main/java/org/embulk/output/hdfs/transaction/AbortIfExistTx.java +6 -0
- data/src/main/java/org/embulk/output/hdfs/transaction/AbstractTx.java +53 -0
- data/src/main/java/org/embulk/output/hdfs/transaction/ControlRun.java +10 -0
- data/src/main/java/org/embulk/output/hdfs/transaction/DeleteFilesInAdvanceTx.java +22 -0
- data/src/main/java/org/embulk/output/hdfs/transaction/DeleteRecursiveInAdvanceTx.java +22 -0
- data/src/main/java/org/embulk/output/hdfs/transaction/OverwriteTx.java +11 -0
- data/src/main/java/org/embulk/output/hdfs/transaction/ReplaceTx.java +62 -0
- data/src/main/java/org/embulk/output/hdfs/transaction/Tx.java +13 -0
- data/src/main/java/org/embulk/output/hdfs/util/SafeWorkspaceName.java +21 -0
- data/src/main/java/org/embulk/output/hdfs/util/SamplePath.java +21 -0
- data/src/main/java/org/embulk/output/hdfs/util/StrftimeUtil.java +23 -0
- data/src/test/java/org/embulk/output/hdfs/TestHdfsFileOutputPlugin.java +153 -22
- metadata +87 -70
@@ -0,0 +1,76 @@
|
|
1
|
+
package org.embulk.output.hdfs.compat;

import com.google.common.base.Optional;
import org.embulk.config.ConfigException;
import org.embulk.output.hdfs.ModeTask;
import org.embulk.output.hdfs.ModeTask.Mode;
import org.embulk.spi.Exec;
import org.slf4j.Logger;

import static org.embulk.output.hdfs.HdfsFileOutputPlugin.PluginTask.DeleteInAdvancePolicy;
import static org.embulk.output.hdfs.ModeTask.Mode.ABORT_IF_EXIST;
import static org.embulk.output.hdfs.ModeTask.Mode.DELETE_FILES_IN_ADVANCE;
import static org.embulk.output.hdfs.ModeTask.Mode.DELETE_RECURSIVE_IN_ADVANCE;
import static org.embulk.output.hdfs.ModeTask.Mode.OVERWRITE;

/**
 * Maps the deprecated {@code overwrite} / {@code delete_in_advance} options
 * onto the unified {@code mode} option, warning users about the deprecation.
 * Exists only for backward compatibility with pre-0.3.0 configurations.
 */
@Deprecated
public class ModeCompat
{
    private static final Logger logger = Exec.getLogger(ModeCompat.class);

    // Utility class: not instantiable.
    private ModeCompat()
    {
    }

    /**
     * Resolves the effective {@link Mode}.
     *
     * @param task the mode task carrying the (possibly default) {@code mode} option
     * @param overwrite deprecated {@code overwrite} option, absent when unset
     * @param deleteInAdvancePolicy deprecated {@code delete_in_advance} option, absent when unset
     * @return the mode from {@code task} when neither deprecated option is set,
     *         otherwise a mode derived from the deprecated options
     * @throws ConfigException if {@code mode} is explicitly set alongside a
     *         deprecated option, or the policy value is unknown
     */
    @Deprecated
    public static Mode getMode(
            ModeTask task,
            Optional<Boolean> overwrite,
            Optional<DeleteInAdvancePolicy> deleteInAdvancePolicy)
    {
        // Fast path: no deprecated options in play, use `mode` as-is.
        if (!overwrite.isPresent() && !deleteInAdvancePolicy.isPresent()) {
            return task.getMode();
        }

        // Display Deprecated Messages
        if (overwrite.isPresent()) {
            logger.warn("`overwrite` option is Deprecated. Please use `mode` option instead.");
        }
        if (deleteInAdvancePolicy.isPresent()) {
            logger.warn("`delete_in_advance` is Deprecated. Please use `mode` option instead.");
        }
        // A non-default `mode` combined with a deprecated option is ambiguous; reject it.
        if (!task.getMode().isDefaultMode()) {
            String msg = "`mode` option cannot be used with `overwrite` option or `delete_in_advance` option.";
            logger.error(msg);
            throw new ConfigException(msg);
        }

        // Select Mode for Compatibility.
        // Here at least one deprecated option is present; if the policy is
        // absent, `overwrite` must be present, so overwrite.get() is safe.
        if (!deleteInAdvancePolicy.isPresent()) {
            if (overwrite.get()) {
                logger.warn(String.format("Select `mode: %s` for compatibility", OVERWRITE.toString()));
                return OVERWRITE;
            }
            logger.warn(String.format("Select `mode: %s` for compatibility", ABORT_IF_EXIST.toString()));
            return ABORT_IF_EXIST;
        }

        switch (deleteInAdvancePolicy.get()) { // deleteInAdvancePolicy is always present.
            case NONE:
                // NONE defers to `overwrite` (which may be absent here).
                if (overwrite.isPresent() && overwrite.get()) {
                    logger.warn(String.format("Select `mode: %s` for compatibility", OVERWRITE.toString()));
                    return OVERWRITE;
                }
                logger.warn(String.format("Select `mode: %s` for compatibility", ABORT_IF_EXIST.toString()));
                return ABORT_IF_EXIST;
            case FILE_ONLY:
                logger.warn(String.format("Select `mode: %s` for compatibility", DELETE_FILES_IN_ADVANCE.toString()));
                return DELETE_FILES_IN_ADVANCE;
            case RECURSIVE:
                logger.warn(String.format("Select `mode: %s` for compatibility", DELETE_RECURSIVE_IN_ADVANCE.toString()));
                return DELETE_RECURSIVE_IN_ADVANCE;
            default:
                throw new ConfigException(String.format("Unknown policy: %s", deleteInAdvancePolicy.get()));
        }
    }
}
|
@@ -0,0 +1,53 @@
|
|
1
|
+
package org.embulk.output.hdfs.transaction;

import org.embulk.config.ConfigDiff;
import org.embulk.config.TaskReport;
import org.embulk.config.TaskSource;
import org.embulk.output.hdfs.HdfsFileOutput;
import org.embulk.output.hdfs.util.StrftimeUtil;
import org.embulk.spi.Exec;
import org.embulk.spi.TransactionalFileOutput;

import java.util.List;

import static org.embulk.output.hdfs.HdfsFileOutputPlugin.PluginTask;

/**
 * Template-method base class for transaction strategies. Subclasses hook
 * into the lifecycle via {@link #beforeRun} / {@link #afterRun}, and may
 * redirect output via {@link #getPathPrefix} or allow clobbering via
 * {@link #canOverwrite}.
 */
abstract class AbstractTx
        implements Tx
{
    /** Lifecycle hook invoked once before the tasks run; no-op by default. */
    protected void beforeRun(PluginTask task)
    {
    }

    /** Lifecycle hook invoked once after all tasks finished; no-op by default. */
    protected void afterRun(PluginTask task, List<TaskReport> reports)
    {
    }

    /** Builds the (empty) config diff returned from {@link #transaction}. */
    protected ConfigDiff newConfigDiff()
    {
        return Exec.newConfigDiff();
    }

    /** Runs the hooks around the embulk control and returns the config diff. */
    public ConfigDiff transaction(PluginTask task, ControlRun control)
    {
        beforeRun(task);
        List<TaskReport> taskReports = control.run();
        afterRun(task, taskReports);
        return newConfigDiff();
    }

    /** Resolves the strftime-formatted path prefix, rewound by the configured seconds. */
    protected String getPathPrefix(PluginTask task)
    {
        return StrftimeUtil.strftime(task.getPathPrefix(), task.getRewindSeconds());
    }

    /** Whether existing files may be overwritten; conservative default is false. */
    protected boolean canOverwrite()
    {
        return false;
    }

    /** Creates the per-task file output writing under this transaction's prefix. */
    public TransactionalFileOutput newOutput(PluginTask task, TaskSource taskSource, int taskIndex)
    {
        return new HdfsFileOutput(task, getPathPrefix(task), canOverwrite(), taskIndex);
    }
}
|
@@ -0,0 +1,22 @@
|
|
1
|
+
package org.embulk.output.hdfs.transaction;
|
2
|
+
|
3
|
+
import org.apache.hadoop.fs.Path;
|
4
|
+
import org.embulk.output.hdfs.HdfsFileOutputPlugin;
|
5
|
+
import org.embulk.output.hdfs.client.HdfsClient;
|
6
|
+
import org.embulk.spi.Exec;
|
7
|
+
import org.slf4j.Logger;
|
8
|
+
|
9
|
+
public class DeleteFilesInAdvanceTx
|
10
|
+
extends AbstractTx
|
11
|
+
{
|
12
|
+
private static final Logger logger = Exec.getLogger(DeleteFilesInAdvanceTx.class);
|
13
|
+
|
14
|
+
@Override
|
15
|
+
protected void beforeRun(HdfsFileOutputPlugin.PluginTask task)
|
16
|
+
{
|
17
|
+
HdfsClient hdfsClient = HdfsClient.build(task);
|
18
|
+
Path globPath = new Path(getPathPrefix(task) + "*");
|
19
|
+
logger.info("Delete {} (File Only) in advance", globPath);
|
20
|
+
hdfsClient.globFilesAndTrash(globPath);
|
21
|
+
}
|
22
|
+
}
|
@@ -0,0 +1,22 @@
|
|
1
|
+
package org.embulk.output.hdfs.transaction;
|
2
|
+
|
3
|
+
import org.apache.hadoop.fs.Path;
|
4
|
+
import org.embulk.output.hdfs.HdfsFileOutputPlugin;
|
5
|
+
import org.embulk.output.hdfs.client.HdfsClient;
|
6
|
+
import org.embulk.spi.Exec;
|
7
|
+
import org.slf4j.Logger;
|
8
|
+
|
9
|
+
public class DeleteRecursiveInAdvanceTx
|
10
|
+
extends AbstractTx
|
11
|
+
{
|
12
|
+
private static final Logger logger = Exec.getLogger(DeleteRecursiveInAdvanceTx.class);
|
13
|
+
|
14
|
+
@Override
|
15
|
+
protected void beforeRun(HdfsFileOutputPlugin.PluginTask task)
|
16
|
+
{
|
17
|
+
HdfsClient hdfsClient = HdfsClient.build(task);
|
18
|
+
Path globPath = new Path(getPathPrefix(task) + "*");
|
19
|
+
logger.info("Delete {} (Recursive) in advance", globPath);
|
20
|
+
hdfsClient.globAndTrash(globPath);
|
21
|
+
}
|
22
|
+
}
|
@@ -0,0 +1,62 @@
|
|
1
|
+
package org.embulk.output.hdfs.transaction;

import org.embulk.config.ConfigException;
import org.embulk.config.TaskReport;
import org.embulk.output.hdfs.HdfsFileOutputPlugin.PluginTask;
import org.embulk.output.hdfs.client.HdfsClient;
import org.embulk.output.hdfs.util.SafeWorkspaceName;
import org.embulk.output.hdfs.util.SamplePath;
import org.embulk.output.hdfs.util.StrftimeUtil;
import org.embulk.spi.Exec;
import org.slf4j.Logger;

import java.nio.file.Paths;
import java.util.List;

/**
 * Transaction strategy that writes all output into a unique workspace
 * directory first and, after all tasks succeed, renames the workspace
 * output over the destination directory in a single step.
 */
public class ReplaceTx
        extends AbstractTx
{
    private static final Logger logger = Exec.getLogger(ReplaceTx.class);

    @Override
    protected String getPathPrefix(PluginTask task)
    {
        // Tasks write under the workspace; afterRun() moves the result
        // into the real destination.
        return Paths.get(task.getSafeWorkspace(), super.getPathPrefix(task)).toString();
    }

    @Override
    protected void beforeRun(PluginTask task)
    {
        // Fail fast on invalid configuration before building an HdfsClient
        // or touching the filesystem: a '/' in sequence_format would scatter
        // output over several directories, which the single directory rename
        // in afterRun() cannot replace atomically.
        if (task.getSequenceFormat().contains("/")) {
            throw new ConfigException("Must not include `/` in `sequence_format` if atomic is true.");
        }

        HdfsClient hdfsClient = HdfsClient.build(task);
        String safeWorkspace = SafeWorkspaceName.build(task.getWorkspace());
        logger.info("Use as a workspace: {}", safeWorkspace);

        // Pre-create the output directory inside the workspace.
        String safeWsWithOutput = Paths.get(safeWorkspace, getOutputSampleDir(task)).toString();
        logger.debug("The actual workspace must be with output dirs: {}", safeWsWithOutput);
        if (!hdfsClient.mkdirs(safeWsWithOutput)) {
            throw new ConfigException(String.format("Failed to make a directory: %s", safeWsWithOutput));
        }
        task.setSafeWorkspace(safeWorkspace);
    }

    @Override
    protected void afterRun(PluginTask task, List<TaskReport> reports)
    {
        HdfsClient hdfsClient = HdfsClient.build(task);
        String outputDir = getOutputSampleDir(task);
        // Reuse outputDir instead of recomputing the sample dir a second time.
        String safeWsWithOutput = Paths.get(task.getSafeWorkspace(), outputDir).toString();

        hdfsClient.renameDirectory(safeWsWithOutput, outputDir, true);
        logger.info("Store: {} >>> {}", safeWsWithOutput, outputDir);
    }

    /** Derives the destination output directory from a sample output path. */
    private String getOutputSampleDir(PluginTask task)
    {
        String pathPrefix = StrftimeUtil.strftime(task.getPathPrefix(), task.getRewindSeconds());
        return SamplePath.getDir(pathPrefix, task.getSequenceFormat(), task.getFileExt());
    }
}
|
@@ -0,0 +1,13 @@
|
|
1
|
+
package org.embulk.output.hdfs.transaction;

import org.embulk.config.ConfigDiff;
import org.embulk.config.TaskSource;
import org.embulk.output.hdfs.HdfsFileOutputPlugin.PluginTask;
import org.embulk.spi.TransactionalFileOutput;

/**
 * Strategy interface for a bulk-load transaction: wraps the embulk control
 * run and creates the per-task file outputs.
 */
public interface Tx
{
    /**
     * Runs the whole transaction: any preparation, the embulk control
     * ({@code control.run()}), and any finalization.
     *
     * @return the config diff to report back to embulk
     */
    ConfigDiff transaction(PluginTask task, ControlRun control);

    /**
     * Creates the transactional file output used by task {@code taskIndex}.
     */
    TransactionalFileOutput newOutput(PluginTask task, TaskSource taskSource, int taskIndex);
}
|
@@ -0,0 +1,21 @@
|
|
1
|
+
package org.embulk.output.hdfs.util;

import java.nio.file.Paths;
import java.util.UUID;

/**
 * Builds a collision-safe, unique workspace directory path under a given
 * base workspace, used as a staging area for atomic replace output.
 */
public class SafeWorkspaceName
{
    // Constant naming fixed to UPPER_SNAKE_CASE per Java convention.
    private static final String PREFIX = "embulk-output-hdfs";

    // Utility class: not instantiable.
    private SafeWorkspaceName()
    {
    }

    /**
     * Returns {@code workspace/embulk-output-hdfs_<nanoTime>_<uuid>}.
     * The nano timestamp plus a random UUID makes collisions between
     * concurrent runs practically impossible.
     *
     * @param workspace base directory to place the unique directory under
     * @return the unique workspace path
     */
    public static String build(String workspace)
    {
        long nanoTime = System.nanoTime();
        String uuid = UUID.randomUUID().toString();
        String dirname = String.format("%s_%d_%s", PREFIX, nanoTime, uuid);
        return Paths.get(workspace, dirname).toString();
    }
}
|
@@ -0,0 +1,21 @@
|
|
1
|
+
package org.embulk.output.hdfs.util;

import java.nio.file.Path;
import java.nio.file.Paths;

/**
 * Derives sample output paths from the plugin's path settings, used to
 * determine the destination directory before any file is written.
 */
public class SamplePath
{
    // Utility class: not instantiable.
    private SamplePath()
    {
    }

    /**
     * Builds a sample file path for task/sequence number 0.
     *
     * @param pathPrefix resolved path prefix
     * @param sequenceFormat printf-style format taking two ints (task index, file index)
     * @param fileExt file extension appended verbatim
     */
    public static String getFile(String pathPrefix, String sequenceFormat, String fileExt)
    {
        return pathPrefix + String.format(sequenceFormat, 0, 0) + fileExt;
    }

    /**
     * Returns the directory component of the sample file path.
     * Previously this threw a NullPointerException when the prefix had no
     * directory component ({@code getParent()} returns null); now it
     * returns an empty string in that case.
     */
    public static String getDir(String pathPrefix, String sequenceFormat, String fileExt)
    {
        String sampleFile = getFile(pathPrefix, sequenceFormat, fileExt);
        Path parent = Paths.get(sampleFile).getParent();
        return parent == null ? "" : parent.toString();
    }
}
|
@@ -0,0 +1,23 @@
|
|
1
|
+
package org.embulk.output.hdfs.util;

import org.jruby.embed.ScriptingContainer;

/**
 * Resolves Ruby strftime format strings by evaluating a small Ruby snippet
 * via embedded JRuby, matching embulk's Ruby-compatible time formatting.
 */
public class StrftimeUtil
{
    // Ruby snippet template: "(Time.now - <rewindSeconds>).strftime('<format>')".
    private static final String scriptTemplate = "(Time.now - %d).strftime('%s')";

    // Utility class: not instantiable.
    private StrftimeUtil()
    {
    }

    /**
     * Formats "now minus rewindSeconds" with the given strftime format.
     *
     * @param format Ruby strftime format string
     * @param rewindSeconds seconds to subtract from the current time
     * @return the formatted time string
     */
    public static String strftime(String format, int rewindSeconds)
    {
        String script = buildScript(format, rewindSeconds);
        // NOTE(review): a new ScriptingContainer is created per call, which is
        // expensive; consider caching one — verify JRuby thread-safety first.
        return new ScriptingContainer().runScriptlet(script).toString();
    }

    // Builds the Ruby scriptlet. NOTE(review): `format` is interpolated into
    // single quotes unescaped — a format containing `'` would break or alter
    // the script; confirm inputs are trusted configuration values.
    private static String buildScript(String format, int rewindSeconds)
    {
        return String.format(scriptTemplate, rewindSeconds, format);
    }
}
|
@@ -4,6 +4,7 @@ import com.google.common.base.Charsets;
|
|
4
4
|
import com.google.common.base.Optional;
|
5
5
|
import com.google.common.collect.Lists;
|
6
6
|
import com.google.common.collect.Maps;
|
7
|
+
import org.apache.hadoop.fs.FileAlreadyExistsException;
|
7
8
|
import org.embulk.EmbulkTestRuntime;
|
8
9
|
import org.embulk.config.ConfigException;
|
9
10
|
import org.embulk.config.ConfigSource;
|
@@ -17,6 +18,7 @@ import org.embulk.spi.PageTestUtils;
|
|
17
18
|
import org.embulk.spi.Schema;
|
18
19
|
import org.embulk.spi.TransactionalPageOutput;
|
19
20
|
import org.embulk.spi.time.Timestamp;
|
21
|
+
import org.junit.Assert;
|
20
22
|
import org.junit.Before;
|
21
23
|
import org.junit.Rule;
|
22
24
|
import org.junit.Test;
|
@@ -37,9 +39,11 @@ import static org.embulk.spi.type.Types.*;
|
|
37
39
|
import static org.hamcrest.CoreMatchers.containsString;
|
38
40
|
import static org.hamcrest.CoreMatchers.hasItem;
|
39
41
|
import static org.hamcrest.CoreMatchers.not;
|
42
|
+
import static org.hamcrest.core.Is.isA;
|
40
43
|
import static org.junit.Assert.assertEquals;
|
41
44
|
import static org.junit.Assert.assertNotEquals;
|
42
45
|
import static org.junit.Assert.assertThat;
|
46
|
+
import static org.junit.Assert.assertTrue;
|
43
47
|
import static org.msgpack.value.ValueFactory.newMap;
|
44
48
|
import static org.msgpack.value.ValueFactory.newString;
|
45
49
|
|
@@ -107,9 +111,9 @@ public class TestHdfsFileOutputPlugin
|
|
107
111
|
assertEquals(Lists.newArrayList(), task.getConfigFiles());
|
108
112
|
assertEquals(Maps.newHashMap(), task.getConfig());
|
109
113
|
assertEquals(0, task.getRewindSeconds());
|
110
|
-
assertEquals(
|
114
|
+
assertEquals(Optional.absent(), task.getOverwrite());
|
111
115
|
assertEquals(Optional.absent(), task.getDoas());
|
112
|
-
assertEquals(
|
116
|
+
assertEquals(Optional.absent(), task.getDeleteInAdvance());
|
113
117
|
}
|
114
118
|
|
115
119
|
@Test(expected = ConfigException.class)
|
@@ -119,6 +123,14 @@ public class TestHdfsFileOutputPlugin
|
|
119
123
|
PluginTask task = config.loadConfig(PluginTask.class);
|
120
124
|
}
|
121
125
|
|
126
|
+
@Test(expected = ConfigException.class)
|
127
|
+
public void testSequenceFormatMode_replace()
|
128
|
+
{
|
129
|
+
run(getBaseConfigSource()
|
130
|
+
.set("mode", "replace")
|
131
|
+
.set("sequence_format", "%d/%d"));
|
132
|
+
}
|
133
|
+
|
122
134
|
private List<String> lsR(List<String> names, java.nio.file.Path dir)
|
123
135
|
{
|
124
136
|
try (DirectoryStream<java.nio.file.Path> stream = Files.newDirectoryStream(dir)) {
|
@@ -201,14 +213,20 @@ public class TestHdfsFileOutputPlugin
|
|
201
213
|
}
|
202
214
|
}
|
203
215
|
|
216
|
+
private ConfigSource getDefaultFsConfig()
|
217
|
+
{
|
218
|
+
return Exec.newConfigSource()
|
219
|
+
.set("fs.hdfs.impl", "org.apache.hadoop.fs.RawLocalFileSystem")
|
220
|
+
.set("fs.file.impl", "org.apache.hadoop.fs.RawLocalFileSystem")
|
221
|
+
.set("fs.trash.interval", "3600")
|
222
|
+
.set("fs.defaultFS", "file:///");
|
223
|
+
}
|
224
|
+
|
204
225
|
@Test
|
205
226
|
public void testBulkLoad()
|
206
227
|
{
|
207
228
|
ConfigSource config = getBaseConfigSource()
|
208
|
-
.setNested("config",
|
209
|
-
.set("fs.hdfs.impl", "org.apache.hadoop.fs.RawLocalFileSystem")
|
210
|
-
.set("fs.file.impl", "org.apache.hadoop.fs.RawLocalFileSystem")
|
211
|
-
.set("fs.defaultFS", "file:///"));
|
229
|
+
.setNested("config", getDefaultFsConfig());
|
212
230
|
|
213
231
|
run(config);
|
214
232
|
List<String> fileList = lsR(Lists.<String>newArrayList(), Paths.get(tmpFolder.getRoot().getAbsolutePath()));
|
@@ -219,28 +237,25 @@ public class TestHdfsFileOutputPlugin
|
|
219
237
|
}
|
220
238
|
|
221
239
|
@Test
|
222
|
-
public void
|
240
|
+
public void testDeleteInAdvance_RECURSIVE()
|
223
241
|
throws IOException
|
224
242
|
{
|
225
243
|
for (int n = 0; n <= 10; n++) {
|
226
|
-
tmpFolder.newFile("embulk-output-
|
227
|
-
tmpFolder.newFolder("embulk-output-
|
244
|
+
tmpFolder.newFile("embulk-output-hdfs_testDeleteInAdvance_RECURSIVE_file_" + n + ".txt");
|
245
|
+
tmpFolder.newFolder("embulk-output-hdfs_testDeleteInAdvance_RECURSIVE_directory_" + n);
|
228
246
|
}
|
229
247
|
|
230
248
|
List<String> fileListBeforeRun = lsR(Lists.<String>newArrayList(), Paths.get(tmpFolder.getRoot().getAbsolutePath()));
|
231
249
|
|
232
250
|
ConfigSource config = getBaseConfigSource()
|
233
|
-
.setNested("config",
|
234
|
-
.set("fs.hdfs.impl", "org.apache.hadoop.fs.RawLocalFileSystem")
|
235
|
-
.set("fs.file.impl", "org.apache.hadoop.fs.RawLocalFileSystem")
|
236
|
-
.set("fs.defaultFS", "file:///"))
|
251
|
+
.setNested("config", getDefaultFsConfig())
|
237
252
|
.set("delete_in_advance", "RECURSIVE");
|
238
253
|
|
239
254
|
run(config);
|
240
255
|
|
241
256
|
List<String> fileListAfterRun = lsR(Lists.<String>newArrayList(), Paths.get(tmpFolder.getRoot().getAbsolutePath()));
|
242
257
|
assertNotEquals(fileListBeforeRun, fileListAfterRun);
|
243
|
-
assertThat(fileListAfterRun, not(hasItem(containsString("embulk-output-
|
258
|
+
assertThat(fileListAfterRun, not(hasItem(containsString("embulk-output-hdfs_testDeleteInAdvance_RECURSIVE_directory_"))));
|
244
259
|
assertThat(fileListAfterRun, not(hasItem(containsString("txt"))));
|
245
260
|
assertThat(fileListAfterRun, hasItem(containsString(pathPrefix + "001.00.csv")));
|
246
261
|
assertRecordsInFile(String.format("%s/%s001.00.csv",
|
@@ -249,21 +264,18 @@ public class TestHdfsFileOutputPlugin
|
|
249
264
|
}
|
250
265
|
|
251
266
|
@Test
|
252
|
-
public void
|
267
|
+
public void testDeleteInAdvance_FILE_ONLY()
|
253
268
|
throws IOException
|
254
269
|
{
|
255
270
|
for (int n = 0; n <= 10; n++) {
|
256
|
-
tmpFolder.newFile("embulk-output-
|
257
|
-
tmpFolder.newFolder("embulk-output-
|
271
|
+
tmpFolder.newFile("embulk-output-hdfs_testDeleteInAdvance_FILE_ONLY_file_" + n + ".txt");
|
272
|
+
tmpFolder.newFolder("embulk-output-hdfs_testDeleteInAdvance_FILE_ONLY_directory_" + n);
|
258
273
|
}
|
259
274
|
|
260
275
|
List<String> fileListBeforeRun = lsR(Lists.<String>newArrayList(), Paths.get(tmpFolder.getRoot().getAbsolutePath()));
|
261
276
|
|
262
277
|
ConfigSource config = getBaseConfigSource()
|
263
|
-
.setNested("config",
|
264
|
-
.set("fs.hdfs.impl", "org.apache.hadoop.fs.RawLocalFileSystem")
|
265
|
-
.set("fs.file.impl", "org.apache.hadoop.fs.RawLocalFileSystem")
|
266
|
-
.set("fs.defaultFS", "file:///"))
|
278
|
+
.setNested("config", getDefaultFsConfig())
|
267
279
|
.set("delete_in_advance", "FILE_ONLY");
|
268
280
|
|
269
281
|
run(config);
|
@@ -271,7 +283,126 @@ public class TestHdfsFileOutputPlugin
|
|
271
283
|
List<String> fileListAfterRun = lsR(Lists.<String>newArrayList(), Paths.get(tmpFolder.getRoot().getAbsolutePath()));
|
272
284
|
assertNotEquals(fileListBeforeRun, fileListAfterRun);
|
273
285
|
assertThat(fileListAfterRun, not(hasItem(containsString("txt"))));
|
274
|
-
assertThat(fileListAfterRun, hasItem(containsString("embulk-output-
|
286
|
+
assertThat(fileListAfterRun, hasItem(containsString("embulk-output-hdfs_testDeleteInAdvance_FILE_ONLY_directory_")));
|
287
|
+
assertThat(fileListAfterRun, hasItem(containsString(pathPrefix + "001.00.csv")));
|
288
|
+
assertRecordsInFile(String.format("%s/%s001.00.csv",
|
289
|
+
tmpFolder.getRoot().getAbsolutePath(),
|
290
|
+
pathPrefix));
|
291
|
+
}
|
292
|
+
|
293
|
+
@Test
|
294
|
+
public void testMode_delete_recursive_in_advance()
|
295
|
+
throws IOException
|
296
|
+
{
|
297
|
+
for (int n = 0; n <= 10; n++) {
|
298
|
+
tmpFolder.newFile("embulk-output-hdfs_testMode_delete_recursive_in_advance_file_" + n + ".txt");
|
299
|
+
tmpFolder.newFolder("embulk-output-hdfs_testMode_delete_recursive_in_advance_directory_" + n);
|
300
|
+
}
|
301
|
+
|
302
|
+
List<String> fileListBeforeRun = lsR(Lists.<String>newArrayList(), Paths.get(tmpFolder.getRoot().getAbsolutePath()));
|
303
|
+
|
304
|
+
ConfigSource config = getBaseConfigSource()
|
305
|
+
.setNested("config", getDefaultFsConfig())
|
306
|
+
.set("mode", "delete_recursive_in_advance");
|
307
|
+
|
308
|
+
run(config);
|
309
|
+
|
310
|
+
List<String> fileListAfterRun = lsR(Lists.<String>newArrayList(), Paths.get(tmpFolder.getRoot().getAbsolutePath()));
|
311
|
+
assertNotEquals(fileListBeforeRun, fileListAfterRun);
|
312
|
+
assertThat(fileListAfterRun, not(hasItem(containsString("embulk-output-hdfs_testMode_delete_recursive_in_advance_directory_"))));
|
313
|
+
assertThat(fileListAfterRun, not(hasItem(containsString("txt"))));
|
314
|
+
assertThat(fileListAfterRun, hasItem(containsString(pathPrefix + "001.00.csv")));
|
315
|
+
assertRecordsInFile(String.format("%s/%s001.00.csv",
|
316
|
+
tmpFolder.getRoot().getAbsolutePath(),
|
317
|
+
pathPrefix));
|
318
|
+
}
|
319
|
+
|
320
|
+
@Test
|
321
|
+
public void testMode_delete_files_in_advance()
|
322
|
+
throws IOException
|
323
|
+
{
|
324
|
+
for (int n = 0; n <= 10; n++) {
|
325
|
+
tmpFolder.newFile("embulk-output-hdfs_testMode_delete_files_in_advance_file_" + n + ".txt");
|
326
|
+
tmpFolder.newFolder("embulk-output-hdfs_testMode_delete_files_in_advance_directory_" + n);
|
327
|
+
}
|
328
|
+
|
329
|
+
List<String> fileListBeforeRun = lsR(Lists.<String>newArrayList(), Paths.get(tmpFolder.getRoot().getAbsolutePath()));
|
330
|
+
|
331
|
+
ConfigSource config = getBaseConfigSource()
|
332
|
+
.setNested("config", getDefaultFsConfig())
|
333
|
+
.set("mode", "delete_files_in_advance");
|
334
|
+
|
335
|
+
run(config);
|
336
|
+
|
337
|
+
List<String> fileListAfterRun = lsR(Lists.<String>newArrayList(), Paths.get(tmpFolder.getRoot().getAbsolutePath()));
|
338
|
+
assertNotEquals(fileListBeforeRun, fileListAfterRun);
|
339
|
+
assertThat(fileListAfterRun, not(hasItem(containsString("txt"))));
|
340
|
+
assertThat(fileListAfterRun, hasItem(containsString("embulk-output-hdfs_testMode_delete_files_in_advance_directory_")));
|
341
|
+
assertThat(fileListAfterRun, hasItem(containsString(pathPrefix + "001.00.csv")));
|
342
|
+
assertRecordsInFile(String.format("%s/%s001.00.csv",
|
343
|
+
tmpFolder.getRoot().getAbsolutePath(),
|
344
|
+
pathPrefix));
|
345
|
+
}
|
346
|
+
|
347
|
+
@Test
|
348
|
+
public void testMode_abort_if_exist()
|
349
|
+
throws IOException
|
350
|
+
{
|
351
|
+
ConfigSource config = getBaseConfigSource()
|
352
|
+
.setNested("config", getDefaultFsConfig())
|
353
|
+
.set("mode", "abort_if_exist");
|
354
|
+
|
355
|
+
run(config);
|
356
|
+
try {
|
357
|
+
run(config);
|
358
|
+
}
|
359
|
+
catch (Exception e) {
|
360
|
+
Throwable t = e;
|
361
|
+
while (t != null) {
|
362
|
+
t = t.getCause();
|
363
|
+
if (t.getCause() instanceof FileAlreadyExistsException) {
|
364
|
+
Assert.assertTrue(true);
|
365
|
+
return;
|
366
|
+
}
|
367
|
+
}
|
368
|
+
Assert.fail("FileAlreadyExistsException is not cause.");
|
369
|
+
}
|
370
|
+
|
371
|
+
}
|
372
|
+
|
373
|
+
@Test
|
374
|
+
public void testMode_overwrite()
|
375
|
+
throws IOException
|
376
|
+
{
|
377
|
+
ConfigSource config = getBaseConfigSource()
|
378
|
+
.setNested("config", getDefaultFsConfig())
|
379
|
+
.set("mode", "overwrite");
|
380
|
+
|
381
|
+
run(config);
|
382
|
+
run(config);
|
383
|
+
Assert.assertTrue(true);
|
384
|
+
}
|
385
|
+
|
386
|
+
@Test
|
387
|
+
public void testMode_replace()
|
388
|
+
throws IOException
|
389
|
+
{
|
390
|
+
for (int n = 0; n <= 10; n++) {
|
391
|
+
tmpFolder.newFile("embulk-output-hdfs_testMode_delete_recursive_in_advance_file_" + n + ".txt");
|
392
|
+
tmpFolder.newFolder("embulk-output-hdfs_testMode_delete_recursive_in_advance_directory_" + n);
|
393
|
+
}
|
394
|
+
|
395
|
+
List<String> fileListBeforeRun = lsR(Lists.<String>newArrayList(), Paths.get(tmpFolder.getRoot().getAbsolutePath()));
|
396
|
+
|
397
|
+
|
398
|
+
run(getBaseConfigSource()
|
399
|
+
.set("config", getDefaultFsConfig())
|
400
|
+
.set("mode", "replace"));
|
401
|
+
|
402
|
+
List<String> fileListAfterRun = lsR(Lists.<String>newArrayList(), Paths.get(tmpFolder.getRoot().getAbsolutePath()));
|
403
|
+
assertNotEquals(fileListBeforeRun, fileListAfterRun);
|
404
|
+
assertThat(fileListAfterRun, not(hasItem(containsString("embulk-output-hdfs_testMode_delete_recursive_in_advance_directory_"))));
|
405
|
+
assertThat(fileListAfterRun, not(hasItem(containsString("txt"))));
|
275
406
|
assertThat(fileListAfterRun, hasItem(containsString(pathPrefix + "001.00.csv")));
|
276
407
|
assertRecordsInFile(String.format("%s/%s001.00.csv",
|
277
408
|
tmpFolder.getRoot().getAbsolutePath(),
|