embulk-output-hdfs 0.2.4 → 0.3.0
This diff shows the changes between two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/.travis.yml +6 -1
- data/CHANGELOG.md +9 -0
- data/README.md +38 -9
- data/build.gradle +10 -8
- data/example/config.yml +3 -1
- data/example/config_deprecated_option.yml +52 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +1 -2
- data/gradlew +43 -35
- data/gradlew.bat +4 -10
- data/settings.gradle +1 -0
- data/src/main/java/org/embulk/output/hdfs/HdfsFileOutput.java +160 -0
- data/src/main/java/org/embulk/output/hdfs/HdfsFileOutputPlugin.java +55 -175
- data/src/main/java/org/embulk/output/hdfs/ModeTask.java +111 -0
- data/src/main/java/org/embulk/output/hdfs/client/HdfsClient.java +269 -0
- data/src/main/java/org/embulk/output/hdfs/compat/ModeCompat.java +76 -0
- data/src/main/java/org/embulk/output/hdfs/transaction/AbortIfExistTx.java +6 -0
- data/src/main/java/org/embulk/output/hdfs/transaction/AbstractTx.java +53 -0
- data/src/main/java/org/embulk/output/hdfs/transaction/ControlRun.java +10 -0
- data/src/main/java/org/embulk/output/hdfs/transaction/DeleteFilesInAdvanceTx.java +22 -0
- data/src/main/java/org/embulk/output/hdfs/transaction/DeleteRecursiveInAdvanceTx.java +22 -0
- data/src/main/java/org/embulk/output/hdfs/transaction/OverwriteTx.java +11 -0
- data/src/main/java/org/embulk/output/hdfs/transaction/ReplaceTx.java +62 -0
- data/src/main/java/org/embulk/output/hdfs/transaction/Tx.java +13 -0
- data/src/main/java/org/embulk/output/hdfs/util/SafeWorkspaceName.java +21 -0
- data/src/main/java/org/embulk/output/hdfs/util/SamplePath.java +21 -0
- data/src/main/java/org/embulk/output/hdfs/util/StrftimeUtil.java +23 -0
- data/src/test/java/org/embulk/output/hdfs/TestHdfsFileOutputPlugin.java +153 -22
- metadata +87 -70
data/src/main/java/org/embulk/output/hdfs/compat/ModeCompat.java (new file)
@@ -0,0 +1,76 @@
+package org.embulk.output.hdfs.compat;
+
+import com.google.common.base.Optional;
+import org.embulk.config.ConfigException;
+import org.embulk.output.hdfs.ModeTask;
+import org.embulk.output.hdfs.ModeTask.Mode;
+import org.embulk.spi.Exec;
+import org.slf4j.Logger;
+
+import static org.embulk.output.hdfs.HdfsFileOutputPlugin.PluginTask.DeleteInAdvancePolicy;
+import static org.embulk.output.hdfs.ModeTask.Mode.ABORT_IF_EXIST;
+import static org.embulk.output.hdfs.ModeTask.Mode.DELETE_FILES_IN_ADVANCE;
+import static org.embulk.output.hdfs.ModeTask.Mode.DELETE_RECURSIVE_IN_ADVANCE;
+import static org.embulk.output.hdfs.ModeTask.Mode.OVERWRITE;
+
+@Deprecated
+public class ModeCompat
+{
+    private static final Logger logger = Exec.getLogger(ModeCompat.class);
+
+    private ModeCompat()
+    {
+    }
+
+    @Deprecated
+    public static Mode getMode(
+            ModeTask task,
+            Optional<Boolean> overwrite,
+            Optional<DeleteInAdvancePolicy> deleteInAdvancePolicy)
+    {
+        if (!overwrite.isPresent() && !deleteInAdvancePolicy.isPresent()) {
+            return task.getMode();
+        }
+
+        // Display Deprecated Messages
+        if (overwrite.isPresent()) {
+            logger.warn("`overwrite` option is Deprecated. Please use `mode` option instead.");
+        }
+        if (deleteInAdvancePolicy.isPresent()) {
+            logger.warn("`delete_in_advance` is Deprecated. Please use `mode` option instead.");
+        }
+        if (!task.getMode().isDefaultMode()) {
+            String msg = "`mode` option cannot be used with `overwrite` option or `delete_in_advance` option.";
+            logger.error(msg);
+            throw new ConfigException(msg);
+        }
+
+        // Select Mode for Compatibility
+        if (!deleteInAdvancePolicy.isPresent()) {
+            if (overwrite.get()) {
+                logger.warn(String.format("Select `mode: %s` for compatibility", OVERWRITE.toString()));
+                return OVERWRITE;
+            }
+            logger.warn(String.format("Select `mode: %s` for compatibility", ABORT_IF_EXIST.toString()));
+            return ABORT_IF_EXIST;
+        }
+
+        switch (deleteInAdvancePolicy.get()) { // deleteInAdvancePolicy is always present.
+            case NONE:
+                if (overwrite.isPresent() && overwrite.get()) {
+                    logger.warn(String.format("Select `mode: %s` for compatibility", OVERWRITE.toString()));
+                    return OVERWRITE;
+                }
+                logger.warn(String.format("Select `mode: %s` for compatibility", ABORT_IF_EXIST.toString()));
+                return ABORT_IF_EXIST;
+            case FILE_ONLY:
+                logger.warn(String.format("Select `mode: %s` for compatibility", DELETE_FILES_IN_ADVANCE.toString()));
+                return DELETE_FILES_IN_ADVANCE;
+            case RECURSIVE:
+                logger.warn(String.format("Select `mode: %s` for compatibility", DELETE_RECURSIVE_IN_ADVANCE.toString()));
+                return DELETE_RECURSIVE_IN_ADVANCE;
+            default:
+                throw new ConfigException(String.format("Unknown policy: %s", deleteInAdvancePolicy.get()));
+        }
+    }
+}
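Note: the fallback table above is easier to see outside the control flow. Below is a minimal standalone sketch of the same mapping, for illustration only: the enum stubs and the `resolve` helper are hypothetical, it uses `java.util.Optional` where the plugin uses Guava's, and it omits the two cases handled before the table (both options absent returns the configured mode; a non-default `mode` combined with either deprecated option raises a ConfigException).

import java.util.Optional;

public class ModeCompatSketch
{
    enum Mode { ABORT_IF_EXIST, OVERWRITE, DELETE_FILES_IN_ADVANCE, DELETE_RECURSIVE_IN_ADVANCE }
    enum DeleteInAdvancePolicy { NONE, FILE_ONLY, RECURSIVE }

    // Mirrors ModeCompat's fallback table for the deprecated options.
    static Mode resolve(Optional<Boolean> overwrite, Optional<DeleteInAdvancePolicy> policy)
    {
        if (!policy.isPresent() || policy.get() == DeleteInAdvancePolicy.NONE) {
            // Without a delete policy (or with NONE), only `overwrite: true` changes the result.
            return overwrite.orElse(false) ? Mode.OVERWRITE : Mode.ABORT_IF_EXIST;
        }
        return policy.get() == DeleteInAdvancePolicy.FILE_ONLY
                ? Mode.DELETE_FILES_IN_ADVANCE
                : Mode.DELETE_RECURSIVE_IN_ADVANCE;
    }

    public static void main(String[] args)
    {
        System.out.println(resolve(Optional.of(true), Optional.empty()));  // OVERWRITE
        System.out.println(resolve(Optional.empty(),
                Optional.of(DeleteInAdvancePolicy.RECURSIVE)));            // DELETE_RECURSIVE_IN_ADVANCE
    }
}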
data/src/main/java/org/embulk/output/hdfs/transaction/AbstractTx.java (new file)
@@ -0,0 +1,53 @@
+package org.embulk.output.hdfs.transaction;
+
+import org.embulk.config.ConfigDiff;
+import org.embulk.config.TaskReport;
+import org.embulk.config.TaskSource;
+import org.embulk.output.hdfs.HdfsFileOutput;
+import org.embulk.output.hdfs.util.StrftimeUtil;
+import org.embulk.spi.Exec;
+import org.embulk.spi.TransactionalFileOutput;
+
+import java.util.List;
+
+import static org.embulk.output.hdfs.HdfsFileOutputPlugin.PluginTask;
+
+abstract class AbstractTx
+        implements Tx
+{
+    protected void beforeRun(PluginTask task)
+    {
+    }
+
+    protected void afterRun(PluginTask task, List<TaskReport> reports)
+    {
+    }
+
+    protected ConfigDiff newConfigDiff()
+    {
+        return Exec.newConfigDiff();
+    }
+
+    public ConfigDiff transaction(PluginTask task, ControlRun control)
+    {
+        beforeRun(task);
+        List<TaskReport> reports = control.run();
+        afterRun(task, reports);
+        return newConfigDiff();
+    }
+
+    protected String getPathPrefix(PluginTask task)
+    {
+        return StrftimeUtil.strftime(task.getPathPrefix(), task.getRewindSeconds());
+    }
+
+    protected boolean canOverwrite()
+    {
+        return false;
+    }
+
+    public TransactionalFileOutput newOutput(PluginTask task, TaskSource taskSource, int taskIndex)
+    {
+        return new HdfsFileOutput(task, getPathPrefix(task), canOverwrite(), taskIndex);
+    }
+}
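AbstractTx is a template method: `transaction` always runs `beforeRun`, then the Embulk control (`control.run()`), then `afterRun`, while `newOutput` wires `getPathPrefix` and `canOverwrite` into `HdfsFileOutput`. The concrete transactions below all follow this shape; a hypothetical subclass (not part of this release) only needs to override the hooks it cares about:

package org.embulk.output.hdfs.transaction;

import static org.embulk.output.hdfs.HdfsFileOutputPlugin.PluginTask;

// Hypothetical example: a transaction that prepares something before tasks run.
// AbstractTx is package-private, so a real subclass must live in this package.
class ExampleTx
        extends AbstractTx
{
    @Override
    protected void beforeRun(PluginTask task)
    {
        // e.g. validate options or create directories before any task starts
    }

    @Override
    protected boolean canOverwrite()
    {
        return true; // passed through to HdfsFileOutput by newOutput()
    }
}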
data/src/main/java/org/embulk/output/hdfs/transaction/DeleteFilesInAdvanceTx.java (new file)
@@ -0,0 +1,22 @@
+package org.embulk.output.hdfs.transaction;
+
+import org.apache.hadoop.fs.Path;
+import org.embulk.output.hdfs.HdfsFileOutputPlugin;
+import org.embulk.output.hdfs.client.HdfsClient;
+import org.embulk.spi.Exec;
+import org.slf4j.Logger;
+
+public class DeleteFilesInAdvanceTx
+        extends AbstractTx
+{
+    private static final Logger logger = Exec.getLogger(DeleteFilesInAdvanceTx.class);
+
+    @Override
+    protected void beforeRun(HdfsFileOutputPlugin.PluginTask task)
+    {
+        HdfsClient hdfsClient = HdfsClient.build(task);
+        Path globPath = new Path(getPathPrefix(task) + "*");
+        logger.info("Delete {} (File Only) in advance", globPath);
+        hdfsClient.globFilesAndTrash(globPath);
+    }
+}
data/src/main/java/org/embulk/output/hdfs/transaction/DeleteRecursiveInAdvanceTx.java (new file)
@@ -0,0 +1,22 @@
+package org.embulk.output.hdfs.transaction;
+
+import org.apache.hadoop.fs.Path;
+import org.embulk.output.hdfs.HdfsFileOutputPlugin;
+import org.embulk.output.hdfs.client.HdfsClient;
+import org.embulk.spi.Exec;
+import org.slf4j.Logger;
+
+public class DeleteRecursiveInAdvanceTx
+        extends AbstractTx
+{
+    private static final Logger logger = Exec.getLogger(DeleteRecursiveInAdvanceTx.class);
+
+    @Override
+    protected void beforeRun(HdfsFileOutputPlugin.PluginTask task)
+    {
+        HdfsClient hdfsClient = HdfsClient.build(task);
+        Path globPath = new Path(getPathPrefix(task) + "*");
+        logger.info("Delete {} (Recursive) in advance", globPath);
+        hdfsClient.globAndTrash(globPath);
+    }
+}
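Both delete-in-advance transactions build the same glob from the strftime-resolved path prefix; they differ only in whether directories are trashed too (`globAndTrash`) or files only (`globFilesAndTrash`). A small sketch of the pattern they construct, with an illustrative prefix (not plugin code):

import org.apache.hadoop.fs.Path;

public class GlobSketch
{
    public static void main(String[] args)
    {
        // With a resolved path prefix of "/data/out", beforeRun trashes "/data/out*".
        // The glob is purely prefix-based, so a sibling entry such as "/data/out_old"
        // would match as well.
        Path globPath = new Path("/data/out" + "*");
        System.out.println(globPath); // /data/out*
    }
}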
data/src/main/java/org/embulk/output/hdfs/transaction/ReplaceTx.java (new file)
@@ -0,0 +1,62 @@
+package org.embulk.output.hdfs.transaction;
+
+import org.embulk.config.ConfigException;
+import org.embulk.config.TaskReport;
+import org.embulk.output.hdfs.HdfsFileOutputPlugin.PluginTask;
+import org.embulk.output.hdfs.client.HdfsClient;
+import org.embulk.output.hdfs.util.SafeWorkspaceName;
+import org.embulk.output.hdfs.util.SamplePath;
+import org.embulk.output.hdfs.util.StrftimeUtil;
+import org.embulk.spi.Exec;
+import org.slf4j.Logger;
+
+import java.nio.file.Paths;
+import java.util.List;
+
+public class ReplaceTx
+        extends AbstractTx
+{
+    private static final Logger logger = Exec.getLogger(ReplaceTx.class);
+
+    @Override
+    protected String getPathPrefix(PluginTask task)
+    {
+        return Paths.get(task.getSafeWorkspace(), super.getPathPrefix(task)).toString();
+    }
+
+    @Override
+    protected void beforeRun(PluginTask task)
+    {
+        HdfsClient hdfsClient = HdfsClient.build(task);
+        if (task.getSequenceFormat().contains("/")) {
+            throw new ConfigException("Must not include `/` in `sequence_format` if atomic is true.");
+        }
+
+        String safeWorkspace = SafeWorkspaceName.build(task.getWorkspace());
+        logger.info("Use as a workspace: {}", safeWorkspace);
+
+        String safeWsWithOutput = Paths.get(safeWorkspace, getOutputSampleDir(task)).toString();
+        logger.debug("The actual workspace must be with output dirs: {}", safeWsWithOutput);
+        if (!hdfsClient.mkdirs(safeWsWithOutput)) {
+            throw new ConfigException(String.format("Failed to make a directory: %s", safeWsWithOutput));
+        }
+        task.setSafeWorkspace(safeWorkspace);
+    }
+
+    @Override
+    protected void afterRun(PluginTask task, List<TaskReport> reports)
+    {
+        HdfsClient hdfsClient = HdfsClient.build(task);
+        String outputDir = getOutputSampleDir(task);
+        String safeWsWithOutput = Paths.get(task.getSafeWorkspace(), getOutputSampleDir(task)).toString();
+
+        hdfsClient.renameDirectory(safeWsWithOutput, outputDir, true);
+        logger.info("Store: {} >>> {}", safeWsWithOutput, outputDir);
+    }
+
+    private String getOutputSampleDir(PluginTask task)
+    {
+        String pathPrefix = StrftimeUtil.strftime(task.getPathPrefix(), task.getRewindSeconds());
+        return SamplePath.getDir(pathPrefix, task.getSequenceFormat(), task.getFileExt());
+    }
+}
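The replace transaction gets its atomicity from path arithmetic plus a single directory rename: tasks write under a unique workspace (because `getPathPrefix` prepends `getSafeWorkspace()`), and `afterRun` renames the staged output directory onto the real one. A sketch of that arithmetic with illustrative values (the workspace, prefix, and format here are made up):

import org.embulk.output.hdfs.util.SafeWorkspaceName;
import org.embulk.output.hdfs.util.SamplePath;

public class ReplaceFlowSketch
{
    public static void main(String[] args)
    {
        String workspace = "/tmp";                                 // task.getWorkspace()
        String safeWorkspace = SafeWorkspaceName.build(workspace); // /tmp/embulk-output-hdfs_<nanos>_<uuid>
        String pathPrefix = "/data/2016-01-01/out_";               // strftime-resolved path_prefix
        String outputDir = SamplePath.getDir(pathPrefix, "%03d.%02d", ".csv"); // /data/2016-01-01

        // beforeRun: mkdirs(safeWorkspace + outputDir); tasks then write below it.
        // afterRun:  renameDirectory(safeWorkspace + outputDir, outputDir, true)
        //            swaps the staged tree into place in one rename.
        System.out.println(safeWorkspace + outputDir);
    }
}

This is also why `beforeRun` rejects a `sequence_format` containing `/`: per-task subdirectories would fall outside the sampled directory, so the single rename could miss files.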
data/src/main/java/org/embulk/output/hdfs/transaction/Tx.java (new file)
@@ -0,0 +1,13 @@
+package org.embulk.output.hdfs.transaction;
+
+import org.embulk.config.ConfigDiff;
+import org.embulk.config.TaskSource;
+import org.embulk.output.hdfs.HdfsFileOutputPlugin.PluginTask;
+import org.embulk.spi.TransactionalFileOutput;
+
+public interface Tx
+{
+    ConfigDiff transaction(PluginTask task, ControlRun control);
+
+    TransactionalFileOutput newOutput(PluginTask task, TaskSource taskSource, int taskIndex);
+}
data/src/main/java/org/embulk/output/hdfs/util/SafeWorkspaceName.java (new file)
@@ -0,0 +1,21 @@
+package org.embulk.output.hdfs.util;
+
+import java.nio.file.Paths;
+import java.util.UUID;
+
+public class SafeWorkspaceName
+{
+    private static final String prefix = "embulk-output-hdfs";
+
+    private SafeWorkspaceName()
+    {
+    }
+
+    public static String build(String workspace)
+    {
+        long nanoTime = System.nanoTime();
+        String uuid = UUID.randomUUID().toString();
+        String dirname = String.format("%s_%d_%s", prefix, nanoTime, uuid);
+        return Paths.get(workspace, dirname).toString();
+    }
+}
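Combining `System.nanoTime()` with a random UUID makes workspace collisions across concurrent runs practically impossible. A usage sketch (the printed value is illustrative):

import org.embulk.output.hdfs.util.SafeWorkspaceName;

public class SafeWorkspaceSketch
{
    public static void main(String[] args)
    {
        // e.g. /tmp/embulk-output-hdfs_123456789012345_1b4e28ba-2fa1-11d2-883f-0016d3cca427
        System.out.println(SafeWorkspaceName.build("/tmp"));
    }
}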
data/src/main/java/org/embulk/output/hdfs/util/SamplePath.java (new file)
@@ -0,0 +1,21 @@
+package org.embulk.output.hdfs.util;
+
+import java.nio.file.Paths;
+
+public class SamplePath
+{
+    private SamplePath()
+    {
+    }
+
+    public static String getFile(String pathPrefix, String sequenceFormat, String fileExt)
+    {
+        return pathPrefix + String.format(sequenceFormat, 0, 0) + fileExt;
+    }
+
+    public static String getDir(String pathPrefix, String sequenceFormat, String fileExt)
+    {
+        String sampleFile = getFile(pathPrefix, sequenceFormat, fileExt);
+        return Paths.get(sampleFile).getParent().toString();
+    }
+}
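SamplePath answers "which directory will the output land in?" by rendering one representative file name (both sequence numbers filled with zeros) and taking its parent. A usage sketch with made-up arguments:

import org.embulk.output.hdfs.util.SamplePath;

public class SamplePathSketch
{
    public static void main(String[] args)
    {
        // String.format("%03d.%02d", 0, 0) -> "000.00"
        System.out.println(SamplePath.getFile("/data/out_", "%03d.%02d", ".csv")); // /data/out_000.00.csv
        System.out.println(SamplePath.getDir("/data/out_", "%03d.%02d", ".csv"));  // /data
    }
}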
data/src/main/java/org/embulk/output/hdfs/util/StrftimeUtil.java (new file)
@@ -0,0 +1,23 @@
+package org.embulk.output.hdfs.util;
+
+import org.jruby.embed.ScriptingContainer;
+
+public class StrftimeUtil
+{
+    private static final String scriptTemplate = "(Time.now - %d).strftime('%s')";
+
+    private StrftimeUtil()
+    {
+    }
+
+    public static String strftime(String format, int rewindSeconds)
+    {
+        String script = buildScript(format, rewindSeconds);
+        return new ScriptingContainer().runScriptlet(script).toString();
+    }
+
+    private static String buildScript(String format, int rewindSeconds)
+    {
+        return String.format(scriptTemplate, rewindSeconds, format);
+    }
+}
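Formatting is delegated to JRuby so `path_prefix` keeps Ruby's strftime semantics: the template becomes `(Time.now - rewindSeconds).strftime('format')` and is evaluated in a fresh ScriptingContainer. A usage sketch (needs JRuby on the classpath; the output depends on the clock):

import org.embulk.output.hdfs.util.StrftimeUtil;

public class StrftimeSketch
{
    public static void main(String[] args)
    {
        // Evaluates "(Time.now - 86400).strftime('/logs/%Y-%m-%d/out')" in JRuby,
        // i.e. the prefix for 24 hours ago, e.g. /logs/2016-02-14/out
        System.out.println(StrftimeUtil.strftime("/logs/%Y-%m-%d/out", 86400));
    }
}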
data/src/test/java/org/embulk/output/hdfs/TestHdfsFileOutputPlugin.java
@@ -4,6 +4,7 @@ import com.google.common.base.Charsets;
 import com.google.common.base.Optional;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
+import org.apache.hadoop.fs.FileAlreadyExistsException;
 import org.embulk.EmbulkTestRuntime;
 import org.embulk.config.ConfigException;
 import org.embulk.config.ConfigSource;
@@ -17,6 +18,7 @@ import org.embulk.spi.PageTestUtils;
 import org.embulk.spi.Schema;
 import org.embulk.spi.TransactionalPageOutput;
 import org.embulk.spi.time.Timestamp;
+import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Rule;
 import org.junit.Test;
@@ -37,9 +39,11 @@ import static org.embulk.spi.type.Types.*;
 import static org.hamcrest.CoreMatchers.containsString;
 import static org.hamcrest.CoreMatchers.hasItem;
 import static org.hamcrest.CoreMatchers.not;
+import static org.hamcrest.core.Is.isA;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotEquals;
 import static org.junit.Assert.assertThat;
+import static org.junit.Assert.assertTrue;
 import static org.msgpack.value.ValueFactory.newMap;
 import static org.msgpack.value.ValueFactory.newString;
 
@@ -107,9 +111,9 @@ public class TestHdfsFileOutputPlugin
         assertEquals(Lists.newArrayList(), task.getConfigFiles());
         assertEquals(Maps.newHashMap(), task.getConfig());
         assertEquals(0, task.getRewindSeconds());
-        assertEquals(
+        assertEquals(Optional.absent(), task.getOverwrite());
         assertEquals(Optional.absent(), task.getDoas());
-        assertEquals(
+        assertEquals(Optional.absent(), task.getDeleteInAdvance());
     }
 
     @Test(expected = ConfigException.class)
@@ -119,6 +123,14 @@
         PluginTask task = config.loadConfig(PluginTask.class);
     }
 
+    @Test(expected = ConfigException.class)
+    public void testSequenceFormatMode_replace()
+    {
+        run(getBaseConfigSource()
+                .set("mode", "replace")
+                .set("sequence_format", "%d/%d"));
+    }
+
     private List<String> lsR(List<String> names, java.nio.file.Path dir)
     {
         try (DirectoryStream<java.nio.file.Path> stream = Files.newDirectoryStream(dir)) {
@@ -201,14 +213,20 @@
         }
     }
 
+    private ConfigSource getDefaultFsConfig()
+    {
+        return Exec.newConfigSource()
+                .set("fs.hdfs.impl", "org.apache.hadoop.fs.RawLocalFileSystem")
+                .set("fs.file.impl", "org.apache.hadoop.fs.RawLocalFileSystem")
+                .set("fs.trash.interval", "3600")
+                .set("fs.defaultFS", "file:///");
+    }
+
     @Test
     public void testBulkLoad()
     {
         ConfigSource config = getBaseConfigSource()
-                .setNested("config",
-                        .set("fs.hdfs.impl", "org.apache.hadoop.fs.RawLocalFileSystem")
-                        .set("fs.file.impl", "org.apache.hadoop.fs.RawLocalFileSystem")
-                        .set("fs.defaultFS", "file:///"));
+                .setNested("config", getDefaultFsConfig());
 
         run(config);
         List<String> fileList = lsR(Lists.<String>newArrayList(), Paths.get(tmpFolder.getRoot().getAbsolutePath()));
@@ -219,28 +237,25 @@
     }
 
     @Test
-    public void
+    public void testDeleteInAdvance_RECURSIVE()
             throws IOException
     {
         for (int n = 0; n <= 10; n++) {
-            tmpFolder.newFile("embulk-output-
-            tmpFolder.newFolder("embulk-output-
+            tmpFolder.newFile("embulk-output-hdfs_testDeleteInAdvance_RECURSIVE_file_" + n + ".txt");
+            tmpFolder.newFolder("embulk-output-hdfs_testDeleteInAdvance_RECURSIVE_directory_" + n);
         }
 
         List<String> fileListBeforeRun = lsR(Lists.<String>newArrayList(), Paths.get(tmpFolder.getRoot().getAbsolutePath()));
 
         ConfigSource config = getBaseConfigSource()
-                .setNested("config",
-                        .set("fs.hdfs.impl", "org.apache.hadoop.fs.RawLocalFileSystem")
-                        .set("fs.file.impl", "org.apache.hadoop.fs.RawLocalFileSystem")
-                        .set("fs.defaultFS", "file:///"))
+                .setNested("config", getDefaultFsConfig())
                 .set("delete_in_advance", "RECURSIVE");
 
         run(config);
 
         List<String> fileListAfterRun = lsR(Lists.<String>newArrayList(), Paths.get(tmpFolder.getRoot().getAbsolutePath()));
         assertNotEquals(fileListBeforeRun, fileListAfterRun);
-        assertThat(fileListAfterRun, not(hasItem(containsString("embulk-output-
+        assertThat(fileListAfterRun, not(hasItem(containsString("embulk-output-hdfs_testDeleteInAdvance_RECURSIVE_directory_"))));
         assertThat(fileListAfterRun, not(hasItem(containsString("txt"))));
         assertThat(fileListAfterRun, hasItem(containsString(pathPrefix + "001.00.csv")));
         assertRecordsInFile(String.format("%s/%s001.00.csv",
@@ -249,21 +264,18 @@
     }
 
     @Test
-    public void
+    public void testDeleteInAdvance_FILE_ONLY()
             throws IOException
     {
         for (int n = 0; n <= 10; n++) {
-            tmpFolder.newFile("embulk-output-
-            tmpFolder.newFolder("embulk-output-
+            tmpFolder.newFile("embulk-output-hdfs_testDeleteInAdvance_FILE_ONLY_file_" + n + ".txt");
+            tmpFolder.newFolder("embulk-output-hdfs_testDeleteInAdvance_FILE_ONLY_directory_" + n);
        }
 
         List<String> fileListBeforeRun = lsR(Lists.<String>newArrayList(), Paths.get(tmpFolder.getRoot().getAbsolutePath()));
 
         ConfigSource config = getBaseConfigSource()
-                .setNested("config",
-                        .set("fs.hdfs.impl", "org.apache.hadoop.fs.RawLocalFileSystem")
-                        .set("fs.file.impl", "org.apache.hadoop.fs.RawLocalFileSystem")
-                        .set("fs.defaultFS", "file:///"))
+                .setNested("config", getDefaultFsConfig())
                 .set("delete_in_advance", "FILE_ONLY");
 
         run(config);
@@ -271,7 +283,126 @@
         List<String> fileListAfterRun = lsR(Lists.<String>newArrayList(), Paths.get(tmpFolder.getRoot().getAbsolutePath()));
         assertNotEquals(fileListBeforeRun, fileListAfterRun);
         assertThat(fileListAfterRun, not(hasItem(containsString("txt"))));
-        assertThat(fileListAfterRun, hasItem(containsString("embulk-output-
+        assertThat(fileListAfterRun, hasItem(containsString("embulk-output-hdfs_testDeleteInAdvance_FILE_ONLY_directory_")));
+        assertThat(fileListAfterRun, hasItem(containsString(pathPrefix + "001.00.csv")));
+        assertRecordsInFile(String.format("%s/%s001.00.csv",
+                tmpFolder.getRoot().getAbsolutePath(),
+                pathPrefix));
+    }
+
+    @Test
+    public void testMode_delete_recursive_in_advance()
+            throws IOException
+    {
+        for (int n = 0; n <= 10; n++) {
+            tmpFolder.newFile("embulk-output-hdfs_testMode_delete_recursive_in_advance_file_" + n + ".txt");
+            tmpFolder.newFolder("embulk-output-hdfs_testMode_delete_recursive_in_advance_directory_" + n);
+        }
+
+        List<String> fileListBeforeRun = lsR(Lists.<String>newArrayList(), Paths.get(tmpFolder.getRoot().getAbsolutePath()));
+
+        ConfigSource config = getBaseConfigSource()
+                .setNested("config", getDefaultFsConfig())
+                .set("mode", "delete_recursive_in_advance");
+
+        run(config);
+
+        List<String> fileListAfterRun = lsR(Lists.<String>newArrayList(), Paths.get(tmpFolder.getRoot().getAbsolutePath()));
+        assertNotEquals(fileListBeforeRun, fileListAfterRun);
+        assertThat(fileListAfterRun, not(hasItem(containsString("embulk-output-hdfs_testMode_delete_recursive_in_advance_directory_"))));
+        assertThat(fileListAfterRun, not(hasItem(containsString("txt"))));
+        assertThat(fileListAfterRun, hasItem(containsString(pathPrefix + "001.00.csv")));
+        assertRecordsInFile(String.format("%s/%s001.00.csv",
+                tmpFolder.getRoot().getAbsolutePath(),
+                pathPrefix));
+    }
+
+    @Test
+    public void testMode_delete_files_in_advance()
+            throws IOException
+    {
+        for (int n = 0; n <= 10; n++) {
+            tmpFolder.newFile("embulk-output-hdfs_testMode_delete_files_in_advance_file_" + n + ".txt");
+            tmpFolder.newFolder("embulk-output-hdfs_testMode_delete_files_in_advance_directory_" + n);
+        }
+
+        List<String> fileListBeforeRun = lsR(Lists.<String>newArrayList(), Paths.get(tmpFolder.getRoot().getAbsolutePath()));
+
+        ConfigSource config = getBaseConfigSource()
+                .setNested("config", getDefaultFsConfig())
+                .set("mode", "delete_files_in_advance");
+
+        run(config);
+
+        List<String> fileListAfterRun = lsR(Lists.<String>newArrayList(), Paths.get(tmpFolder.getRoot().getAbsolutePath()));
+        assertNotEquals(fileListBeforeRun, fileListAfterRun);
+        assertThat(fileListAfterRun, not(hasItem(containsString("txt"))));
+        assertThat(fileListAfterRun, hasItem(containsString("embulk-output-hdfs_testMode_delete_files_in_advance_directory_")));
+        assertThat(fileListAfterRun, hasItem(containsString(pathPrefix + "001.00.csv")));
+        assertRecordsInFile(String.format("%s/%s001.00.csv",
+                tmpFolder.getRoot().getAbsolutePath(),
+                pathPrefix));
+    }
+
+    @Test
+    public void testMode_abort_if_exist()
+            throws IOException
+    {
+        ConfigSource config = getBaseConfigSource()
+                .setNested("config", getDefaultFsConfig())
+                .set("mode", "abort_if_exist");
+
+        run(config);
+        try {
+            run(config);
+        }
+        catch (Exception e) {
+            Throwable t = e;
+            while (t != null) {
+                t = t.getCause();
+                if (t.getCause() instanceof FileAlreadyExistsException) {
+                    Assert.assertTrue(true);
+                    return;
+                }
+            }
+            Assert.fail("FileAlreadyExistsException is not cause.");
+        }
+
+    }
+
+    @Test
+    public void testMode_overwrite()
+            throws IOException
+    {
+        ConfigSource config = getBaseConfigSource()
+                .setNested("config", getDefaultFsConfig())
+                .set("mode", "overwrite");
+
+        run(config);
+        run(config);
+        Assert.assertTrue(true);
+    }
+
+    @Test
+    public void testMode_replace()
+            throws IOException
+    {
+        for (int n = 0; n <= 10; n++) {
+            tmpFolder.newFile("embulk-output-hdfs_testMode_delete_recursive_in_advance_file_" + n + ".txt");
+            tmpFolder.newFolder("embulk-output-hdfs_testMode_delete_recursive_in_advance_directory_" + n);
+        }
+
+        List<String> fileListBeforeRun = lsR(Lists.<String>newArrayList(), Paths.get(tmpFolder.getRoot().getAbsolutePath()));
+
+
+        run(getBaseConfigSource()
+                .set("config", getDefaultFsConfig())
+                .set("mode", "replace"));
+
+        List<String> fileListAfterRun = lsR(Lists.<String>newArrayList(), Paths.get(tmpFolder.getRoot().getAbsolutePath()));
+        assertNotEquals(fileListBeforeRun, fileListAfterRun);
+        assertThat(fileListAfterRun, not(hasItem(containsString("embulk-output-hdfs_testMode_delete_recursive_in_advance_directory_"))));
+        assertThat(fileListAfterRun, not(hasItem(containsString("txt"))));
         assertThat(fileListAfterRun, hasItem(containsString(pathPrefix + "001.00.csv")));
         assertRecordsInFile(String.format("%s/%s001.00.csv",
                 tmpFolder.getRoot().getAbsolutePath(),