embulk-output-hdfs 0.2.4 → 0.3.0

Files changed (30)
  1. checksums.yaml +4 -4
  2. data/.travis.yml +6 -1
  3. data/CHANGELOG.md +9 -0
  4. data/README.md +38 -9
  5. data/build.gradle +10 -8
  6. data/example/config.yml +3 -1
  7. data/example/config_deprecated_option.yml +52 -0
  8. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  9. data/gradle/wrapper/gradle-wrapper.properties +1 -2
  10. data/gradlew +43 -35
  11. data/gradlew.bat +4 -10
  12. data/settings.gradle +1 -0
  13. data/src/main/java/org/embulk/output/hdfs/HdfsFileOutput.java +160 -0
  14. data/src/main/java/org/embulk/output/hdfs/HdfsFileOutputPlugin.java +55 -175
  15. data/src/main/java/org/embulk/output/hdfs/ModeTask.java +111 -0
  16. data/src/main/java/org/embulk/output/hdfs/client/HdfsClient.java +269 -0
  17. data/src/main/java/org/embulk/output/hdfs/compat/ModeCompat.java +76 -0
  18. data/src/main/java/org/embulk/output/hdfs/transaction/AbortIfExistTx.java +6 -0
  19. data/src/main/java/org/embulk/output/hdfs/transaction/AbstractTx.java +53 -0
  20. data/src/main/java/org/embulk/output/hdfs/transaction/ControlRun.java +10 -0
  21. data/src/main/java/org/embulk/output/hdfs/transaction/DeleteFilesInAdvanceTx.java +22 -0
  22. data/src/main/java/org/embulk/output/hdfs/transaction/DeleteRecursiveInAdvanceTx.java +22 -0
  23. data/src/main/java/org/embulk/output/hdfs/transaction/OverwriteTx.java +11 -0
  24. data/src/main/java/org/embulk/output/hdfs/transaction/ReplaceTx.java +62 -0
  25. data/src/main/java/org/embulk/output/hdfs/transaction/Tx.java +13 -0
  26. data/src/main/java/org/embulk/output/hdfs/util/SafeWorkspaceName.java +21 -0
  27. data/src/main/java/org/embulk/output/hdfs/util/SamplePath.java +21 -0
  28. data/src/main/java/org/embulk/output/hdfs/util/StrftimeUtil.java +23 -0
  29. data/src/test/java/org/embulk/output/hdfs/TestHdfsFileOutputPlugin.java +153 -22
  30. metadata +87 -70
data/src/main/java/org/embulk/output/hdfs/compat/ModeCompat.java
@@ -0,0 +1,76 @@
+ package org.embulk.output.hdfs.compat;
+
+ import com.google.common.base.Optional;
+ import org.embulk.config.ConfigException;
+ import org.embulk.output.hdfs.ModeTask;
+ import org.embulk.output.hdfs.ModeTask.Mode;
+ import org.embulk.spi.Exec;
+ import org.slf4j.Logger;
+
+ import static org.embulk.output.hdfs.HdfsFileOutputPlugin.PluginTask.DeleteInAdvancePolicy;
+ import static org.embulk.output.hdfs.ModeTask.Mode.ABORT_IF_EXIST;
+ import static org.embulk.output.hdfs.ModeTask.Mode.DELETE_FILES_IN_ADVANCE;
+ import static org.embulk.output.hdfs.ModeTask.Mode.DELETE_RECURSIVE_IN_ADVANCE;
+ import static org.embulk.output.hdfs.ModeTask.Mode.OVERWRITE;
+
+ @Deprecated
+ public class ModeCompat
+ {
+     private static final Logger logger = Exec.getLogger(ModeCompat.class);
+
+     private ModeCompat()
+     {
+     }
+
+     @Deprecated
+     public static Mode getMode(
+             ModeTask task,
+             Optional<Boolean> overwrite,
+             Optional<DeleteInAdvancePolicy> deleteInAdvancePolicy)
+     {
+         if (!overwrite.isPresent() && !deleteInAdvancePolicy.isPresent()) {
+             return task.getMode();
+         }
+
+         // Display Deprecated Messages
+         if (overwrite.isPresent()) {
+             logger.warn("`overwrite` option is Deprecated. Please use `mode` option instead.");
+         }
+         if (deleteInAdvancePolicy.isPresent()) {
+             logger.warn("`delete_in_advance` is Deprecated. Please use `mode` option instead.");
+         }
+         if (!task.getMode().isDefaultMode()) {
+             String msg = "`mode` option cannot be used with `overwrite` option or `delete_in_advance` option.";
+             logger.error(msg);
+             throw new ConfigException(msg);
+         }
+
+         // Select Mode for Compatibility
+         if (!deleteInAdvancePolicy.isPresent()) {
+             if (overwrite.get()) {
+                 logger.warn(String.format("Select `mode: %s` for compatibility", OVERWRITE.toString()));
+                 return OVERWRITE;
+             }
+             logger.warn(String.format("Select `mode: %s` for compatibility", ABORT_IF_EXIST.toString()));
+             return ABORT_IF_EXIST;
+         }
+
+         switch (deleteInAdvancePolicy.get()) { // deleteInAdvancePolicy is always present.
+             case NONE:
+                 if (overwrite.isPresent() && overwrite.get()) {
+                     logger.warn(String.format("Select `mode: %s` for compatibility", OVERWRITE.toString()));
+                     return OVERWRITE;
+                 }
+                 logger.warn(String.format("Select `mode: %s` for compatibility", ABORT_IF_EXIST.toString()));
+                 return ABORT_IF_EXIST;
+             case FILE_ONLY:
+                 logger.warn(String.format("Select `mode: %s` for compatibility", DELETE_FILES_IN_ADVANCE.toString()));
+                 return DELETE_FILES_IN_ADVANCE;
+             case RECURSIVE:
+                 logger.warn(String.format("Select `mode: %s` for compatibility", DELETE_RECURSIVE_IN_ADVANCE.toString()));
+                 return DELETE_RECURSIVE_IN_ADVANCE;
+             default:
+                 throw new ConfigException(String.format("Unknown policy: %s", deleteInAdvancePolicy.get()));
+         }
+     }
+ }
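The decision table above is easier to scan in isolation. Below is a self-contained sketch of the same mapping, using java.util.Optional and stand-in enums rather than the plugin's Guava Optional and release classes; it is illustrative only and not part of the diff.

```java
import java.util.Optional;

// Illustrative stand-ins for the plugin's enums; not the release classes.
enum Mode { ABORT_IF_EXIST, OVERWRITE, DELETE_FILES_IN_ADVANCE, DELETE_RECURSIVE_IN_ADVANCE }
enum DeleteInAdvancePolicy { NONE, FILE_ONLY, RECURSIVE }

public class ModeCompatSketch
{
    // Mirrors ModeCompat's decision table: `overwrite` only matters when no
    // delete_in_advance policy (or NONE) was given.
    static Mode map(Optional<Boolean> overwrite, Optional<DeleteInAdvancePolicy> policy)
    {
        if (!policy.isPresent() || policy.get() == DeleteInAdvancePolicy.NONE) {
            return overwrite.orElse(false) ? Mode.OVERWRITE : Mode.ABORT_IF_EXIST;
        }
        return policy.get() == DeleteInAdvancePolicy.FILE_ONLY
                ? Mode.DELETE_FILES_IN_ADVANCE
                : Mode.DELETE_RECURSIVE_IN_ADVANCE;
    }

    public static void main(String[] args)
    {
        System.out.println(map(Optional.of(true), Optional.empty()));   // OVERWRITE
        System.out.println(map(Optional.empty(),
                Optional.of(DeleteInAdvancePolicy.RECURSIVE)));         // DELETE_RECURSIVE_IN_ADVANCE
    }
}
```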
data/src/main/java/org/embulk/output/hdfs/transaction/AbortIfExistTx.java
@@ -0,0 +1,6 @@
+ package org.embulk.output.hdfs.transaction;
+
+ public class AbortIfExistTx
+         extends AbstractTx
+ {
+ }
data/src/main/java/org/embulk/output/hdfs/transaction/AbstractTx.java
@@ -0,0 +1,53 @@
+ package org.embulk.output.hdfs.transaction;
+
+ import org.embulk.config.ConfigDiff;
+ import org.embulk.config.TaskReport;
+ import org.embulk.config.TaskSource;
+ import org.embulk.output.hdfs.HdfsFileOutput;
+ import org.embulk.output.hdfs.util.StrftimeUtil;
+ import org.embulk.spi.Exec;
+ import org.embulk.spi.TransactionalFileOutput;
+
+ import java.util.List;
+
+ import static org.embulk.output.hdfs.HdfsFileOutputPlugin.PluginTask;
+
+ abstract class AbstractTx
+         implements Tx
+ {
+     protected void beforeRun(PluginTask task)
+     {
+     }
+
+     protected void afterRun(PluginTask task, List<TaskReport> reports)
+     {
+     }
+
+     protected ConfigDiff newConfigDiff()
+     {
+         return Exec.newConfigDiff();
+     }
+
+     public ConfigDiff transaction(PluginTask task, ControlRun control)
+     {
+         beforeRun(task);
+         List<TaskReport> reports = control.run();
+         afterRun(task, reports);
+         return newConfigDiff();
+     }
+
+     protected String getPathPrefix(PluginTask task)
+     {
+         return StrftimeUtil.strftime(task.getPathPrefix(), task.getRewindSeconds());
+     }
+
+     protected boolean canOverwrite()
+     {
+         return false;
+     }
+
+     public TransactionalFileOutput newOutput(PluginTask task, TaskSource taskSource, int taskIndex)
+     {
+         return new HdfsFileOutput(task, getPathPrefix(task), canOverwrite(), taskIndex);
+     }
+ }
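AbstractTx is a template method: transaction() brackets the control run with the beforeRun/afterRun hooks, and canOverwrite() feeds the overwrite flag through to HdfsFileOutput. A hypothetical subclass (not in this release; the class and comments are illustrative) showing the hook points:

```java
package org.embulk.output.hdfs.transaction;

import org.embulk.config.TaskReport;

import java.util.List;

import static org.embulk.output.hdfs.HdfsFileOutputPlugin.PluginTask;

// Hypothetical subclass, same package because AbstractTx is package-private.
public class NoopTx
        extends AbstractTx
{
    @Override
    protected void beforeRun(PluginTask task)
    {
        // e.g. validate paths or clean up before any task writes
    }

    @Override
    protected void afterRun(PluginTask task, List<TaskReport> reports)
    {
        // e.g. inspect per-task reports after all tasks finish
    }
}
```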
data/src/main/java/org/embulk/output/hdfs/transaction/ControlRun.java
@@ -0,0 +1,10 @@
+ package org.embulk.output.hdfs.transaction;
+
+ import org.embulk.config.TaskReport;
+
+ import java.util.List;
+
+ public interface ControlRun
+ {
+     List<TaskReport> run();
+ }
data/src/main/java/org/embulk/output/hdfs/transaction/DeleteFilesInAdvanceTx.java
@@ -0,0 +1,22 @@
+ package org.embulk.output.hdfs.transaction;
+
+ import org.apache.hadoop.fs.Path;
+ import org.embulk.output.hdfs.HdfsFileOutputPlugin;
+ import org.embulk.output.hdfs.client.HdfsClient;
+ import org.embulk.spi.Exec;
+ import org.slf4j.Logger;
+
+ public class DeleteFilesInAdvanceTx
+         extends AbstractTx
+ {
+     private static final Logger logger = Exec.getLogger(DeleteFilesInAdvanceTx.class);
+
+     @Override
+     protected void beforeRun(HdfsFileOutputPlugin.PluginTask task)
+     {
+         HdfsClient hdfsClient = HdfsClient.build(task);
+         Path globPath = new Path(getPathPrefix(task) + "*");
+         logger.info("Delete {} (File Only) in advance", globPath);
+         hdfsClient.globFilesAndTrash(globPath);
+     }
+ }
data/src/main/java/org/embulk/output/hdfs/transaction/DeleteRecursiveInAdvanceTx.java
@@ -0,0 +1,22 @@
+ package org.embulk.output.hdfs.transaction;
+
+ import org.apache.hadoop.fs.Path;
+ import org.embulk.output.hdfs.HdfsFileOutputPlugin;
+ import org.embulk.output.hdfs.client.HdfsClient;
+ import org.embulk.spi.Exec;
+ import org.slf4j.Logger;
+
+ public class DeleteRecursiveInAdvanceTx
+         extends AbstractTx
+ {
+     private static final Logger logger = Exec.getLogger(DeleteRecursiveInAdvanceTx.class);
+
+     @Override
+     protected void beforeRun(HdfsFileOutputPlugin.PluginTask task)
+     {
+         HdfsClient hdfsClient = HdfsClient.build(task);
+         Path globPath = new Path(getPathPrefix(task) + "*");
+         logger.info("Delete {} (Recursive) in advance", globPath);
+         hdfsClient.globAndTrash(globPath);
+     }
+ }
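Both delete-in-advance transactions trash everything matching a glob built from the strftime-resolved path prefix. A minimal sketch of that pattern, assuming an example prefix value (requires hadoop-common on the classpath):

```java
import org.apache.hadoop.fs.Path;

public class GlobDemo
{
    public static void main(String[] args)
    {
        // Assumed example: a path_prefix after strftime resolution.
        String pathPrefix = "/data/2016-01-01/out_";
        Path globPath = new Path(pathPrefix + "*");
        System.out.println(globPath);  // /data/2016-01-01/out_*
    }
}
```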
data/src/main/java/org/embulk/output/hdfs/transaction/OverwriteTx.java
@@ -0,0 +1,11 @@
+ package org.embulk.output.hdfs.transaction;
+
+ public class OverwriteTx
+         extends AbstractTx
+ {
+     @Override
+     protected boolean canOverwrite()
+     {
+         return true;
+     }
+ }
data/src/main/java/org/embulk/output/hdfs/transaction/ReplaceTx.java
@@ -0,0 +1,62 @@
+ package org.embulk.output.hdfs.transaction;
+
+ import org.embulk.config.ConfigException;
+ import org.embulk.config.TaskReport;
+ import org.embulk.output.hdfs.HdfsFileOutputPlugin.PluginTask;
+ import org.embulk.output.hdfs.client.HdfsClient;
+ import org.embulk.output.hdfs.util.SafeWorkspaceName;
+ import org.embulk.output.hdfs.util.SamplePath;
+ import org.embulk.output.hdfs.util.StrftimeUtil;
+ import org.embulk.spi.Exec;
+ import org.slf4j.Logger;
+
+ import java.nio.file.Paths;
+ import java.util.List;
+
+ public class ReplaceTx
+         extends AbstractTx
+ {
+     private static final Logger logger = Exec.getLogger(ReplaceTx.class);
+
+     @Override
+     protected String getPathPrefix(PluginTask task)
+     {
+         return Paths.get(task.getSafeWorkspace(), super.getPathPrefix(task)).toString();
+     }
+
+     @Override
+     protected void beforeRun(PluginTask task)
+     {
+         HdfsClient hdfsClient = HdfsClient.build(task);
+         if (task.getSequenceFormat().contains("/")) {
+             throw new ConfigException("Must not include `/` in `sequence_format` if atomic is true.");
+         }
+
+         String safeWorkspace = SafeWorkspaceName.build(task.getWorkspace());
+         logger.info("Use as a workspace: {}", safeWorkspace);
+
+         String safeWsWithOutput = Paths.get(safeWorkspace, getOutputSampleDir(task)).toString();
+         logger.debug("The actual workspace must be with output dirs: {}", safeWsWithOutput);
+         if (!hdfsClient.mkdirs(safeWsWithOutput)) {
+             throw new ConfigException(String.format("Failed to make a directory: %s", safeWsWithOutput));
+         }
+         task.setSafeWorkspace(safeWorkspace);
+     }
+
+     @Override
+     protected void afterRun(PluginTask task, List<TaskReport> reports)
+     {
+         HdfsClient hdfsClient = HdfsClient.build(task);
+         String outputDir = getOutputSampleDir(task);
+         String safeWsWithOutput = Paths.get(task.getSafeWorkspace(), getOutputSampleDir(task)).toString();
+
+         hdfsClient.renameDirectory(safeWsWithOutput, outputDir, true);
+         logger.info("Store: {} >>> {}", safeWsWithOutput, outputDir);
+     }
+
+     private String getOutputSampleDir(PluginTask task)
+     {
+         String pathPrefix = StrftimeUtil.strftime(task.getPathPrefix(), task.getRewindSeconds());
+         return SamplePath.getDir(pathPrefix, task.getSequenceFormat(), task.getFileExt());
+     }
+ }
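ReplaceTx is what makes `mode: replace` near-atomic: tasks write under a unique safe workspace, and afterRun renames the finished output directory over the real one in a single operation. A sketch of the path arithmetic with assumed example values (the workspace name and output directory are illustrative):

```java
import java.nio.file.Paths;

public class ReplacePathsDemo
{
    public static void main(String[] args)
    {
        // Assumed values: a SafeWorkspaceName under `workspace`, and the
        // parent directory of the sample output file.
        String safeWorkspace = "/tmp/embulk-output-hdfs_1234567890_0f8fad5b-d9cb-469f-a165-70867728950e";
        String outputDir = "/data/2016-01-01";

        // Tasks write here during the run ...
        System.out.println(Paths.get(safeWorkspace, outputDir));
        // ... and afterRun renames that directory onto the real output dir:
        System.out.println(outputDir);
    }
}
```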
data/src/main/java/org/embulk/output/hdfs/transaction/Tx.java
@@ -0,0 +1,13 @@
+ package org.embulk.output.hdfs.transaction;
+
+ import org.embulk.config.ConfigDiff;
+ import org.embulk.config.TaskSource;
+ import org.embulk.output.hdfs.HdfsFileOutputPlugin.PluginTask;
+ import org.embulk.spi.TransactionalFileOutput;
+
+ public interface Tx
+ {
+     ConfigDiff transaction(PluginTask task, ControlRun control);
+
+     TransactionalFileOutput newOutput(PluginTask task, TaskSource taskSource, int taskIndex);
+ }
data/src/main/java/org/embulk/output/hdfs/util/SafeWorkspaceName.java
@@ -0,0 +1,21 @@
+ package org.embulk.output.hdfs.util;
+
+ import java.nio.file.Paths;
+ import java.util.UUID;
+
+ public class SafeWorkspaceName
+ {
+     private static final String prefix = "embulk-output-hdfs";
+
+     private SafeWorkspaceName()
+     {
+     }
+
+     public static String build(String workspace)
+     {
+         long nanoTime = System.nanoTime();
+         String uuid = UUID.randomUUID().toString();
+         String dirname = String.format("%s_%d_%s", prefix, nanoTime, uuid);
+         return Paths.get(workspace, dirname).toString();
+     }
+ }
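A sketch of the kind of workspace path build() yields; the nanoTime and UUID components differ on every run, which is what keeps concurrent runs from colliding (the "/tmp" base is an assumed example):

```java
import java.nio.file.Paths;
import java.util.UUID;

public class SafeWorkspaceNameDemo
{
    public static void main(String[] args)
    {
        // Same recipe as SafeWorkspaceName.build: prefix + nanoTime + random UUID.
        String dirname = String.format("%s_%d_%s",
                "embulk-output-hdfs", System.nanoTime(), UUID.randomUUID());
        System.out.println(Paths.get("/tmp", dirname));
        // e.g. /tmp/embulk-output-hdfs_123456789012345_0f8fad5b-d9cb-469f-a165-70867728950e
    }
}
```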
data/src/main/java/org/embulk/output/hdfs/util/SamplePath.java
@@ -0,0 +1,21 @@
+ package org.embulk.output.hdfs.util;
+
+ import java.nio.file.Paths;
+
+ public class SamplePath
+ {
+     private SamplePath()
+     {
+     }
+
+     public static String getFile(String pathPrefix, String sequenceFormat, String fileExt)
+     {
+         return pathPrefix + String.format(sequenceFormat, 0, 0) + fileExt;
+     }
+
+     public static String getDir(String pathPrefix, String sequenceFormat, String fileExt)
+     {
+         String sampleFile = getFile(pathPrefix, sequenceFormat, fileExt);
+         return Paths.get(sampleFile).getParent().toString();
+     }
+ }
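SamplePath substitutes task index 0 and file index 0 into the sequence format to predict where output will land. A sketch with assumed example values for path_prefix, sequence_format, and file_ext:

```java
import java.nio.file.Paths;

public class SamplePathDemo
{
    public static void main(String[] args)
    {
        // Assumed example values; actual values come from the plugin config.
        String pathPrefix = "/data/2016-01-01/out_";
        String sequenceFormat = "%03d.%02d.";
        String fileExt = "csv";

        // getFile: substitute taskIndex = 0 and fileIndex = 0.
        String sampleFile = pathPrefix + String.format(sequenceFormat, 0, 0) + fileExt;
        System.out.println(sampleFile);                         // /data/2016-01-01/out_000.00.csv
        // getDir: the parent of that sample file.
        System.out.println(Paths.get(sampleFile).getParent());  // /data/2016-01-01
    }
}
```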
data/src/main/java/org/embulk/output/hdfs/util/StrftimeUtil.java
@@ -0,0 +1,23 @@
+ package org.embulk.output.hdfs.util;
+
+ import org.jruby.embed.ScriptingContainer;
+
+ public class StrftimeUtil
+ {
+     private static final String scriptTemplate = "(Time.now - %d).strftime('%s')";
+
+     private StrftimeUtil()
+     {
+     }
+
+     public static String strftime(String format, int rewindSeconds)
+     {
+         String script = buildScript(format, rewindSeconds);
+         return new ScriptingContainer().runScriptlet(script).toString();
+     }
+
+     private static String buildScript(String format, int rewindSeconds)
+     {
+         return String.format(scriptTemplate, rewindSeconds, format);
+     }
+ }
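StrftimeUtil delegates to an embedded JRuby interpreter to get Ruby-compatible strftime semantics. The scriptlet it builds looks like the following (the format value is an assumed example; evaluating the script itself needs org.jruby on the classpath):

```java
public class StrftimeScriptDemo
{
    public static void main(String[] args)
    {
        // Same template as StrftimeUtil: rewind_seconds, then the strftime format.
        String script = String.format("(Time.now - %d).strftime('%s')",
                86400, "/data/%Y-%m-%d/out_");
        System.out.println(script);
        // (Time.now - 86400).strftime('/data/%Y-%m-%d/out_')
    }
}
```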
data/src/test/java/org/embulk/output/hdfs/TestHdfsFileOutputPlugin.java
@@ -4,6 +4,7 @@ import com.google.common.base.Charsets;
  import com.google.common.base.Optional;
  import com.google.common.collect.Lists;
  import com.google.common.collect.Maps;
+ import org.apache.hadoop.fs.FileAlreadyExistsException;
  import org.embulk.EmbulkTestRuntime;
  import org.embulk.config.ConfigException;
  import org.embulk.config.ConfigSource;
@@ -17,6 +18,7 @@ import org.embulk.spi.PageTestUtils;
  import org.embulk.spi.Schema;
  import org.embulk.spi.TransactionalPageOutput;
  import org.embulk.spi.time.Timestamp;
+ import org.junit.Assert;
  import org.junit.Before;
  import org.junit.Rule;
  import org.junit.Test;
@@ -37,9 +39,11 @@ import static org.embulk.spi.type.Types.*;
  import static org.hamcrest.CoreMatchers.containsString;
  import static org.hamcrest.CoreMatchers.hasItem;
  import static org.hamcrest.CoreMatchers.not;
+ import static org.hamcrest.core.Is.isA;
  import static org.junit.Assert.assertEquals;
  import static org.junit.Assert.assertNotEquals;
  import static org.junit.Assert.assertThat;
+ import static org.junit.Assert.assertTrue;
  import static org.msgpack.value.ValueFactory.newMap;
  import static org.msgpack.value.ValueFactory.newString;
 
@@ -107,9 +111,9 @@ public class TestHdfsFileOutputPlugin
          assertEquals(Lists.newArrayList(), task.getConfigFiles());
          assertEquals(Maps.newHashMap(), task.getConfig());
          assertEquals(0, task.getRewindSeconds());
-         assertEquals(false, task.getOverwrite());
+         assertEquals(Optional.absent(), task.getOverwrite());
          assertEquals(Optional.absent(), task.getDoas());
-         assertEquals(PluginTask.DeleteInAdvancePolicy.NONE, task.getDeleteInAdvance());
+         assertEquals(Optional.absent(), task.getDeleteInAdvance());
      }
 
      @Test(expected = ConfigException.class)
@@ -119,6 +123,14 @@ public class TestHdfsFileOutputPlugin
          PluginTask task = config.loadConfig(PluginTask.class);
      }
 
+     @Test(expected = ConfigException.class)
+     public void testSequenceFormatMode_replace()
+     {
+         run(getBaseConfigSource()
+                 .set("mode", "replace")
+                 .set("sequence_format", "%d/%d"));
+     }
+
      private List<String> lsR(List<String> names, java.nio.file.Path dir)
      {
          try (DirectoryStream<java.nio.file.Path> stream = Files.newDirectoryStream(dir)) {
@@ -201,14 +213,20 @@ public class TestHdfsFileOutputPlugin
          }
      }
 
+     private ConfigSource getDefaultFsConfig()
+     {
+         return Exec.newConfigSource()
+                 .set("fs.hdfs.impl", "org.apache.hadoop.fs.RawLocalFileSystem")
+                 .set("fs.file.impl", "org.apache.hadoop.fs.RawLocalFileSystem")
+                 .set("fs.trash.interval", "3600")
+                 .set("fs.defaultFS", "file:///");
+     }
+
      @Test
      public void testBulkLoad()
      {
          ConfigSource config = getBaseConfigSource()
-                 .setNested("config", Exec.newConfigSource()
-                         .set("fs.hdfs.impl", "org.apache.hadoop.fs.RawLocalFileSystem")
-                         .set("fs.file.impl", "org.apache.hadoop.fs.RawLocalFileSystem")
-                         .set("fs.defaultFS", "file:///"));
+                 .setNested("config", getDefaultFsConfig());
 
          run(config);
          List<String> fileList = lsR(Lists.<String>newArrayList(), Paths.get(tmpFolder.getRoot().getAbsolutePath()));
@@ -219,28 +237,25 @@ public class TestHdfsFileOutputPlugin
      }
 
      @Test
-     public void testDeleteRECURSIVEInAdvance()
+     public void testDeleteInAdvance_RECURSIVE()
              throws IOException
      {
          for (int n = 0; n <= 10; n++) {
-             tmpFolder.newFile("embulk-output-hdfs_file_" + n + ".txt");
-             tmpFolder.newFolder("embulk-output-hdfs_directory_" + n);
+             tmpFolder.newFile("embulk-output-hdfs_testDeleteInAdvance_RECURSIVE_file_" + n + ".txt");
+             tmpFolder.newFolder("embulk-output-hdfs_testDeleteInAdvance_RECURSIVE_directory_" + n);
          }
 
          List<String> fileListBeforeRun = lsR(Lists.<String>newArrayList(), Paths.get(tmpFolder.getRoot().getAbsolutePath()));
 
          ConfigSource config = getBaseConfigSource()
-                 .setNested("config", Exec.newConfigSource()
-                         .set("fs.hdfs.impl", "org.apache.hadoop.fs.RawLocalFileSystem")
-                         .set("fs.file.impl", "org.apache.hadoop.fs.RawLocalFileSystem")
-                         .set("fs.defaultFS", "file:///"))
+                 .setNested("config", getDefaultFsConfig())
                  .set("delete_in_advance", "RECURSIVE");
 
          run(config);
 
          List<String> fileListAfterRun = lsR(Lists.<String>newArrayList(), Paths.get(tmpFolder.getRoot().getAbsolutePath()));
          assertNotEquals(fileListBeforeRun, fileListAfterRun);
-         assertThat(fileListAfterRun, not(hasItem(containsString("embulk-output-hdfs_directory"))));
+         assertThat(fileListAfterRun, not(hasItem(containsString("embulk-output-hdfs_testDeleteInAdvance_RECURSIVE_directory_"))));
          assertThat(fileListAfterRun, not(hasItem(containsString("txt"))));
          assertThat(fileListAfterRun, hasItem(containsString(pathPrefix + "001.00.csv")));
          assertRecordsInFile(String.format("%s/%s001.00.csv",
@@ -249,21 +264,18 @@ public class TestHdfsFileOutputPlugin
      }
 
      @Test
-     public void testDeleteFILE_ONLYInAdvance()
+     public void testDeleteInAdvance_FILE_ONLY()
              throws IOException
      {
          for (int n = 0; n <= 10; n++) {
-             tmpFolder.newFile("embulk-output-hdfs_file_" + n + ".txt");
-             tmpFolder.newFolder("embulk-output-hdfs_directory_" + n);
+             tmpFolder.newFile("embulk-output-hdfs_testDeleteInAdvance_FILE_ONLY_file_" + n + ".txt");
+             tmpFolder.newFolder("embulk-output-hdfs_testDeleteInAdvance_FILE_ONLY_directory_" + n);
          }
 
          List<String> fileListBeforeRun = lsR(Lists.<String>newArrayList(), Paths.get(tmpFolder.getRoot().getAbsolutePath()));
 
          ConfigSource config = getBaseConfigSource()
-                 .setNested("config", Exec.newConfigSource()
-                         .set("fs.hdfs.impl", "org.apache.hadoop.fs.RawLocalFileSystem")
-                         .set("fs.file.impl", "org.apache.hadoop.fs.RawLocalFileSystem")
-                         .set("fs.defaultFS", "file:///"))
+                 .setNested("config", getDefaultFsConfig())
                  .set("delete_in_advance", "FILE_ONLY");
 
          run(config);
@@ -271,7 +283,126 @@ public class TestHdfsFileOutputPlugin
          List<String> fileListAfterRun = lsR(Lists.<String>newArrayList(), Paths.get(tmpFolder.getRoot().getAbsolutePath()));
          assertNotEquals(fileListBeforeRun, fileListAfterRun);
          assertThat(fileListAfterRun, not(hasItem(containsString("txt"))));
-         assertThat(fileListAfterRun, hasItem(containsString("embulk-output-hdfs_directory")));
+         assertThat(fileListAfterRun, hasItem(containsString("embulk-output-hdfs_testDeleteInAdvance_FILE_ONLY_directory_")));
+         assertThat(fileListAfterRun, hasItem(containsString(pathPrefix + "001.00.csv")));
+         assertRecordsInFile(String.format("%s/%s001.00.csv",
+                 tmpFolder.getRoot().getAbsolutePath(),
+                 pathPrefix));
+     }
+
+     @Test
+     public void testMode_delete_recursive_in_advance()
+             throws IOException
+     {
+         for (int n = 0; n <= 10; n++) {
+             tmpFolder.newFile("embulk-output-hdfs_testMode_delete_recursive_in_advance_file_" + n + ".txt");
+             tmpFolder.newFolder("embulk-output-hdfs_testMode_delete_recursive_in_advance_directory_" + n);
+         }
+
+         List<String> fileListBeforeRun = lsR(Lists.<String>newArrayList(), Paths.get(tmpFolder.getRoot().getAbsolutePath()));
+
+         ConfigSource config = getBaseConfigSource()
+                 .setNested("config", getDefaultFsConfig())
+                 .set("mode", "delete_recursive_in_advance");
+
+         run(config);
+
+         List<String> fileListAfterRun = lsR(Lists.<String>newArrayList(), Paths.get(tmpFolder.getRoot().getAbsolutePath()));
+         assertNotEquals(fileListBeforeRun, fileListAfterRun);
+         assertThat(fileListAfterRun, not(hasItem(containsString("embulk-output-hdfs_testMode_delete_recursive_in_advance_directory_"))));
+         assertThat(fileListAfterRun, not(hasItem(containsString("txt"))));
+         assertThat(fileListAfterRun, hasItem(containsString(pathPrefix + "001.00.csv")));
+         assertRecordsInFile(String.format("%s/%s001.00.csv",
+                 tmpFolder.getRoot().getAbsolutePath(),
+                 pathPrefix));
+     }
+
+     @Test
+     public void testMode_delete_files_in_advance()
+             throws IOException
+     {
+         for (int n = 0; n <= 10; n++) {
+             tmpFolder.newFile("embulk-output-hdfs_testMode_delete_files_in_advance_file_" + n + ".txt");
+             tmpFolder.newFolder("embulk-output-hdfs_testMode_delete_files_in_advance_directory_" + n);
+         }
+
+         List<String> fileListBeforeRun = lsR(Lists.<String>newArrayList(), Paths.get(tmpFolder.getRoot().getAbsolutePath()));
+
+         ConfigSource config = getBaseConfigSource()
+                 .setNested("config", getDefaultFsConfig())
+                 .set("mode", "delete_files_in_advance");
+
+         run(config);
+
+         List<String> fileListAfterRun = lsR(Lists.<String>newArrayList(), Paths.get(tmpFolder.getRoot().getAbsolutePath()));
+         assertNotEquals(fileListBeforeRun, fileListAfterRun);
+         assertThat(fileListAfterRun, not(hasItem(containsString("txt"))));
+         assertThat(fileListAfterRun, hasItem(containsString("embulk-output-hdfs_testMode_delete_files_in_advance_directory_")));
+         assertThat(fileListAfterRun, hasItem(containsString(pathPrefix + "001.00.csv")));
+         assertRecordsInFile(String.format("%s/%s001.00.csv",
+                 tmpFolder.getRoot().getAbsolutePath(),
+                 pathPrefix));
+     }
+
+     @Test
+     public void testMode_abort_if_exist()
+             throws IOException
+     {
+         ConfigSource config = getBaseConfigSource()
+                 .setNested("config", getDefaultFsConfig())
+                 .set("mode", "abort_if_exist");
+
+         run(config);
+         try {
+             run(config);
+             Assert.fail("The second run should fail when the output file already exists.");
+         }
+         catch (Exception e) {
+             // Walk the cause chain; the FileAlreadyExistsException is wrapped
+             // by the Embulk runtime.
+             for (Throwable t = e.getCause(); t != null; t = t.getCause()) {
+                 if (t instanceof FileAlreadyExistsException) {
+                     return;
+                 }
+             }
+             Assert.fail("FileAlreadyExistsException is not a cause.");
+         }
+     }
+
+     @Test
+     public void testMode_overwrite()
+             throws IOException
+     {
+         ConfigSource config = getBaseConfigSource()
+                 .setNested("config", getDefaultFsConfig())
+                 .set("mode", "overwrite");
+
+         run(config);
+         run(config); // The second run succeeds because existing files may be overwritten.
+     }
+
+     @Test
+     public void testMode_replace()
+             throws IOException
+     {
+         for (int n = 0; n <= 10; n++) {
+             tmpFolder.newFile("embulk-output-hdfs_testMode_replace_file_" + n + ".txt");
+             tmpFolder.newFolder("embulk-output-hdfs_testMode_replace_directory_" + n);
+         }
+
+         List<String> fileListBeforeRun = lsR(Lists.<String>newArrayList(), Paths.get(tmpFolder.getRoot().getAbsolutePath()));
+
+         run(getBaseConfigSource()
+                 .setNested("config", getDefaultFsConfig())
+                 .set("mode", "replace"));
+
+         List<String> fileListAfterRun = lsR(Lists.<String>newArrayList(), Paths.get(tmpFolder.getRoot().getAbsolutePath()));
+         assertNotEquals(fileListBeforeRun, fileListAfterRun);
+         assertThat(fileListAfterRun, not(hasItem(containsString("embulk-output-hdfs_testMode_replace_directory_"))));
+         assertThat(fileListAfterRun, not(hasItem(containsString("txt"))));
          assertThat(fileListAfterRun, hasItem(containsString(pathPrefix + "001.00.csv")));
          assertRecordsInFile(String.format("%s/%s001.00.csv",
                  tmpFolder.getRoot().getAbsolutePath(),