embulk-output-hdfs 0.2.4 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. checksums.yaml +4 -4
  2. data/.travis.yml +6 -1
  3. data/CHANGELOG.md +9 -0
  4. data/README.md +38 -9
  5. data/build.gradle +10 -8
  6. data/example/config.yml +3 -1
  7. data/example/config_deprecated_option.yml +52 -0
  8. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  9. data/gradle/wrapper/gradle-wrapper.properties +1 -2
  10. data/gradlew +43 -35
  11. data/gradlew.bat +4 -10
  12. data/settings.gradle +1 -0
  13. data/src/main/java/org/embulk/output/hdfs/HdfsFileOutput.java +160 -0
  14. data/src/main/java/org/embulk/output/hdfs/HdfsFileOutputPlugin.java +55 -175
  15. data/src/main/java/org/embulk/output/hdfs/ModeTask.java +111 -0
  16. data/src/main/java/org/embulk/output/hdfs/client/HdfsClient.java +269 -0
  17. data/src/main/java/org/embulk/output/hdfs/compat/ModeCompat.java +76 -0
  18. data/src/main/java/org/embulk/output/hdfs/transaction/AbortIfExistTx.java +6 -0
  19. data/src/main/java/org/embulk/output/hdfs/transaction/AbstractTx.java +53 -0
  20. data/src/main/java/org/embulk/output/hdfs/transaction/ControlRun.java +10 -0
  21. data/src/main/java/org/embulk/output/hdfs/transaction/DeleteFilesInAdvanceTx.java +22 -0
  22. data/src/main/java/org/embulk/output/hdfs/transaction/DeleteRecursiveInAdvanceTx.java +22 -0
  23. data/src/main/java/org/embulk/output/hdfs/transaction/OverwriteTx.java +11 -0
  24. data/src/main/java/org/embulk/output/hdfs/transaction/ReplaceTx.java +62 -0
  25. data/src/main/java/org/embulk/output/hdfs/transaction/Tx.java +13 -0
  26. data/src/main/java/org/embulk/output/hdfs/util/SafeWorkspaceName.java +21 -0
  27. data/src/main/java/org/embulk/output/hdfs/util/SamplePath.java +21 -0
  28. data/src/main/java/org/embulk/output/hdfs/util/StrftimeUtil.java +23 -0
  29. data/src/test/java/org/embulk/output/hdfs/TestHdfsFileOutputPlugin.java +153 -22
  30. metadata +87 -70
data/src/main/java/org/embulk/output/hdfs/compat/ModeCompat.java
@@ -0,0 +1,76 @@
+package org.embulk.output.hdfs.compat;
+
+import com.google.common.base.Optional;
+import org.embulk.config.ConfigException;
+import org.embulk.output.hdfs.ModeTask;
+import org.embulk.output.hdfs.ModeTask.Mode;
+import org.embulk.spi.Exec;
+import org.slf4j.Logger;
+
+import static org.embulk.output.hdfs.HdfsFileOutputPlugin.PluginTask.DeleteInAdvancePolicy;
+import static org.embulk.output.hdfs.ModeTask.Mode.ABORT_IF_EXIST;
+import static org.embulk.output.hdfs.ModeTask.Mode.DELETE_FILES_IN_ADVANCE;
+import static org.embulk.output.hdfs.ModeTask.Mode.DELETE_RECURSIVE_IN_ADVANCE;
+import static org.embulk.output.hdfs.ModeTask.Mode.OVERWRITE;
+
+@Deprecated
+public class ModeCompat
+{
+    private static final Logger logger = Exec.getLogger(ModeCompat.class);
+
+    private ModeCompat()
+    {
+    }
+
+    @Deprecated
+    public static Mode getMode(
+            ModeTask task,
+            Optional<Boolean> overwrite,
+            Optional<DeleteInAdvancePolicy> deleteInAdvancePolicy)
+    {
+        if (!overwrite.isPresent() && !deleteInAdvancePolicy.isPresent()) {
+            return task.getMode();
+        }
+
+        // Display Deprecated Messages
+        if (overwrite.isPresent()) {
+            logger.warn("`overwrite` option is Deprecated. Please use `mode` option instead.");
+        }
+        if (deleteInAdvancePolicy.isPresent()) {
+            logger.warn("`delete_in_advance` is Deprecated. Please use `mode` option instead.");
+        }
+        if (!task.getMode().isDefaultMode()) {
+            String msg = "`mode` option cannot be used with `overwrite` option or `delete_in_advance` option.";
+            logger.error(msg);
+            throw new ConfigException(msg);
+        }
+
+        // Select Mode for Compatibility
+        if (!deleteInAdvancePolicy.isPresent()) {
+            if (overwrite.get()) {
+                logger.warn(String.format("Select `mode: %s` for compatibility", OVERWRITE.toString()));
+                return OVERWRITE;
+            }
+            logger.warn(String.format("Select `mode: %s` for compatibility", ABORT_IF_EXIST.toString()));
+            return ABORT_IF_EXIST;
+        }
+
+        switch (deleteInAdvancePolicy.get()) { // deleteInAdvancePolicy is always present here.
+            case NONE:
+                if (overwrite.isPresent() && overwrite.get()) {
+                    logger.warn(String.format("Select `mode: %s` for compatibility", OVERWRITE.toString()));
+                    return OVERWRITE;
+                }
+                logger.warn(String.format("Select `mode: %s` for compatibility", ABORT_IF_EXIST.toString()));
+                return ABORT_IF_EXIST;
+            case FILE_ONLY:
+                logger.warn(String.format("Select `mode: %s` for compatibility", DELETE_FILES_IN_ADVANCE.toString()));
+                return DELETE_FILES_IN_ADVANCE;
+            case RECURSIVE:
+                logger.warn(String.format("Select `mode: %s` for compatibility", DELETE_RECURSIVE_IN_ADVANCE.toString()));
+                return DELETE_RECURSIVE_IN_ADVANCE;
+            default:
+                throw new ConfigException(String.format("Unknown policy: %s", deleteInAdvancePolicy.get()));
+        }
+    }
+}
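
The shim above keeps configs written for 0.2.x working: if neither deprecated option is set, it simply returns the configured mode; otherwise it warns and picks the equivalent mode, and it refuses a config that mixes the old options with a non-default `mode`. A minimal usage sketch, assuming the plugin jar on the classpath; `ModeCompatSketch` and `resolve` are hypothetical names, everything else comes from the diff above:

    import com.google.common.base.Optional;
    import org.embulk.output.hdfs.ModeTask;
    import org.embulk.output.hdfs.ModeTask.Mode;
    import org.embulk.output.hdfs.compat.ModeCompat;

    import static org.embulk.output.hdfs.HdfsFileOutputPlugin.PluginTask.DeleteInAdvancePolicy;

    class ModeCompatSketch
    {
        // A task whose config said `overwrite: true` (and no `delete_in_advance`)
        // resolves to Mode.OVERWRITE, with a deprecation warning logged on the way.
        static Mode resolve(ModeTask task)
        {
            return ModeCompat.getMode(
                    task,
                    Optional.of(true),                          // legacy `overwrite: true`
                    Optional.<DeleteInAdvancePolicy>absent());  // legacy option unset
        }
    }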
data/src/main/java/org/embulk/output/hdfs/transaction/AbortIfExistTx.java
@@ -0,0 +1,6 @@
+package org.embulk.output.hdfs.transaction;
+
+public class AbortIfExistTx
+        extends AbstractTx
+{
+}
data/src/main/java/org/embulk/output/hdfs/transaction/AbstractTx.java
@@ -0,0 +1,53 @@
+package org.embulk.output.hdfs.transaction;
+
+import org.embulk.config.ConfigDiff;
+import org.embulk.config.TaskReport;
+import org.embulk.config.TaskSource;
+import org.embulk.output.hdfs.HdfsFileOutput;
+import org.embulk.output.hdfs.util.StrftimeUtil;
+import org.embulk.spi.Exec;
+import org.embulk.spi.TransactionalFileOutput;
+
+import java.util.List;
+
+import static org.embulk.output.hdfs.HdfsFileOutputPlugin.PluginTask;
+
+abstract class AbstractTx
+        implements Tx
+{
+    protected void beforeRun(PluginTask task)
+    {
+    }
+
+    protected void afterRun(PluginTask task, List<TaskReport> reports)
+    {
+    }
+
+    protected ConfigDiff newConfigDiff()
+    {
+        return Exec.newConfigDiff();
+    }
+
+    public ConfigDiff transaction(PluginTask task, ControlRun control)
+    {
+        beforeRun(task);
+        List<TaskReport> reports = control.run();
+        afterRun(task, reports);
+        return newConfigDiff();
+    }
+
+    protected String getPathPrefix(PluginTask task)
+    {
+        return StrftimeUtil.strftime(task.getPathPrefix(), task.getRewindSeconds());
+    }
+
+    protected boolean canOverwrite()
+    {
+        return false;
+    }
+
+    public TransactionalFileOutput newOutput(PluginTask task, TaskSource taskSource, int taskIndex)
+    {
+        return new HdfsFileOutput(task, getPathPrefix(task), canOverwrite(), taskIndex);
+    }
+}
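
AbstractTx is the template method behind every mode: transaction() calls beforeRun, hands control back to Embulk through ControlRun (which executes the output tasks and returns their TaskReports), then calls afterRun. A hypothetical subclass, only to show where the hooks fire (LoggingTx is not part of this release):

    package org.embulk.output.hdfs.transaction;

    import org.embulk.config.TaskReport;
    import org.embulk.spi.Exec;
    import org.slf4j.Logger;

    import java.util.List;

    import static org.embulk.output.hdfs.HdfsFileOutputPlugin.PluginTask;

    class LoggingTx
            extends AbstractTx
    {
        private static final Logger logger = Exec.getLogger(LoggingTx.class);

        @Override
        protected void beforeRun(PluginTask task)
        {
            // Runs once, before any task writes a file.
            logger.info("About to write under {}", getPathPrefix(task));
        }

        @Override
        protected void afterRun(PluginTask task, List<TaskReport> reports)
        {
            // Runs once, after all tasks have reported back.
            logger.info("Finished {} tasks", reports.size());
        }
    }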
data/src/main/java/org/embulk/output/hdfs/transaction/ControlRun.java
@@ -0,0 +1,10 @@
+package org.embulk.output.hdfs.transaction;
+
+import org.embulk.config.TaskReport;
+
+import java.util.List;
+
+public interface ControlRun
+{
+    List<TaskReport> run();
+}
data/src/main/java/org/embulk/output/hdfs/transaction/DeleteFilesInAdvanceTx.java
@@ -0,0 +1,22 @@
+package org.embulk.output.hdfs.transaction;
+
+import org.apache.hadoop.fs.Path;
+import org.embulk.output.hdfs.HdfsFileOutputPlugin;
+import org.embulk.output.hdfs.client.HdfsClient;
+import org.embulk.spi.Exec;
+import org.slf4j.Logger;
+
+public class DeleteFilesInAdvanceTx
+        extends AbstractTx
+{
+    private static final Logger logger = Exec.getLogger(DeleteFilesInAdvanceTx.class);
+
+    @Override
+    protected void beforeRun(HdfsFileOutputPlugin.PluginTask task)
+    {
+        HdfsClient hdfsClient = HdfsClient.build(task);
+        Path globPath = new Path(getPathPrefix(task) + "*");
+        logger.info("Delete {} (File Only) in advance", globPath);
+        hdfsClient.globFilesAndTrash(globPath);
+    }
+}
data/src/main/java/org/embulk/output/hdfs/transaction/DeleteRecursiveInAdvanceTx.java
@@ -0,0 +1,22 @@
+package org.embulk.output.hdfs.transaction;
+
+import org.apache.hadoop.fs.Path;
+import org.embulk.output.hdfs.HdfsFileOutputPlugin;
+import org.embulk.output.hdfs.client.HdfsClient;
+import org.embulk.spi.Exec;
+import org.slf4j.Logger;
+
+public class DeleteRecursiveInAdvanceTx
+        extends AbstractTx
+{
+    private static final Logger logger = Exec.getLogger(DeleteRecursiveInAdvanceTx.class);
+
+    @Override
+    protected void beforeRun(HdfsFileOutputPlugin.PluginTask task)
+    {
+        HdfsClient hdfsClient = HdfsClient.build(task);
+        Path globPath = new Path(getPathPrefix(task) + "*");
+        logger.info("Delete {} (Recursive) in advance", globPath);
+        hdfsClient.globAndTrash(globPath);
+    }
+}
data/src/main/java/org/embulk/output/hdfs/transaction/OverwriteTx.java
@@ -0,0 +1,11 @@
+package org.embulk.output.hdfs.transaction;
+
+public class OverwriteTx
+        extends AbstractTx
+{
+    @Override
+    protected boolean canOverwrite()
+    {
+        return true;
+    }
+}
data/src/main/java/org/embulk/output/hdfs/transaction/ReplaceTx.java
@@ -0,0 +1,62 @@
+package org.embulk.output.hdfs.transaction;
+
+import org.embulk.config.ConfigException;
+import org.embulk.config.TaskReport;
+import org.embulk.output.hdfs.HdfsFileOutputPlugin.PluginTask;
+import org.embulk.output.hdfs.client.HdfsClient;
+import org.embulk.output.hdfs.util.SafeWorkspaceName;
+import org.embulk.output.hdfs.util.SamplePath;
+import org.embulk.output.hdfs.util.StrftimeUtil;
+import org.embulk.spi.Exec;
+import org.slf4j.Logger;
+
+import java.nio.file.Paths;
+import java.util.List;
+
+public class ReplaceTx
+        extends AbstractTx
+{
+    private static final Logger logger = Exec.getLogger(ReplaceTx.class);
+
+    @Override
+    protected String getPathPrefix(PluginTask task)
+    {
+        return Paths.get(task.getSafeWorkspace(), super.getPathPrefix(task)).toString();
+    }
+
+    @Override
+    protected void beforeRun(PluginTask task)
+    {
+        HdfsClient hdfsClient = HdfsClient.build(task);
+        if (task.getSequenceFormat().contains("/")) {
+            throw new ConfigException("Must not include `/` in `sequence_format` when `mode: replace` is used.");
+        }
+
+        String safeWorkspace = SafeWorkspaceName.build(task.getWorkspace());
+        logger.info("Use as a workspace: {}", safeWorkspace);
+
+        String safeWsWithOutput = Paths.get(safeWorkspace, getOutputSampleDir(task)).toString();
+        logger.debug("The actual workspace must contain the output dirs: {}", safeWsWithOutput);
+        if (!hdfsClient.mkdirs(safeWsWithOutput)) {
+            throw new ConfigException(String.format("Failed to make a directory: %s", safeWsWithOutput));
+        }
+        task.setSafeWorkspace(safeWorkspace);
+    }
+
+    @Override
+    protected void afterRun(PluginTask task, List<TaskReport> reports)
+    {
+        HdfsClient hdfsClient = HdfsClient.build(task);
+        String outputDir = getOutputSampleDir(task);
+        String safeWsWithOutput = Paths.get(task.getSafeWorkspace(), getOutputSampleDir(task)).toString();
+
+        hdfsClient.renameDirectory(safeWsWithOutput, outputDir, true);
+        logger.info("Store: {} >>> {}", safeWsWithOutput, outputDir);
+    }
+
+    private String getOutputSampleDir(PluginTask task)
+    {
+        String pathPrefix = StrftimeUtil.strftime(task.getPathPrefix(), task.getRewindSeconds());
+        return SamplePath.getDir(pathPrefix, task.getSequenceFormat(), task.getFileExt());
+    }
+}
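
ReplaceTx is the one transaction with real before/after work: beforeRun creates a unique scratch directory (and rejects `/` in `sequence_format`, so the whole output stays in a single directory), tasks then write under that scratch path because getPathPrefix is prefixed with it, and afterRun renames the finished directory over the destination in one step. A self-contained sketch of the same path arithmetic, with made-up literals standing in for `path_prefix`, `sequence_format`, `file_ext`, and `workspace`:

    import org.embulk.output.hdfs.util.SafeWorkspaceName;
    import org.embulk.output.hdfs.util.SamplePath;

    import java.nio.file.Paths;

    class ReplacePathSketch
    {
        public static void main(String[] args)
        {
            // Unique scratch dir, e.g. /tmp/embulk-output-hdfs_<nanos>_<uuid>
            String safeWorkspace = SafeWorkspaceName.build("/tmp");
            // Destination dir derived from a sample file name: /data/out
            String outputDir = SamplePath.getDir("/data/out/part", ".%03d.%02d", ".csv");
            // Tasks write here; afterRun() renames this directory onto /data/out.
            System.out.println(Paths.get(safeWorkspace, outputDir));
        }
    }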
data/src/main/java/org/embulk/output/hdfs/transaction/Tx.java
@@ -0,0 +1,13 @@
+package org.embulk.output.hdfs.transaction;
+
+import org.embulk.config.ConfigDiff;
+import org.embulk.config.TaskSource;
+import org.embulk.output.hdfs.HdfsFileOutputPlugin.PluginTask;
+import org.embulk.spi.TransactionalFileOutput;
+
+public interface Tx
+{
+    ConfigDiff transaction(PluginTask task, ControlRun control);
+
+    TransactionalFileOutput newOutput(PluginTask task, TaskSource taskSource, int taskIndex);
+}
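
Each `mode` value maps onto one Tx implementation. The actual dispatch lives in ModeTask (added in this release but not reproduced here), so the following is only a plausible sketch of that mapping; `TxDispatchSketch` and `newTx` are hypothetical:

    import org.embulk.output.hdfs.transaction.AbortIfExistTx;
    import org.embulk.output.hdfs.transaction.DeleteFilesInAdvanceTx;
    import org.embulk.output.hdfs.transaction.DeleteRecursiveInAdvanceTx;
    import org.embulk.output.hdfs.transaction.OverwriteTx;
    import org.embulk.output.hdfs.transaction.ReplaceTx;
    import org.embulk.output.hdfs.transaction.Tx;

    class TxDispatchSketch
    {
        // Mode names taken from the tests below; the real mapping is in ModeTask.
        static Tx newTx(String mode)
        {
            switch (mode) {
                case "abort_if_exist":              return new AbortIfExistTx();
                case "overwrite":                   return new OverwriteTx();
                case "delete_files_in_advance":     return new DeleteFilesInAdvanceTx();
                case "delete_recursive_in_advance": return new DeleteRecursiveInAdvanceTx();
                case "replace":                     return new ReplaceTx();
                default: throw new IllegalArgumentException("Unknown mode: " + mode);
            }
        }
    }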
data/src/main/java/org/embulk/output/hdfs/util/SafeWorkspaceName.java
@@ -0,0 +1,21 @@
+package org.embulk.output.hdfs.util;
+
+import java.nio.file.Paths;
+import java.util.UUID;
+
+public class SafeWorkspaceName
+{
+    private static final String prefix = "embulk-output-hdfs";
+
+    private SafeWorkspaceName()
+    {
+    }
+
+    public static String build(String workspace)
+    {
+        long nanoTime = System.nanoTime();
+        String uuid = UUID.randomUUID().toString();
+        String dirname = String.format("%s_%d_%s", prefix, nanoTime, uuid);
+        return Paths.get(workspace, dirname).toString();
+    }
+}
data/src/main/java/org/embulk/output/hdfs/util/SamplePath.java
@@ -0,0 +1,21 @@
+package org.embulk.output.hdfs.util;
+
+import java.nio.file.Paths;
+
+public class SamplePath
+{
+    private SamplePath()
+    {
+    }
+
+    public static String getFile(String pathPrefix, String sequenceFormat, String fileExt)
+    {
+        return pathPrefix + String.format(sequenceFormat, 0, 0) + fileExt;
+    }
+
+    public static String getDir(String pathPrefix, String sequenceFormat, String fileExt)
+    {
+        String sampleFile = getFile(pathPrefix, sequenceFormat, fileExt);
+        return Paths.get(sampleFile).getParent().toString();
+    }
+}
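
SamplePath answers the question "which directory will the output land in?" by formatting a sample file name with task index 0 and sequence 0, then taking its parent. A quick runnable check (the argument values are made up):

    import org.embulk.output.hdfs.util.SamplePath;

    class SamplePathExample
    {
        public static void main(String[] args)
        {
            // "/data/out/part" + String.format(".%03d.%02d", 0, 0) + ".csv"
            System.out.println(SamplePath.getFile("/data/out/part", ".%03d.%02d", ".csv")); // /data/out/part.000.00.csv
            System.out.println(SamplePath.getDir("/data/out/part", ".%03d.%02d", ".csv"));  // /data/out
        }
    }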
data/src/main/java/org/embulk/output/hdfs/util/StrftimeUtil.java
@@ -0,0 +1,23 @@
+package org.embulk.output.hdfs.util;
+
+import org.jruby.embed.ScriptingContainer;
+
+public class StrftimeUtil
+{
+    private static final String scriptTemplate = "(Time.now - %d).strftime('%s')";
+
+    private StrftimeUtil()
+    {
+    }
+
+    public static String strftime(String format, int rewindSeconds)
+    {
+        String script = buildScript(format, rewindSeconds);
+        return new ScriptingContainer().runScriptlet(script).toString();
+    }
+
+    private static String buildScript(String format, int rewindSeconds)
+    {
+        return String.format(scriptTemplate, rewindSeconds, format);
+    }
+}
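
`path_prefix` is resolved through JRuby's Time#strftime against (now - rewind_seconds), so a prefix such as /data/%Y-%m-%d/out with `rewind_seconds: 86400` lands in yesterday's partition. A sketch, assuming jruby on the classpath (the literal values are examples):

    import org.embulk.output.hdfs.util.StrftimeUtil;

    class StrftimeExample
    {
        public static void main(String[] args)
        {
            // Evaluates the Ruby snippet (Time.now - 86400).strftime('/data/%Y-%m-%d/out'),
            // printing e.g. /data/2016-02-15/out when run on 2016-02-16.
            System.out.println(StrftimeUtil.strftime("/data/%Y-%m-%d/out", 86400));
        }
    }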
data/src/test/java/org/embulk/output/hdfs/TestHdfsFileOutputPlugin.java
@@ -4,6 +4,7 @@ import com.google.common.base.Charsets;
 import com.google.common.base.Optional;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
+import org.apache.hadoop.fs.FileAlreadyExistsException;
 import org.embulk.EmbulkTestRuntime;
 import org.embulk.config.ConfigException;
 import org.embulk.config.ConfigSource;
@@ -17,6 +18,7 @@ import org.embulk.spi.PageTestUtils;
 import org.embulk.spi.Schema;
 import org.embulk.spi.TransactionalPageOutput;
 import org.embulk.spi.time.Timestamp;
+import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Rule;
 import org.junit.Test;
@@ -37,9 +39,11 @@ import static org.embulk.spi.type.Types.*;
 import static org.hamcrest.CoreMatchers.containsString;
 import static org.hamcrest.CoreMatchers.hasItem;
 import static org.hamcrest.CoreMatchers.not;
+import static org.hamcrest.core.Is.isA;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotEquals;
 import static org.junit.Assert.assertThat;
+import static org.junit.Assert.assertTrue;
 import static org.msgpack.value.ValueFactory.newMap;
 import static org.msgpack.value.ValueFactory.newString;
 
@@ -107,9 +111,9 @@ public class TestHdfsFileOutputPlugin
         assertEquals(Lists.newArrayList(), task.getConfigFiles());
         assertEquals(Maps.newHashMap(), task.getConfig());
         assertEquals(0, task.getRewindSeconds());
-        assertEquals(false, task.getOverwrite());
+        assertEquals(Optional.absent(), task.getOverwrite());
         assertEquals(Optional.absent(), task.getDoas());
-        assertEquals(PluginTask.DeleteInAdvancePolicy.NONE, task.getDeleteInAdvance());
+        assertEquals(Optional.absent(), task.getDeleteInAdvance());
     }
 
     @Test(expected = ConfigException.class)
@@ -119,6 +123,14 @@ public class TestHdfsFileOutputPlugin
         PluginTask task = config.loadConfig(PluginTask.class);
     }
 
+    @Test(expected = ConfigException.class)
+    public void testSequenceFormatMode_replace()
+    {
+        run(getBaseConfigSource()
+                .set("mode", "replace")
+                .set("sequence_format", "%d/%d"));
+    }
+
     private List<String> lsR(List<String> names, java.nio.file.Path dir)
     {
         try (DirectoryStream<java.nio.file.Path> stream = Files.newDirectoryStream(dir)) {
@@ -201,14 +213,20 @@ public class TestHdfsFileOutputPlugin
         }
     }
 
+    private ConfigSource getDefaultFsConfig()
+    {
+        return Exec.newConfigSource()
+                .set("fs.hdfs.impl", "org.apache.hadoop.fs.RawLocalFileSystem")
+                .set("fs.file.impl", "org.apache.hadoop.fs.RawLocalFileSystem")
+                .set("fs.trash.interval", "3600")
+                .set("fs.defaultFS", "file:///");
+    }
+
     @Test
     public void testBulkLoad()
     {
         ConfigSource config = getBaseConfigSource()
-                .setNested("config", Exec.newConfigSource()
-                        .set("fs.hdfs.impl", "org.apache.hadoop.fs.RawLocalFileSystem")
-                        .set("fs.file.impl", "org.apache.hadoop.fs.RawLocalFileSystem")
-                        .set("fs.defaultFS", "file:///"));
+                .setNested("config", getDefaultFsConfig());
 
         run(config);
         List<String> fileList = lsR(Lists.<String>newArrayList(), Paths.get(tmpFolder.getRoot().getAbsolutePath()));
@@ -219,28 +237,25 @@ public class TestHdfsFileOutputPlugin
     }
 
     @Test
-    public void testDeleteRECURSIVEInAdvance()
+    public void testDeleteInAdvance_RECURSIVE()
             throws IOException
     {
         for (int n = 0; n <= 10; n++) {
-            tmpFolder.newFile("embulk-output-hdfs_file_" + n + ".txt");
-            tmpFolder.newFolder("embulk-output-hdfs_directory_" + n);
+            tmpFolder.newFile("embulk-output-hdfs_testDeleteInAdvance_RECURSIVE_file_" + n + ".txt");
+            tmpFolder.newFolder("embulk-output-hdfs_testDeleteInAdvance_RECURSIVE_directory_" + n);
         }
 
         List<String> fileListBeforeRun = lsR(Lists.<String>newArrayList(), Paths.get(tmpFolder.getRoot().getAbsolutePath()));
 
         ConfigSource config = getBaseConfigSource()
-                .setNested("config", Exec.newConfigSource()
-                        .set("fs.hdfs.impl", "org.apache.hadoop.fs.RawLocalFileSystem")
-                        .set("fs.file.impl", "org.apache.hadoop.fs.RawLocalFileSystem")
-                        .set("fs.defaultFS", "file:///"))
+                .setNested("config", getDefaultFsConfig())
                 .set("delete_in_advance", "RECURSIVE");
 
         run(config);
 
         List<String> fileListAfterRun = lsR(Lists.<String>newArrayList(), Paths.get(tmpFolder.getRoot().getAbsolutePath()));
         assertNotEquals(fileListBeforeRun, fileListAfterRun);
-        assertThat(fileListAfterRun, not(hasItem(containsString("embulk-output-hdfs_directory"))));
+        assertThat(fileListAfterRun, not(hasItem(containsString("embulk-output-hdfs_testDeleteInAdvance_RECURSIVE_directory_"))));
         assertThat(fileListAfterRun, not(hasItem(containsString("txt"))));
         assertThat(fileListAfterRun, hasItem(containsString(pathPrefix + "001.00.csv")));
         assertRecordsInFile(String.format("%s/%s001.00.csv",
@@ -249,21 +264,18 @@ public class TestHdfsFileOutputPlugin
     }
 
     @Test
-    public void testDeleteFILE_ONLYInAdvance()
+    public void testDeleteInAdvance_FILE_ONLY()
             throws IOException
     {
         for (int n = 0; n <= 10; n++) {
-            tmpFolder.newFile("embulk-output-hdfs_file_" + n + ".txt");
-            tmpFolder.newFolder("embulk-output-hdfs_directory_" + n);
+            tmpFolder.newFile("embulk-output-hdfs_testDeleteInAdvance_FILE_ONLY_file_" + n + ".txt");
+            tmpFolder.newFolder("embulk-output-hdfs_testDeleteInAdvance_FILE_ONLY_directory_" + n);
         }
 
         List<String> fileListBeforeRun = lsR(Lists.<String>newArrayList(), Paths.get(tmpFolder.getRoot().getAbsolutePath()));
 
         ConfigSource config = getBaseConfigSource()
-                .setNested("config", Exec.newConfigSource()
-                        .set("fs.hdfs.impl", "org.apache.hadoop.fs.RawLocalFileSystem")
-                        .set("fs.file.impl", "org.apache.hadoop.fs.RawLocalFileSystem")
-                        .set("fs.defaultFS", "file:///"))
+                .setNested("config", getDefaultFsConfig())
                 .set("delete_in_advance", "FILE_ONLY");
 
         run(config);
@@ -271,7 +283,126 @@ public class TestHdfsFileOutputPlugin
         List<String> fileListAfterRun = lsR(Lists.<String>newArrayList(), Paths.get(tmpFolder.getRoot().getAbsolutePath()));
         assertNotEquals(fileListBeforeRun, fileListAfterRun);
         assertThat(fileListAfterRun, not(hasItem(containsString("txt"))));
-        assertThat(fileListAfterRun, hasItem(containsString("embulk-output-hdfs_directory")));
+        assertThat(fileListAfterRun, hasItem(containsString("embulk-output-hdfs_testDeleteInAdvance_FILE_ONLY_directory_")));
+        assertThat(fileListAfterRun, hasItem(containsString(pathPrefix + "001.00.csv")));
+        assertRecordsInFile(String.format("%s/%s001.00.csv",
+                tmpFolder.getRoot().getAbsolutePath(),
+                pathPrefix));
+    }
+
+    @Test
+    public void testMode_delete_recursive_in_advance()
+            throws IOException
+    {
+        for (int n = 0; n <= 10; n++) {
+            tmpFolder.newFile("embulk-output-hdfs_testMode_delete_recursive_in_advance_file_" + n + ".txt");
+            tmpFolder.newFolder("embulk-output-hdfs_testMode_delete_recursive_in_advance_directory_" + n);
+        }
+
+        List<String> fileListBeforeRun = lsR(Lists.<String>newArrayList(), Paths.get(tmpFolder.getRoot().getAbsolutePath()));
+
+        ConfigSource config = getBaseConfigSource()
+                .setNested("config", getDefaultFsConfig())
+                .set("mode", "delete_recursive_in_advance");
+
+        run(config);
+
+        List<String> fileListAfterRun = lsR(Lists.<String>newArrayList(), Paths.get(tmpFolder.getRoot().getAbsolutePath()));
+        assertNotEquals(fileListBeforeRun, fileListAfterRun);
+        assertThat(fileListAfterRun, not(hasItem(containsString("embulk-output-hdfs_testMode_delete_recursive_in_advance_directory_"))));
+        assertThat(fileListAfterRun, not(hasItem(containsString("txt"))));
+        assertThat(fileListAfterRun, hasItem(containsString(pathPrefix + "001.00.csv")));
+        assertRecordsInFile(String.format("%s/%s001.00.csv",
+                tmpFolder.getRoot().getAbsolutePath(),
+                pathPrefix));
+    }
+
+    @Test
+    public void testMode_delete_files_in_advance()
+            throws IOException
+    {
+        for (int n = 0; n <= 10; n++) {
+            tmpFolder.newFile("embulk-output-hdfs_testMode_delete_files_in_advance_file_" + n + ".txt");
+            tmpFolder.newFolder("embulk-output-hdfs_testMode_delete_files_in_advance_directory_" + n);
+        }
+
+        List<String> fileListBeforeRun = lsR(Lists.<String>newArrayList(), Paths.get(tmpFolder.getRoot().getAbsolutePath()));
+
+        ConfigSource config = getBaseConfigSource()
+                .setNested("config", getDefaultFsConfig())
+                .set("mode", "delete_files_in_advance");
+
+        run(config);
+
+        List<String> fileListAfterRun = lsR(Lists.<String>newArrayList(), Paths.get(tmpFolder.getRoot().getAbsolutePath()));
+        assertNotEquals(fileListBeforeRun, fileListAfterRun);
+        assertThat(fileListAfterRun, not(hasItem(containsString("txt"))));
+        assertThat(fileListAfterRun, hasItem(containsString("embulk-output-hdfs_testMode_delete_files_in_advance_directory_")));
+        assertThat(fileListAfterRun, hasItem(containsString(pathPrefix + "001.00.csv")));
+        assertRecordsInFile(String.format("%s/%s001.00.csv",
+                tmpFolder.getRoot().getAbsolutePath(),
+                pathPrefix));
+    }
+
+    @Test
+    public void testMode_abort_if_exist()
+            throws IOException
+    {
+        ConfigSource config = getBaseConfigSource()
+                .setNested("config", getDefaultFsConfig())
+                .set("mode", "abort_if_exist");
+
+        run(config);
+        try {
+            run(config);
+        }
+        catch (Exception e) {
+            // Walk the cause chain: the second run must fail because the file exists.
+            for (Throwable t = e; t != null; t = t.getCause()) {
+                if (t instanceof FileAlreadyExistsException) {
+                    return;
+                }
+            }
+        }
+        Assert.fail("FileAlreadyExistsException was not a cause.");
+    }
+
+    @Test
+    public void testMode_overwrite()
+            throws IOException
+    {
+        ConfigSource config = getBaseConfigSource()
+                .setNested("config", getDefaultFsConfig())
+                .set("mode", "overwrite");
+
+        run(config);
+        run(config); // the second run must succeed by overwriting the first
+    }
+
+    @Test
+    public void testMode_replace()
+            throws IOException
+    {
+        for (int n = 0; n <= 10; n++) {
+            tmpFolder.newFile("embulk-output-hdfs_testMode_replace_file_" + n + ".txt");
+            tmpFolder.newFolder("embulk-output-hdfs_testMode_replace_directory_" + n);
+        }
+
+        List<String> fileListBeforeRun = lsR(Lists.<String>newArrayList(), Paths.get(tmpFolder.getRoot().getAbsolutePath()));
+
+        run(getBaseConfigSource()
+                .setNested("config", getDefaultFsConfig())
+                .set("mode", "replace"));
+
+        List<String> fileListAfterRun = lsR(Lists.<String>newArrayList(), Paths.get(tmpFolder.getRoot().getAbsolutePath()));
+        assertNotEquals(fileListBeforeRun, fileListAfterRun);
+        assertThat(fileListAfterRun, not(hasItem(containsString("embulk-output-hdfs_testMode_replace_directory_"))));
+        assertThat(fileListAfterRun, not(hasItem(containsString("txt"))));
         assertThat(fileListAfterRun, hasItem(containsString(pathPrefix + "001.00.csv")));
         assertRecordsInFile(String.format("%s/%s001.00.csv",
                 tmpFolder.getRoot().getAbsolutePath(),