embulk-input-filesplit 0.1.3 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (28):
  1. checksums.yaml +4 -4
  2. data/README.md +41 -40
  3. data/build.gradle +64 -64
  4. data/classpath/embulk-input-filesplit-0.1.4.jar +0 -0
  5. data/lib/embulk/input/filesplit.rb +3 -3
  6. data/src/main/java/org/embulk/input/filesplit/LocalFileSplitInputPlugin.java +300 -187
  7. data/src/test/java/org/embulk/input/filesplit/EmbulkPluginTester.java +70 -70
  8. data/src/test/java/org/embulk/input/filesplit/LocalFileSplitInputPluginTest.java +129 -94
  9. data/src/test/java/org/embulk/input/filesplit/LocalFileSplitInputTest.java +78 -78
  10. data/src/test/java/org/embulk/input/filesplit/PartialFileInputStreamTest.java +570 -570
  11. data/src/test/resources/data/sub1/test1.csv +1 -0
  12. data/src/test/resources/data/sub1/test2.csv +3 -0
  13. data/src/test/resources/data/sub2/test1.csv +1 -0
  14. data/src/test/resources/data/sub2/test2.csv +3 -0
  15. data/src/test/resources/data/sub2/x.csv +1 -0
  16. data/src/test/resources/data/test-header.csv +5 -5
  17. data/src/test/resources/data/test-semicolon.csv +4 -4
  18. data/src/test/resources/data/test.csv +4 -4
  19. data/src/test/resources/yml/test-error1.yml +22 -0
  20. data/src/test/resources/yml/test-error2.yml +24 -0
  21. data/src/test/resources/yml/test-header.yml +24 -24
  22. data/src/test/resources/yml/test-only-header.yml +24 -24
  23. data/src/test/resources/yml/test-path_prefix-directory.yml +23 -0
  24. data/src/test/resources/yml/test-path_prefix-files.yml +23 -0
  25. data/src/test/resources/yml/test-tasks.yml +23 -23
  26. data/src/test/resources/yml/test.yml +22 -22
  27. metadata +15 -6
  28. data/classpath/embulk-input-filesplit-0.1.3.jar +0 -0
@@ -1,70 +1,70 @@
1
- package org.embulk.input.filesplit;
2
-
3
- import java.io.BufferedReader;
4
- import java.io.BufferedWriter;
5
- import java.io.File;
6
- import java.io.FileReader;
7
- import java.io.FileWriter;
8
- import java.util.regex.Matcher;
9
- import java.util.regex.Pattern;
10
-
11
- import org.embulk.EmbulkEmbed;
12
- import org.embulk.EmbulkEmbed.Bootstrap;
13
- import org.embulk.config.ConfigSource;
14
- import org.embulk.plugin.InjectedPluginSource;
15
-
16
- import com.google.inject.Binder;
17
- import com.google.inject.Module;
18
-
19
-
20
- public class EmbulkPluginTester {
21
-
22
- private EmbulkEmbed embulk;
23
-
24
- public EmbulkPluginTester(final Class<?> iface, final String name, final Class<?> impl)
25
- {
26
- Bootstrap bootstrap = new EmbulkEmbed.Bootstrap();
27
- bootstrap.addModules(new Module()
28
- {
29
- @Override
30
- public void configure(Binder binder)
31
- {
32
- InjectedPluginSource.registerPluginTo(binder, iface, name, impl);
33
- }
34
- });
35
- embulk = bootstrap.initializeCloseable();
36
- }
37
-
38
- public void run(String ymlPath) throws Exception
39
- {
40
- ConfigSource config = embulk.newConfigLoader().fromYamlFile(new File(convert(ymlPath)));
41
- embulk.run(config);
42
- }
43
-
44
- private String convert(String yml) throws Exception
45
- {
46
- File rootPath = new File(EmbulkPluginTester.class.getResource("/resource.txt").toURI()).getParentFile();
47
- File ymlPath = new File(EmbulkPluginTester.class.getResource(yml).toURI());
48
- File tempYmlPath = new File(ymlPath.getParentFile(), "temp-" + ymlPath.getName());
49
- Pattern pathPrefixPattern = Pattern.compile("^ *path(_prefix)?: '(.*)'$");
50
- try (BufferedReader reader = new BufferedReader(new FileReader(ymlPath))) {
51
- try (BufferedWriter writer = new BufferedWriter(new FileWriter(tempYmlPath))) {
52
- String line;
53
- while ((line = reader.readLine()) != null) {
54
- Matcher matcher = pathPrefixPattern.matcher(line);
55
- if (matcher.matches()) {
56
- int group = 2;
57
- writer.write(line.substring(0, matcher.start(group)));
58
- writer.write(new File(rootPath, matcher.group(group)).getAbsolutePath());
59
- writer.write(line.substring(matcher.end(group)));
60
- } else {
61
- writer.write(line);
62
- }
63
- writer.newLine();
64
- }
65
- }
66
- }
67
- return tempYmlPath.getAbsolutePath();
68
- }
69
-
70
- }
1
+ package org.embulk.input.filesplit;
2
+
3
+ import java.io.BufferedReader;
4
+ import java.io.BufferedWriter;
5
+ import java.io.File;
6
+ import java.io.FileReader;
7
+ import java.io.FileWriter;
8
+ import java.util.regex.Matcher;
9
+ import java.util.regex.Pattern;
10
+
11
+ import org.embulk.EmbulkEmbed;
12
+ import org.embulk.EmbulkEmbed.Bootstrap;
13
+ import org.embulk.config.ConfigSource;
14
+ import org.embulk.plugin.InjectedPluginSource;
15
+
16
+ import com.google.inject.Binder;
17
+ import com.google.inject.Module;
18
+
19
+
20
+ public class EmbulkPluginTester {
21
+
22
+ private EmbulkEmbed embulk;
23
+
24
+ public EmbulkPluginTester(final Class<?> iface, final String name, final Class<?> impl)
25
+ {
26
+ Bootstrap bootstrap = new EmbulkEmbed.Bootstrap();
27
+ bootstrap.addModules(new Module()
28
+ {
29
+ @Override
30
+ public void configure(Binder binder)
31
+ {
32
+ InjectedPluginSource.registerPluginTo(binder, iface, name, impl);
33
+ }
34
+ });
35
+ embulk = bootstrap.initializeCloseable();
36
+ }
37
+
38
+ public void run(String ymlPath) throws Exception
39
+ {
40
+ ConfigSource config = embulk.newConfigLoader().fromYamlFile(new File(convert(ymlPath)));
41
+ embulk.run(config);
42
+ }
43
+
44
+ private String convert(String yml) throws Exception
45
+ {
46
+ File rootPath = new File(EmbulkPluginTester.class.getResource("/resource.txt").toURI()).getParentFile();
47
+ File ymlPath = new File(EmbulkPluginTester.class.getResource(yml).toURI());
48
+ File tempYmlPath = new File(ymlPath.getParentFile(), "temp-" + ymlPath.getName());
49
+ Pattern pathPrefixPattern = Pattern.compile("^ *path(_prefix)?: '(.*)'$");
50
+ try (BufferedReader reader = new BufferedReader(new FileReader(ymlPath))) {
51
+ try (BufferedWriter writer = new BufferedWriter(new FileWriter(tempYmlPath))) {
52
+ String line;
53
+ while ((line = reader.readLine()) != null) {
54
+ Matcher matcher = pathPrefixPattern.matcher(line);
55
+ if (matcher.matches()) {
56
+ int group = 2;
57
+ writer.write(line.substring(0, matcher.start(group)));
58
+ writer.write(new File(rootPath, matcher.group(group)).getAbsolutePath());
59
+ writer.write(line.substring(matcher.end(group)));
60
+ } else {
61
+ writer.write(line);
62
+ }
63
+ writer.newLine();
64
+ }
65
+ }
66
+ }
67
+ return tempYmlPath.getAbsolutePath();
68
+ }
69
+
70
+ }
@@ -1,94 +1,129 @@
1
- package org.embulk.input.filesplit;
2
-
3
- import java.io.File;
4
- import java.io.IOException;
5
- import java.net.URISyntaxException;
6
- import java.nio.charset.Charset;
7
- import java.nio.file.FileSystem;
8
- import java.nio.file.FileSystems;
9
- import java.nio.file.Files;
10
- import java.util.ArrayList;
11
- import java.util.Collections;
12
- import java.util.List;
13
-
14
- import org.embulk.spi.InputPlugin;
15
- import org.junit.Test;
16
- import static org.junit.Assert.assertEquals;
17
-
18
- public class LocalFileSplitInputPluginTest {
19
-
20
- private static EmbulkPluginTester tester = new EmbulkPluginTester(InputPlugin.class, "filesplit", LocalFileSplitInputPlugin.class);
21
-
22
- @Test
23
- public void test() throws Exception
24
- {
25
- run("/yml/test.yml", "/data/test-semicolon.csv");
26
- }
27
-
28
- @Test
29
- public void testTasks() throws Exception
30
- {
31
- run("/yml/test-tasks.yml", "/data/test-semicolon.csv");
32
- }
33
-
34
- @Test
35
- public void testHeader() throws Exception
36
- {
37
- run("/yml/test-header.yml", "/data/test-semicolon.csv");
38
- }
39
-
40
- @Test
41
- public void testOnlyHeader() throws Exception
42
- {
43
- run("/yml/test-only-header.yml", "/data/empty.csv");
44
- }
45
-
46
- private void run(String ymlPath, String expectedName) throws Exception
47
- {
48
- List<String> expected = readAll(expectedName);
49
- Collections.sort(expected);
50
-
51
- File file = prepare();
52
- tester.run(ymlPath);
53
-
54
- List<String> actual= readAll(file);
55
- Collections.sort(actual);
56
-
57
- assertEquals(expected, actual);
58
- }
59
-
60
- private File prepare() throws URISyntaxException
61
- {
62
- File file = new File(new File(getClass().getResource("/resource.txt").toURI()).getParentFile(), "temp");
63
- file.mkdir();
64
- for (File child : file.listFiles()) {
65
- child.delete();
66
- }
67
- return file;
68
- }
69
-
70
- private List<String> readAll(String name) throws IOException, URISyntaxException
71
- {
72
- return readAll(new File(getClass().getResource(name).toURI()));
73
- }
74
-
75
- private List<String> readAll(File file) throws IOException
76
- {
77
- if (file.isFile()) {
78
- FileSystem fs = FileSystems.getDefault();
79
- Charset charset = Charset.forName("UTF-8");
80
- return Files.readAllLines(fs.getPath(file.getAbsolutePath()), charset);
81
- }
82
-
83
- if (file.isDirectory()) {
84
- List<String> lines = new ArrayList<String>();
85
- for (File child : file.listFiles()) {
86
- lines.addAll(readAll(child));
87
- }
88
- return lines;
89
- }
90
-
91
- return Collections.emptyList();
92
- }
93
-
94
- }
1
+ package org.embulk.input.filesplit;
2
+
3
+ import java.io.File;
4
+ import java.io.IOException;
5
+ import java.net.URISyntaxException;
6
+ import java.nio.charset.Charset;
7
+ import java.nio.file.FileSystem;
8
+ import java.nio.file.FileSystems;
9
+ import java.nio.file.Files;
10
+ import java.util.ArrayList;
11
+ import java.util.Collections;
12
+ import java.util.List;
13
+
14
+ import org.embulk.spi.InputPlugin;
15
+ import org.junit.Test;
16
+ import static org.junit.Assert.assertEquals;
17
+ import static org.junit.Assert.fail;
18
+
19
+ public class LocalFileSplitInputPluginTest {
20
+
21
+ private static EmbulkPluginTester tester = new EmbulkPluginTester(InputPlugin.class, "filesplit", LocalFileSplitInputPlugin.class);
22
+
23
+ @Test
24
+ public void test() throws Exception
25
+ {
26
+ run("/yml/test.yml", "/data/test-semicolon.csv");
27
+ }
28
+
29
+ @Test
30
+ public void testTasks() throws Exception
31
+ {
32
+ run("/yml/test-tasks.yml", "/data/test-semicolon.csv");
33
+ }
34
+
35
+ @Test
36
+ public void testHeader() throws Exception
37
+ {
38
+ run("/yml/test-header.yml", "/data/test-semicolon.csv");
39
+ }
40
+
41
+ @Test
42
+ public void testOnlyHeader() throws Exception
43
+ {
44
+ run("/yml/test-only-header.yml", "/data/empty.csv");
45
+ }
46
+
47
+ @Test
48
+ public void testPathPrefixDirectory() throws Exception
49
+ {
50
+ run("/yml/test-path_prefix-directory.yml", "/data/test.csv");
51
+ }
52
+
53
+ @Test
54
+ public void testPathPrefixFiles() throws Exception
55
+ {
56
+ run("/yml/test-path_prefix-files.yml", "/data/test.csv");
57
+ }
58
+
59
+ @Test
60
+ public void testError1() throws Exception
61
+ {
62
+ try {
63
+ run("/yml/test-error1.yml", "/data/empty.csv");
64
+ fail("IllegalArgumentException expected.");
65
+ } catch (IllegalArgumentException e) {
66
+ System.err.println(e);
67
+ }
68
+ }
69
+
70
+ @Test
71
+ public void testError2() throws Exception
72
+ {
73
+ try {
74
+ run("/yml/test-error2.yml", "/data/empty.csv");
75
+ fail("IllegalArgumentException expected.");
76
+ } catch (IllegalArgumentException e) {
77
+ System.err.println(e);
78
+ }
79
+ }
80
+
81
+ private void run(String ymlPath, String expectedName) throws Exception
82
+ {
83
+ List<String> expected = readAll(expectedName);
84
+ Collections.sort(expected);
85
+
86
+ File file = prepare();
87
+ tester.run(ymlPath);
88
+
89
+ List<String> actual= readAll(file);
90
+ Collections.sort(actual);
91
+
92
+ assertEquals(expected, actual);
93
+ }
94
+
95
+ private File prepare() throws URISyntaxException
96
+ {
97
+ File file = new File(new File(getClass().getResource("/resource.txt").toURI()).getParentFile(), "temp");
98
+ file.mkdir();
99
+ for (File child : file.listFiles()) {
100
+ child.delete();
101
+ }
102
+ return file;
103
+ }
104
+
105
+ private List<String> readAll(String name) throws IOException, URISyntaxException
106
+ {
107
+ return readAll(new File(getClass().getResource(name).toURI()));
108
+ }
109
+
110
+ private List<String> readAll(File file) throws IOException
111
+ {
112
+ if (file.isFile()) {
113
+ FileSystem fs = FileSystems.getDefault();
114
+ Charset charset = Charset.forName("UTF-8");
115
+ return Files.readAllLines(fs.getPath(file.getAbsolutePath()), charset);
116
+ }
117
+
118
+ if (file.isDirectory()) {
119
+ List<String> lines = new ArrayList<String>();
120
+ for (File child : file.listFiles()) {
121
+ lines.addAll(readAll(child));
122
+ }
123
+ return lines;
124
+ }
125
+
126
+ return Collections.emptyList();
127
+ }
128
+
129
+ }
@@ -1,78 +1,78 @@
1
- package org.embulk.input.filesplit;
2
-
3
- import static org.junit.Assert.assertEquals;
4
-
5
- import java.io.BufferedReader;
6
- import java.io.File;
7
- import java.io.IOException;
8
- import java.io.InputStream;
9
- import java.io.InputStreamReader;
10
- import java.net.URISyntaxException;
11
-
12
- import org.embulk.input.filesplit.LocalFileSplitInputPlugin.LocalFileSplitInput.FileSplitProvider;
13
- import org.junit.Test;
14
-
15
- public class LocalFileSplitInputTest {
16
-
17
- @Test
18
- public void testHeader() throws Exception
19
- {
20
- try (BufferedReader reader = new BufferedReader(new InputStreamReader(open("/data/test-header.csv", 0, 20)))) {
21
- assertEquals("id,name,value", reader.readLine());
22
- assertEquals("1,aaaaa,12345", reader.readLine());
23
- assertEquals(null, reader.readLine());
24
- }
25
-
26
- try (BufferedReader reader = new BufferedReader(new InputStreamReader(open("/data/test-header.csv", 0, 10)))) {
27
- assertEquals("id,name,value", reader.readLine());
28
- assertEquals(null, reader.readLine());
29
- }
30
-
31
- try (BufferedReader reader = new BufferedReader(new InputStreamReader(open("/data/test-header.csv", 1, 2)))) {
32
- assertEquals("id,name,value", reader.readLine());
33
- assertEquals(null, reader.readLine());
34
- }
35
-
36
- try (BufferedReader reader = new BufferedReader(new InputStreamReader(open("/data/test-header.csv", 1, 20)))) {
37
- assertEquals("id,name,value", reader.readLine());
38
- assertEquals("1,aaaaa,12345", reader.readLine());
39
- assertEquals(null, reader.readLine());
40
- }
41
-
42
- try (BufferedReader reader = new BufferedReader(new InputStreamReader(open("/data/test-header.csv", 1, 40)))) {
43
- assertEquals("id,name,value", reader.readLine());
44
- assertEquals("1,aaaaa,12345", reader.readLine());
45
- assertEquals("2,bbb,67890", reader.readLine());
46
- assertEquals(null, reader.readLine());
47
- }
48
-
49
- try (BufferedReader reader = new BufferedReader(new InputStreamReader(open("/data/test-header.csv", 20, 40)))) {
50
- assertEquals("id,name,value", reader.readLine());
51
- assertEquals("2,bbb,67890", reader.readLine());
52
- assertEquals(null, reader.readLine());
53
- }
54
- }
55
-
56
- @Test
57
- public void testOnlyHeader() throws Exception
58
- {
59
- try (BufferedReader reader = new BufferedReader(new InputStreamReader(open("/data/test-only-header.csv", 0, 10)))) {
60
- assertEquals("id,name,value", reader.readLine());
61
- assertEquals(null, reader.readLine());
62
- }
63
-
64
- try (BufferedReader reader = new BufferedReader(new InputStreamReader(open("/data/test-only-header.csv", 1, 10)))) {
65
- assertEquals("id,name,value", reader.readLine());
66
- assertEquals(null, reader.readLine());
67
- }
68
- }
69
-
70
- private InputStream open(String name, int start, int end) throws IOException, URISyntaxException
71
- {
72
- File path = new File(getClass().getResource(name).toURI());
73
- try (FileSplitProvider provider = new FileSplitProvider(new PartialFile(path.getAbsolutePath(), start, end), true)) {
74
- return provider.openNext();
75
- }
76
- }
77
-
78
- }
1
+ package org.embulk.input.filesplit;
2
+
3
+ import static org.junit.Assert.assertEquals;
4
+
5
+ import java.io.BufferedReader;
6
+ import java.io.File;
7
+ import java.io.IOException;
8
+ import java.io.InputStream;
9
+ import java.io.InputStreamReader;
10
+ import java.net.URISyntaxException;
11
+
12
+ import org.embulk.input.filesplit.LocalFileSplitInputPlugin.LocalFileSplitInput.FileSplitProvider;
13
+ import org.junit.Test;
14
+
15
+ public class LocalFileSplitInputTest {
16
+
17
+ @Test
18
+ public void testHeader() throws Exception
19
+ {
20
+ try (BufferedReader reader = new BufferedReader(new InputStreamReader(open("/data/test-header.csv", 0, 20)))) {
21
+ assertEquals("id,name,value", reader.readLine());
22
+ assertEquals("1,aaaaa,12345", reader.readLine());
23
+ assertEquals(null, reader.readLine());
24
+ }
25
+
26
+ try (BufferedReader reader = new BufferedReader(new InputStreamReader(open("/data/test-header.csv", 0, 10)))) {
27
+ assertEquals("id,name,value", reader.readLine());
28
+ assertEquals(null, reader.readLine());
29
+ }
30
+
31
+ try (BufferedReader reader = new BufferedReader(new InputStreamReader(open("/data/test-header.csv", 1, 2)))) {
32
+ assertEquals("id,name,value", reader.readLine());
33
+ assertEquals(null, reader.readLine());
34
+ }
35
+
36
+ try (BufferedReader reader = new BufferedReader(new InputStreamReader(open("/data/test-header.csv", 1, 20)))) {
37
+ assertEquals("id,name,value", reader.readLine());
38
+ assertEquals("1,aaaaa,12345", reader.readLine());
39
+ assertEquals(null, reader.readLine());
40
+ }
41
+
42
+ try (BufferedReader reader = new BufferedReader(new InputStreamReader(open("/data/test-header.csv", 1, 40)))) {
43
+ assertEquals("id,name,value", reader.readLine());
44
+ assertEquals("1,aaaaa,12345", reader.readLine());
45
+ assertEquals("2,bbb,67890", reader.readLine());
46
+ assertEquals(null, reader.readLine());
47
+ }
48
+
49
+ try (BufferedReader reader = new BufferedReader(new InputStreamReader(open("/data/test-header.csv", 20, 40)))) {
50
+ assertEquals("id,name,value", reader.readLine());
51
+ assertEquals("2,bbb,67890", reader.readLine());
52
+ assertEquals(null, reader.readLine());
53
+ }
54
+ }
55
+
56
+ @Test
57
+ public void testOnlyHeader() throws Exception
58
+ {
59
+ try (BufferedReader reader = new BufferedReader(new InputStreamReader(open("/data/test-only-header.csv", 0, 10)))) {
60
+ assertEquals("id,name,value", reader.readLine());
61
+ assertEquals(null, reader.readLine());
62
+ }
63
+
64
+ try (BufferedReader reader = new BufferedReader(new InputStreamReader(open("/data/test-only-header.csv", 1, 10)))) {
65
+ assertEquals("id,name,value", reader.readLine());
66
+ assertEquals(null, reader.readLine());
67
+ }
68
+ }
69
+
70
+ private InputStream open(String name, int start, int end) throws IOException, URISyntaxException
71
+ {
72
+ File path = new File(getClass().getResource(name).toURI());
73
+ try (FileSplitProvider provider = new FileSplitProvider(new PartialFile(path.getAbsolutePath(), start, end), true)) {
74
+ return provider.openNext();
75
+ }
76
+ }
77
+
78
+ }