embulk-input-filesplit 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28):
  1. checksums.yaml +4 -4
  2. data/README.md +41 -40
  3. data/build.gradle +64 -64
  4. data/classpath/embulk-input-filesplit-0.1.4.jar +0 -0
  5. data/lib/embulk/input/filesplit.rb +3 -3
  6. data/src/main/java/org/embulk/input/filesplit/LocalFileSplitInputPlugin.java +300 -187
  7. data/src/test/java/org/embulk/input/filesplit/EmbulkPluginTester.java +70 -70
  8. data/src/test/java/org/embulk/input/filesplit/LocalFileSplitInputPluginTest.java +129 -94
  9. data/src/test/java/org/embulk/input/filesplit/LocalFileSplitInputTest.java +78 -78
  10. data/src/test/java/org/embulk/input/filesplit/PartialFileInputStreamTest.java +570 -570
  11. data/src/test/resources/data/sub1/test1.csv +1 -0
  12. data/src/test/resources/data/sub1/test2.csv +3 -0
  13. data/src/test/resources/data/sub2/test1.csv +1 -0
  14. data/src/test/resources/data/sub2/test2.csv +3 -0
  15. data/src/test/resources/data/sub2/x.csv +1 -0
  16. data/src/test/resources/data/test-header.csv +5 -5
  17. data/src/test/resources/data/test-semicolon.csv +4 -4
  18. data/src/test/resources/data/test.csv +4 -4
  19. data/src/test/resources/yml/test-error1.yml +22 -0
  20. data/src/test/resources/yml/test-error2.yml +24 -0
  21. data/src/test/resources/yml/test-header.yml +24 -24
  22. data/src/test/resources/yml/test-only-header.yml +24 -24
  23. data/src/test/resources/yml/test-path_prefix-directory.yml +23 -0
  24. data/src/test/resources/yml/test-path_prefix-files.yml +23 -0
  25. data/src/test/resources/yml/test-tasks.yml +23 -23
  26. data/src/test/resources/yml/test.yml +22 -22
  27. metadata +15 -6
  28. data/classpath/embulk-input-filesplit-0.1.3.jar +0 -0
@@ -1,70 +1,70 @@
1
- package org.embulk.input.filesplit;
2
-
3
- import java.io.BufferedReader;
4
- import java.io.BufferedWriter;
5
- import java.io.File;
6
- import java.io.FileReader;
7
- import java.io.FileWriter;
8
- import java.util.regex.Matcher;
9
- import java.util.regex.Pattern;
10
-
11
- import org.embulk.EmbulkEmbed;
12
- import org.embulk.EmbulkEmbed.Bootstrap;
13
- import org.embulk.config.ConfigSource;
14
- import org.embulk.plugin.InjectedPluginSource;
15
-
16
- import com.google.inject.Binder;
17
- import com.google.inject.Module;
18
-
19
-
20
- public class EmbulkPluginTester {
21
-
22
- private EmbulkEmbed embulk;
23
-
24
- public EmbulkPluginTester(final Class<?> iface, final String name, final Class<?> impl)
25
- {
26
- Bootstrap bootstrap = new EmbulkEmbed.Bootstrap();
27
- bootstrap.addModules(new Module()
28
- {
29
- @Override
30
- public void configure(Binder binder)
31
- {
32
- InjectedPluginSource.registerPluginTo(binder, iface, name, impl);
33
- }
34
- });
35
- embulk = bootstrap.initializeCloseable();
36
- }
37
-
38
- public void run(String ymlPath) throws Exception
39
- {
40
- ConfigSource config = embulk.newConfigLoader().fromYamlFile(new File(convert(ymlPath)));
41
- embulk.run(config);
42
- }
43
-
44
- private String convert(String yml) throws Exception
45
- {
46
- File rootPath = new File(EmbulkPluginTester.class.getResource("/resource.txt").toURI()).getParentFile();
47
- File ymlPath = new File(EmbulkPluginTester.class.getResource(yml).toURI());
48
- File tempYmlPath = new File(ymlPath.getParentFile(), "temp-" + ymlPath.getName());
49
- Pattern pathPrefixPattern = Pattern.compile("^ *path(_prefix)?: '(.*)'$");
50
- try (BufferedReader reader = new BufferedReader(new FileReader(ymlPath))) {
51
- try (BufferedWriter writer = new BufferedWriter(new FileWriter(tempYmlPath))) {
52
- String line;
53
- while ((line = reader.readLine()) != null) {
54
- Matcher matcher = pathPrefixPattern.matcher(line);
55
- if (matcher.matches()) {
56
- int group = 2;
57
- writer.write(line.substring(0, matcher.start(group)));
58
- writer.write(new File(rootPath, matcher.group(group)).getAbsolutePath());
59
- writer.write(line.substring(matcher.end(group)));
60
- } else {
61
- writer.write(line);
62
- }
63
- writer.newLine();
64
- }
65
- }
66
- }
67
- return tempYmlPath.getAbsolutePath();
68
- }
69
-
70
- }
1
+ package org.embulk.input.filesplit;
2
+
3
+ import java.io.BufferedReader;
4
+ import java.io.BufferedWriter;
5
+ import java.io.File;
6
+ import java.io.FileReader;
7
+ import java.io.FileWriter;
8
+ import java.util.regex.Matcher;
9
+ import java.util.regex.Pattern;
10
+
11
+ import org.embulk.EmbulkEmbed;
12
+ import org.embulk.EmbulkEmbed.Bootstrap;
13
+ import org.embulk.config.ConfigSource;
14
+ import org.embulk.plugin.InjectedPluginSource;
15
+
16
+ import com.google.inject.Binder;
17
+ import com.google.inject.Module;
18
+
19
+
20
+ public class EmbulkPluginTester {
21
+
22
+ private EmbulkEmbed embulk;
23
+
24
+ public EmbulkPluginTester(final Class<?> iface, final String name, final Class<?> impl)
25
+ {
26
+ Bootstrap bootstrap = new EmbulkEmbed.Bootstrap();
27
+ bootstrap.addModules(new Module()
28
+ {
29
+ @Override
30
+ public void configure(Binder binder)
31
+ {
32
+ InjectedPluginSource.registerPluginTo(binder, iface, name, impl);
33
+ }
34
+ });
35
+ embulk = bootstrap.initializeCloseable();
36
+ }
37
+
38
+ public void run(String ymlPath) throws Exception
39
+ {
40
+ ConfigSource config = embulk.newConfigLoader().fromYamlFile(new File(convert(ymlPath)));
41
+ embulk.run(config);
42
+ }
43
+
44
+ private String convert(String yml) throws Exception
45
+ {
46
+ File rootPath = new File(EmbulkPluginTester.class.getResource("/resource.txt").toURI()).getParentFile();
47
+ File ymlPath = new File(EmbulkPluginTester.class.getResource(yml).toURI());
48
+ File tempYmlPath = new File(ymlPath.getParentFile(), "temp-" + ymlPath.getName());
49
+ Pattern pathPrefixPattern = Pattern.compile("^ *path(_prefix)?: '(.*)'$");
50
+ try (BufferedReader reader = new BufferedReader(new FileReader(ymlPath))) {
51
+ try (BufferedWriter writer = new BufferedWriter(new FileWriter(tempYmlPath))) {
52
+ String line;
53
+ while ((line = reader.readLine()) != null) {
54
+ Matcher matcher = pathPrefixPattern.matcher(line);
55
+ if (matcher.matches()) {
56
+ int group = 2;
57
+ writer.write(line.substring(0, matcher.start(group)));
58
+ writer.write(new File(rootPath, matcher.group(group)).getAbsolutePath());
59
+ writer.write(line.substring(matcher.end(group)));
60
+ } else {
61
+ writer.write(line);
62
+ }
63
+ writer.newLine();
64
+ }
65
+ }
66
+ }
67
+ return tempYmlPath.getAbsolutePath();
68
+ }
69
+
70
+ }
@@ -1,94 +1,129 @@
1
- package org.embulk.input.filesplit;
2
-
3
- import java.io.File;
4
- import java.io.IOException;
5
- import java.net.URISyntaxException;
6
- import java.nio.charset.Charset;
7
- import java.nio.file.FileSystem;
8
- import java.nio.file.FileSystems;
9
- import java.nio.file.Files;
10
- import java.util.ArrayList;
11
- import java.util.Collections;
12
- import java.util.List;
13
-
14
- import org.embulk.spi.InputPlugin;
15
- import org.junit.Test;
16
- import static org.junit.Assert.assertEquals;
17
-
18
- public class LocalFileSplitInputPluginTest {
19
-
20
- private static EmbulkPluginTester tester = new EmbulkPluginTester(InputPlugin.class, "filesplit", LocalFileSplitInputPlugin.class);
21
-
22
- @Test
23
- public void test() throws Exception
24
- {
25
- run("/yml/test.yml", "/data/test-semicolon.csv");
26
- }
27
-
28
- @Test
29
- public void testTasks() throws Exception
30
- {
31
- run("/yml/test-tasks.yml", "/data/test-semicolon.csv");
32
- }
33
-
34
- @Test
35
- public void testHeader() throws Exception
36
- {
37
- run("/yml/test-header.yml", "/data/test-semicolon.csv");
38
- }
39
-
40
- @Test
41
- public void testOnlyHeader() throws Exception
42
- {
43
- run("/yml/test-only-header.yml", "/data/empty.csv");
44
- }
45
-
46
- private void run(String ymlPath, String expectedName) throws Exception
47
- {
48
- List<String> expected = readAll(expectedName);
49
- Collections.sort(expected);
50
-
51
- File file = prepare();
52
- tester.run(ymlPath);
53
-
54
- List<String> actual= readAll(file);
55
- Collections.sort(actual);
56
-
57
- assertEquals(expected, actual);
58
- }
59
-
60
- private File prepare() throws URISyntaxException
61
- {
62
- File file = new File(new File(getClass().getResource("/resource.txt").toURI()).getParentFile(), "temp");
63
- file.mkdir();
64
- for (File child : file.listFiles()) {
65
- child.delete();
66
- }
67
- return file;
68
- }
69
-
70
- private List<String> readAll(String name) throws IOException, URISyntaxException
71
- {
72
- return readAll(new File(getClass().getResource(name).toURI()));
73
- }
74
-
75
- private List<String> readAll(File file) throws IOException
76
- {
77
- if (file.isFile()) {
78
- FileSystem fs = FileSystems.getDefault();
79
- Charset charset = Charset.forName("UTF-8");
80
- return Files.readAllLines(fs.getPath(file.getAbsolutePath()), charset);
81
- }
82
-
83
- if (file.isDirectory()) {
84
- List<String> lines = new ArrayList<String>();
85
- for (File child : file.listFiles()) {
86
- lines.addAll(readAll(child));
87
- }
88
- return lines;
89
- }
90
-
91
- return Collections.emptyList();
92
- }
93
-
94
- }
1
+ package org.embulk.input.filesplit;
2
+
3
+ import java.io.File;
4
+ import java.io.IOException;
5
+ import java.net.URISyntaxException;
6
+ import java.nio.charset.Charset;
7
+ import java.nio.file.FileSystem;
8
+ import java.nio.file.FileSystems;
9
+ import java.nio.file.Files;
10
+ import java.util.ArrayList;
11
+ import java.util.Collections;
12
+ import java.util.List;
13
+
14
+ import org.embulk.spi.InputPlugin;
15
+ import org.junit.Test;
16
+ import static org.junit.Assert.assertEquals;
17
+ import static org.junit.Assert.fail;
18
+
19
+ public class LocalFileSplitInputPluginTest {
20
+
21
+ private static EmbulkPluginTester tester = new EmbulkPluginTester(InputPlugin.class, "filesplit", LocalFileSplitInputPlugin.class);
22
+
23
+ @Test
24
+ public void test() throws Exception
25
+ {
26
+ run("/yml/test.yml", "/data/test-semicolon.csv");
27
+ }
28
+
29
+ @Test
30
+ public void testTasks() throws Exception
31
+ {
32
+ run("/yml/test-tasks.yml", "/data/test-semicolon.csv");
33
+ }
34
+
35
+ @Test
36
+ public void testHeader() throws Exception
37
+ {
38
+ run("/yml/test-header.yml", "/data/test-semicolon.csv");
39
+ }
40
+
41
+ @Test
42
+ public void testOnlyHeader() throws Exception
43
+ {
44
+ run("/yml/test-only-header.yml", "/data/empty.csv");
45
+ }
46
+
47
+ @Test
48
+ public void testPathPrefixDirectory() throws Exception
49
+ {
50
+ run("/yml/test-path_prefix-directory.yml", "/data/test.csv");
51
+ }
52
+
53
+ @Test
54
+ public void testPathPrefixFiles() throws Exception
55
+ {
56
+ run("/yml/test-path_prefix-files.yml", "/data/test.csv");
57
+ }
58
+
59
+ @Test
60
+ public void testError1() throws Exception
61
+ {
62
+ try {
63
+ run("/yml/test-error1.yml", "/data/empty.csv");
64
+ fail("IllegalArgumentException expected.");
65
+ } catch (IllegalArgumentException e) {
66
+ System.err.println(e);
67
+ }
68
+ }
69
+
70
+ @Test
71
+ public void testError2() throws Exception
72
+ {
73
+ try {
74
+ run("/yml/test-error2.yml", "/data/empty.csv");
75
+ fail("IllegalArgumentException expected.");
76
+ } catch (IllegalArgumentException e) {
77
+ System.err.println(e);
78
+ }
79
+ }
80
+
81
+ private void run(String ymlPath, String expectedName) throws Exception
82
+ {
83
+ List<String> expected = readAll(expectedName);
84
+ Collections.sort(expected);
85
+
86
+ File file = prepare();
87
+ tester.run(ymlPath);
88
+
89
+ List<String> actual= readAll(file);
90
+ Collections.sort(actual);
91
+
92
+ assertEquals(expected, actual);
93
+ }
94
+
95
+ private File prepare() throws URISyntaxException
96
+ {
97
+ File file = new File(new File(getClass().getResource("/resource.txt").toURI()).getParentFile(), "temp");
98
+ file.mkdir();
99
+ for (File child : file.listFiles()) {
100
+ child.delete();
101
+ }
102
+ return file;
103
+ }
104
+
105
+ private List<String> readAll(String name) throws IOException, URISyntaxException
106
+ {
107
+ return readAll(new File(getClass().getResource(name).toURI()));
108
+ }
109
+
110
+ private List<String> readAll(File file) throws IOException
111
+ {
112
+ if (file.isFile()) {
113
+ FileSystem fs = FileSystems.getDefault();
114
+ Charset charset = Charset.forName("UTF-8");
115
+ return Files.readAllLines(fs.getPath(file.getAbsolutePath()), charset);
116
+ }
117
+
118
+ if (file.isDirectory()) {
119
+ List<String> lines = new ArrayList<String>();
120
+ for (File child : file.listFiles()) {
121
+ lines.addAll(readAll(child));
122
+ }
123
+ return lines;
124
+ }
125
+
126
+ return Collections.emptyList();
127
+ }
128
+
129
+ }
@@ -1,78 +1,78 @@
1
- package org.embulk.input.filesplit;
2
-
3
- import static org.junit.Assert.assertEquals;
4
-
5
- import java.io.BufferedReader;
6
- import java.io.File;
7
- import java.io.IOException;
8
- import java.io.InputStream;
9
- import java.io.InputStreamReader;
10
- import java.net.URISyntaxException;
11
-
12
- import org.embulk.input.filesplit.LocalFileSplitInputPlugin.LocalFileSplitInput.FileSplitProvider;
13
- import org.junit.Test;
14
-
15
- public class LocalFileSplitInputTest {
16
-
17
- @Test
18
- public void testHeader() throws Exception
19
- {
20
- try (BufferedReader reader = new BufferedReader(new InputStreamReader(open("/data/test-header.csv", 0, 20)))) {
21
- assertEquals("id,name,value", reader.readLine());
22
- assertEquals("1,aaaaa,12345", reader.readLine());
23
- assertEquals(null, reader.readLine());
24
- }
25
-
26
- try (BufferedReader reader = new BufferedReader(new InputStreamReader(open("/data/test-header.csv", 0, 10)))) {
27
- assertEquals("id,name,value", reader.readLine());
28
- assertEquals(null, reader.readLine());
29
- }
30
-
31
- try (BufferedReader reader = new BufferedReader(new InputStreamReader(open("/data/test-header.csv", 1, 2)))) {
32
- assertEquals("id,name,value", reader.readLine());
33
- assertEquals(null, reader.readLine());
34
- }
35
-
36
- try (BufferedReader reader = new BufferedReader(new InputStreamReader(open("/data/test-header.csv", 1, 20)))) {
37
- assertEquals("id,name,value", reader.readLine());
38
- assertEquals("1,aaaaa,12345", reader.readLine());
39
- assertEquals(null, reader.readLine());
40
- }
41
-
42
- try (BufferedReader reader = new BufferedReader(new InputStreamReader(open("/data/test-header.csv", 1, 40)))) {
43
- assertEquals("id,name,value", reader.readLine());
44
- assertEquals("1,aaaaa,12345", reader.readLine());
45
- assertEquals("2,bbb,67890", reader.readLine());
46
- assertEquals(null, reader.readLine());
47
- }
48
-
49
- try (BufferedReader reader = new BufferedReader(new InputStreamReader(open("/data/test-header.csv", 20, 40)))) {
50
- assertEquals("id,name,value", reader.readLine());
51
- assertEquals("2,bbb,67890", reader.readLine());
52
- assertEquals(null, reader.readLine());
53
- }
54
- }
55
-
56
- @Test
57
- public void testOnlyHeader() throws Exception
58
- {
59
- try (BufferedReader reader = new BufferedReader(new InputStreamReader(open("/data/test-only-header.csv", 0, 10)))) {
60
- assertEquals("id,name,value", reader.readLine());
61
- assertEquals(null, reader.readLine());
62
- }
63
-
64
- try (BufferedReader reader = new BufferedReader(new InputStreamReader(open("/data/test-only-header.csv", 1, 10)))) {
65
- assertEquals("id,name,value", reader.readLine());
66
- assertEquals(null, reader.readLine());
67
- }
68
- }
69
-
70
- private InputStream open(String name, int start, int end) throws IOException, URISyntaxException
71
- {
72
- File path = new File(getClass().getResource(name).toURI());
73
- try (FileSplitProvider provider = new FileSplitProvider(new PartialFile(path.getAbsolutePath(), start, end), true)) {
74
- return provider.openNext();
75
- }
76
- }
77
-
78
- }
1
+ package org.embulk.input.filesplit;
2
+
3
+ import static org.junit.Assert.assertEquals;
4
+
5
+ import java.io.BufferedReader;
6
+ import java.io.File;
7
+ import java.io.IOException;
8
+ import java.io.InputStream;
9
+ import java.io.InputStreamReader;
10
+ import java.net.URISyntaxException;
11
+
12
+ import org.embulk.input.filesplit.LocalFileSplitInputPlugin.LocalFileSplitInput.FileSplitProvider;
13
+ import org.junit.Test;
14
+
15
+ public class LocalFileSplitInputTest {
16
+
17
+ @Test
18
+ public void testHeader() throws Exception
19
+ {
20
+ try (BufferedReader reader = new BufferedReader(new InputStreamReader(open("/data/test-header.csv", 0, 20)))) {
21
+ assertEquals("id,name,value", reader.readLine());
22
+ assertEquals("1,aaaaa,12345", reader.readLine());
23
+ assertEquals(null, reader.readLine());
24
+ }
25
+
26
+ try (BufferedReader reader = new BufferedReader(new InputStreamReader(open("/data/test-header.csv", 0, 10)))) {
27
+ assertEquals("id,name,value", reader.readLine());
28
+ assertEquals(null, reader.readLine());
29
+ }
30
+
31
+ try (BufferedReader reader = new BufferedReader(new InputStreamReader(open("/data/test-header.csv", 1, 2)))) {
32
+ assertEquals("id,name,value", reader.readLine());
33
+ assertEquals(null, reader.readLine());
34
+ }
35
+
36
+ try (BufferedReader reader = new BufferedReader(new InputStreamReader(open("/data/test-header.csv", 1, 20)))) {
37
+ assertEquals("id,name,value", reader.readLine());
38
+ assertEquals("1,aaaaa,12345", reader.readLine());
39
+ assertEquals(null, reader.readLine());
40
+ }
41
+
42
+ try (BufferedReader reader = new BufferedReader(new InputStreamReader(open("/data/test-header.csv", 1, 40)))) {
43
+ assertEquals("id,name,value", reader.readLine());
44
+ assertEquals("1,aaaaa,12345", reader.readLine());
45
+ assertEquals("2,bbb,67890", reader.readLine());
46
+ assertEquals(null, reader.readLine());
47
+ }
48
+
49
+ try (BufferedReader reader = new BufferedReader(new InputStreamReader(open("/data/test-header.csv", 20, 40)))) {
50
+ assertEquals("id,name,value", reader.readLine());
51
+ assertEquals("2,bbb,67890", reader.readLine());
52
+ assertEquals(null, reader.readLine());
53
+ }
54
+ }
55
+
56
+ @Test
57
+ public void testOnlyHeader() throws Exception
58
+ {
59
+ try (BufferedReader reader = new BufferedReader(new InputStreamReader(open("/data/test-only-header.csv", 0, 10)))) {
60
+ assertEquals("id,name,value", reader.readLine());
61
+ assertEquals(null, reader.readLine());
62
+ }
63
+
64
+ try (BufferedReader reader = new BufferedReader(new InputStreamReader(open("/data/test-only-header.csv", 1, 10)))) {
65
+ assertEquals("id,name,value", reader.readLine());
66
+ assertEquals(null, reader.readLine());
67
+ }
68
+ }
69
+
70
+ private InputStream open(String name, int start, int end) throws IOException, URISyntaxException
71
+ {
72
+ File path = new File(getClass().getResource(name).toURI());
73
+ try (FileSplitProvider provider = new FileSplitProvider(new PartialFile(path.getAbsolutePath(), start, end), true)) {
74
+ return provider.openNext();
75
+ }
76
+ }
77
+
78
+ }