embulk-input-filesplit 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/build.gradle +4 -4
- data/classpath/embulk-input-filesplit-0.1.3.jar +0 -0
- data/src/main/java/org/embulk/input/filesplit/LocalFileSplitInputPlugin.java +187 -187
- data/src/test/java/org/embulk/input/filesplit/EmbulkPluginTester.java +50 -75
- data/src/test/java/org/embulk/input/filesplit/LocalFileSplitInputPluginTest.java +19 -19
- data/src/test/java/org/embulk/input/filesplit/LocalFileSplitInputTest.java +9 -12
- data/src/test/resources/data/test-semicolon.csv +4 -0
- metadata +4 -4
- data/classpath/embulk-input-filesplit-0.1.2.jar +0 -0
- data/src/test/java/org/embulk/input/filesplit/EmptyConfigSource.java +0 -107
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a6c43f390ee5dd1de19e4481dd2443dc3d3d63c0
|
4
|
+
data.tar.gz: 7fb23bbe679ffa5fc730e3f553a1c14c2633f381
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a55da85aa17a112889765c8ec8281e1816c40e00a849032e72ca4618d85c1dda1c759cf178a5bffc5b1f3bd73f918c9eb2e3461f825eedb30b5010810273e833
|
7
|
+
data.tar.gz: eb261354f866e3e4833815fd19c9634a380283dbadf9a47dcb8f12eb6135f502ee0c7d8e13f10c973b91f06fddb77e479f243bf8520e9b2f2338c14b388f35ce
|
data/build.gradle
CHANGED
@@ -10,7 +10,7 @@ apply plugin: 'com.github.jruby-gradle.base'
|
|
10
10
|
|
11
11
|
[compileJava, compileTestJava]*.options*.encoding = 'UTF-8'
|
12
12
|
|
13
|
-
project.version = '0.1.2'
|
13
|
+
project.version = '0.1.3'
|
14
14
|
|
15
15
|
repositories {
|
16
16
|
mavenCentral()
|
@@ -22,9 +22,9 @@ configurations {
|
|
22
22
|
}
|
23
23
|
|
24
24
|
dependencies {
|
25
|
-
compile 'org.embulk:embulk-core:0.
|
26
|
-
provided 'org.embulk:embulk-core:0.
|
27
|
-
testCompile 'org.embulk:embulk-standards:0.
|
25
|
+
compile 'org.embulk:embulk-core:0.7.4'
|
26
|
+
provided 'org.embulk:embulk-core:0.7.4'
|
27
|
+
testCompile 'org.embulk:embulk-standards:0.7.4'
|
28
28
|
testCompile 'junit:junit:4.+'
|
29
29
|
}
|
30
30
|
|
Binary file
|
@@ -1,187 +1,187 @@
|
|
1
|
-
package org.embulk.input.filesplit;
|
2
|
-
|
3
|
-
import java.io.BufferedInputStream;
|
4
|
-
import java.io.ByteArrayInputStream;
|
5
|
-
import java.io.ByteArrayOutputStream;
|
6
|
-
import java.io.File;
|
7
|
-
import java.io.FileInputStream;
|
8
|
-
import java.io.IOException;
|
9
|
-
import java.io.InputStream;
|
10
|
-
import java.io.SequenceInputStream;
|
11
|
-
import java.util.ArrayList;
|
12
|
-
import java.util.List;
|
13
|
-
|
14
|
-
import org.embulk.config.CommitReport;
|
15
|
-
import org.embulk.config.Config;
|
16
|
-
import org.embulk.config.ConfigDefault;
|
17
|
-
import org.embulk.config.ConfigDiff;
|
18
|
-
import org.embulk.config.ConfigInject;
|
19
|
-
import org.embulk.config.ConfigSource;
|
20
|
-
import org.embulk.config.Task;
|
21
|
-
import org.embulk.config.TaskSource;
|
22
|
-
import org.embulk.spi.BufferAllocator;
|
23
|
-
import org.embulk.spi.Exec;
|
24
|
-
import org.embulk.spi.FileInputPlugin;
|
25
|
-
import org.embulk.spi.TransactionalFileInput;
|
26
|
-
import org.embulk.spi.util.InputStreamFileInput;
|
27
|
-
|
28
|
-
import com.google.common.base.Optional;
|
29
|
-
|
30
|
-
|
31
|
-
public class LocalFileSplitInputPlugin
|
32
|
-
implements FileInputPlugin
|
33
|
-
{
|
34
|
-
public interface PluginTask
|
35
|
-
extends Task
|
36
|
-
{
|
37
|
-
@Config("path")
|
38
|
-
public String getPath();
|
39
|
-
|
40
|
-
@Config("tasks")
|
41
|
-
@ConfigDefault("null")
|
42
|
-
public Optional<Integer> getTasks();
|
43
|
-
|
44
|
-
@Config("header_line")
|
45
|
-
@ConfigDefault("false")
|
46
|
-
public boolean getHeaderLine();
|
47
|
-
|
48
|
-
public List<PartialFile> getFiles();
|
49
|
-
public void setFiles(List<PartialFile> files);
|
50
|
-
|
51
|
-
@ConfigInject
|
52
|
-
public BufferAllocator getBufferAllocator();
|
53
|
-
}
|
54
|
-
|
55
|
-
@Override
|
56
|
-
public ConfigDiff transaction(ConfigSource config, FileInputPlugin.Control control)
|
57
|
-
{
|
58
|
-
PluginTask task = config.loadConfig(PluginTask.class);
|
59
|
-
|
60
|
-
int tasks;
|
61
|
-
if (task.getTasks().isPresent()) {
|
62
|
-
tasks = task.getTasks().get();
|
63
|
-
if (tasks <= 0) {
|
64
|
-
throw new IllegalArgumentException(String.format("'tasks' is %d but must be greater than 0", tasks));
|
65
|
-
}
|
66
|
-
} else {
|
67
|
-
tasks = Runtime.getRuntime().availableProcessors() * 2;
|
68
|
-
}
|
69
|
-
|
70
|
-
long size = new File(task.getPath()).length();
|
71
|
-
List<PartialFile> files = new ArrayList<PartialFile>();
|
72
|
-
for (int i = 0; i < tasks; i++) {
|
73
|
-
long start = size * i / tasks;
|
74
|
-
long end = size * (i + 1) / tasks;
|
75
|
-
if (start < end) {
|
76
|
-
files.add(new PartialFile(task.getPath(), start, end));
|
77
|
-
}
|
78
|
-
}
|
79
|
-
|
80
|
-
task.setFiles(files);
|
81
|
-
|
82
|
-
return resume(task.dump(), task.getFiles().size(), control);
|
83
|
-
}
|
84
|
-
|
85
|
-
@Override
|
86
|
-
public ConfigDiff resume(TaskSource taskSource,
|
87
|
-
int taskCount,
|
88
|
-
FileInputPlugin.Control control)
|
89
|
-
{
|
90
|
-
control.run(taskSource, taskCount);
|
91
|
-
|
92
|
-
return Exec.newConfigDiff();
|
93
|
-
}
|
94
|
-
|
95
|
-
@Override
|
96
|
-
public void cleanup(TaskSource taskSource,
|
97
|
-
int taskCount,
|
98
|
-
List<CommitReport> successCommitReports)
|
99
|
-
{ }
|
100
|
-
|
101
|
-
@Override
|
102
|
-
public TransactionalFileInput open(TaskSource taskSource, int taskIndex)
|
103
|
-
{
|
104
|
-
PluginTask task = taskSource.loadTask(PluginTask.class);
|
105
|
-
return new LocalFileSplitInput(task, taskIndex);
|
106
|
-
}
|
107
|
-
|
108
|
-
public static class LocalFileSplitInput
|
109
|
-
extends InputStreamFileInput
|
110
|
-
implements TransactionalFileInput
|
111
|
-
{
|
112
|
-
public static class FileSplitProvider
|
113
|
-
implements InputStreamFileInput.Provider
|
114
|
-
{
|
115
|
-
private final PartialFile file;
|
116
|
-
private final boolean hasHeader;
|
117
|
-
private boolean opened = false;
|
118
|
-
|
119
|
-
public FileSplitProvider(PartialFile file, boolean hasHeader)
|
120
|
-
{
|
121
|
-
this.file = file;
|
122
|
-
this.hasHeader = hasHeader;
|
123
|
-
}
|
124
|
-
|
125
|
-
@Override
|
126
|
-
public InputStream openNext() throws IOException
|
127
|
-
{
|
128
|
-
if (opened) {
|
129
|
-
return null;
|
130
|
-
}
|
131
|
-
opened = true;
|
132
|
-
|
133
|
-
InputStream in = new PartialFileInputStream(new FileInputStream(file.getPath()), file.getStart(), file.getEnd());
|
134
|
-
if (file.getStart() > 0 && hasHeader) {
|
135
|
-
in = new SequenceInputStream(openHeader(file.getPath()), in);
|
136
|
-
}
|
137
|
-
return in;
|
138
|
-
}
|
139
|
-
|
140
|
-
@Override
|
141
|
-
public void close() { }
|
142
|
-
|
143
|
-
private InputStream openHeader(String path) throws IOException
|
144
|
-
{
|
145
|
-
ByteArrayOutputStream header = new ByteArrayOutputStream();
|
146
|
-
try (BufferedInputStream in = new BufferedInputStream(new FileInputStream(path))) {
|
147
|
-
while (true) {
|
148
|
-
int c = in.read();
|
149
|
-
if (c < 0) {
|
150
|
-
break;
|
151
|
-
}
|
152
|
-
|
153
|
-
header.write(c);
|
154
|
-
|
155
|
-
if (c == '\n') {
|
156
|
-
break;
|
157
|
-
}
|
158
|
-
|
159
|
-
if (c == '\r') {
|
160
|
-
int c2 = in.read();
|
161
|
-
if (c2 == '\n') {
|
162
|
-
header.write(c2);
|
163
|
-
}
|
164
|
-
break;
|
165
|
-
}
|
166
|
-
}
|
167
|
-
}
|
168
|
-
header.close();
|
169
|
-
return new ByteArrayInputStream(header.toByteArray());
|
170
|
-
}
|
171
|
-
}
|
172
|
-
|
173
|
-
public LocalFileSplitInput(PluginTask task, int taskIndex)
|
174
|
-
{
|
175
|
-
super(task.getBufferAllocator(), new FileSplitProvider(task.getFiles().get(taskIndex), task.getHeaderLine()));
|
176
|
-
}
|
177
|
-
|
178
|
-
@Override
|
179
|
-
public void abort() { }
|
180
|
-
|
181
|
-
@Override
|
182
|
-
public CommitReport commit()
|
183
|
-
{
|
184
|
-
return Exec.newCommitReport();
|
185
|
-
}
|
186
|
-
}
|
187
|
-
}
|
1
|
+
package org.embulk.input.filesplit;
|
2
|
+
|
3
|
+
import java.io.BufferedInputStream;
|
4
|
+
import java.io.ByteArrayInputStream;
|
5
|
+
import java.io.ByteArrayOutputStream;
|
6
|
+
import java.io.File;
|
7
|
+
import java.io.FileInputStream;
|
8
|
+
import java.io.IOException;
|
9
|
+
import java.io.InputStream;
|
10
|
+
import java.io.SequenceInputStream;
|
11
|
+
import java.util.ArrayList;
|
12
|
+
import java.util.List;
|
13
|
+
|
14
|
+
import org.embulk.config.Config;
|
15
|
+
import org.embulk.config.ConfigDefault;
|
16
|
+
import org.embulk.config.ConfigDiff;
|
17
|
+
import org.embulk.config.ConfigInject;
|
18
|
+
import org.embulk.config.ConfigSource;
|
19
|
+
import org.embulk.config.Task;
|
20
|
+
import org.embulk.config.TaskReport;
|
21
|
+
import org.embulk.config.TaskSource;
|
22
|
+
import org.embulk.spi.BufferAllocator;
|
23
|
+
import org.embulk.spi.Exec;
|
24
|
+
import org.embulk.spi.FileInputPlugin;
|
25
|
+
import org.embulk.spi.TransactionalFileInput;
|
26
|
+
import org.embulk.spi.util.InputStreamFileInput;
|
27
|
+
|
28
|
+
import com.google.common.base.Optional;
|
29
|
+
|
30
|
+
|
31
|
+
public class LocalFileSplitInputPlugin
|
32
|
+
implements FileInputPlugin
|
33
|
+
{
|
34
|
+
public interface PluginTask
|
35
|
+
extends Task
|
36
|
+
{
|
37
|
+
@Config("path")
|
38
|
+
public String getPath();
|
39
|
+
|
40
|
+
@Config("tasks")
|
41
|
+
@ConfigDefault("null")
|
42
|
+
public Optional<Integer> getTasks();
|
43
|
+
|
44
|
+
@Config("header_line")
|
45
|
+
@ConfigDefault("false")
|
46
|
+
public boolean getHeaderLine();
|
47
|
+
|
48
|
+
public List<PartialFile> getFiles();
|
49
|
+
public void setFiles(List<PartialFile> files);
|
50
|
+
|
51
|
+
@ConfigInject
|
52
|
+
public BufferAllocator getBufferAllocator();
|
53
|
+
}
|
54
|
+
|
55
|
+
@Override
|
56
|
+
public ConfigDiff transaction(ConfigSource config, FileInputPlugin.Control control)
|
57
|
+
{
|
58
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
59
|
+
|
60
|
+
int tasks;
|
61
|
+
if (task.getTasks().isPresent()) {
|
62
|
+
tasks = task.getTasks().get();
|
63
|
+
if (tasks <= 0) {
|
64
|
+
throw new IllegalArgumentException(String.format("'tasks' is %d but must be greater than 0", tasks));
|
65
|
+
}
|
66
|
+
} else {
|
67
|
+
tasks = Runtime.getRuntime().availableProcessors() * 2;
|
68
|
+
}
|
69
|
+
|
70
|
+
long size = new File(task.getPath()).length();
|
71
|
+
List<PartialFile> files = new ArrayList<PartialFile>();
|
72
|
+
for (int i = 0; i < tasks; i++) {
|
73
|
+
long start = size * i / tasks;
|
74
|
+
long end = size * (i + 1) / tasks;
|
75
|
+
if (start < end) {
|
76
|
+
files.add(new PartialFile(task.getPath(), start, end));
|
77
|
+
}
|
78
|
+
}
|
79
|
+
|
80
|
+
task.setFiles(files);
|
81
|
+
|
82
|
+
return resume(task.dump(), task.getFiles().size(), control);
|
83
|
+
}
|
84
|
+
|
85
|
+
@Override
|
86
|
+
public ConfigDiff resume(TaskSource taskSource,
|
87
|
+
int taskCount,
|
88
|
+
FileInputPlugin.Control control)
|
89
|
+
{
|
90
|
+
control.run(taskSource, taskCount);
|
91
|
+
|
92
|
+
return Exec.newConfigDiff();
|
93
|
+
}
|
94
|
+
|
95
|
+
@Override
|
96
|
+
public void cleanup(TaskSource taskSource,
|
97
|
+
int taskCount,
|
98
|
+
List<TaskReport> successTaskReports)
|
99
|
+
{ }
|
100
|
+
|
101
|
+
@Override
|
102
|
+
public TransactionalFileInput open(TaskSource taskSource, int taskIndex)
|
103
|
+
{
|
104
|
+
PluginTask task = taskSource.loadTask(PluginTask.class);
|
105
|
+
return new LocalFileSplitInput(task, taskIndex);
|
106
|
+
}
|
107
|
+
|
108
|
+
public static class LocalFileSplitInput
|
109
|
+
extends InputStreamFileInput
|
110
|
+
implements TransactionalFileInput
|
111
|
+
{
|
112
|
+
public static class FileSplitProvider
|
113
|
+
implements InputStreamFileInput.Provider
|
114
|
+
{
|
115
|
+
private final PartialFile file;
|
116
|
+
private final boolean hasHeader;
|
117
|
+
private boolean opened = false;
|
118
|
+
|
119
|
+
public FileSplitProvider(PartialFile file, boolean hasHeader)
|
120
|
+
{
|
121
|
+
this.file = file;
|
122
|
+
this.hasHeader = hasHeader;
|
123
|
+
}
|
124
|
+
|
125
|
+
@Override
|
126
|
+
public InputStream openNext() throws IOException
|
127
|
+
{
|
128
|
+
if (opened) {
|
129
|
+
return null;
|
130
|
+
}
|
131
|
+
opened = true;
|
132
|
+
|
133
|
+
InputStream in = new PartialFileInputStream(new FileInputStream(file.getPath()), file.getStart(), file.getEnd());
|
134
|
+
if (file.getStart() > 0 && hasHeader) {
|
135
|
+
in = new SequenceInputStream(openHeader(file.getPath()), in);
|
136
|
+
}
|
137
|
+
return in;
|
138
|
+
}
|
139
|
+
|
140
|
+
@Override
|
141
|
+
public void close() { }
|
142
|
+
|
143
|
+
private InputStream openHeader(String path) throws IOException
|
144
|
+
{
|
145
|
+
ByteArrayOutputStream header = new ByteArrayOutputStream();
|
146
|
+
try (BufferedInputStream in = new BufferedInputStream(new FileInputStream(path))) {
|
147
|
+
while (true) {
|
148
|
+
int c = in.read();
|
149
|
+
if (c < 0) {
|
150
|
+
break;
|
151
|
+
}
|
152
|
+
|
153
|
+
header.write(c);
|
154
|
+
|
155
|
+
if (c == '\n') {
|
156
|
+
break;
|
157
|
+
}
|
158
|
+
|
159
|
+
if (c == '\r') {
|
160
|
+
int c2 = in.read();
|
161
|
+
if (c2 == '\n') {
|
162
|
+
header.write(c2);
|
163
|
+
}
|
164
|
+
break;
|
165
|
+
}
|
166
|
+
}
|
167
|
+
}
|
168
|
+
header.close();
|
169
|
+
return new ByteArrayInputStream(header.toByteArray());
|
170
|
+
}
|
171
|
+
}
|
172
|
+
|
173
|
+
public LocalFileSplitInput(PluginTask task, int taskIndex)
|
174
|
+
{
|
175
|
+
super(task.getBufferAllocator(), new FileSplitProvider(task.getFiles().get(taskIndex), task.getHeaderLine()));
|
176
|
+
}
|
177
|
+
|
178
|
+
@Override
|
179
|
+
public void abort() { }
|
180
|
+
|
181
|
+
@Override
|
182
|
+
public TaskReport commit()
|
183
|
+
{
|
184
|
+
return Exec.newTaskReport();
|
185
|
+
}
|
186
|
+
}
|
187
|
+
}
|
@@ -5,91 +5,66 @@ import java.io.BufferedWriter;
|
|
5
5
|
import java.io.File;
|
6
6
|
import java.io.FileReader;
|
7
7
|
import java.io.FileWriter;
|
8
|
-
import java.io.IOException;
|
9
|
-
import java.net.URISyntaxException;
|
10
|
-
import java.util.Arrays;
|
11
8
|
import java.util.regex.Matcher;
|
12
9
|
import java.util.regex.Pattern;
|
13
10
|
|
14
|
-
import org.embulk.
|
15
|
-
import org.embulk.
|
11
|
+
import org.embulk.EmbulkEmbed;
|
12
|
+
import org.embulk.EmbulkEmbed.Bootstrap;
|
16
13
|
import org.embulk.config.ConfigSource;
|
17
|
-
import org.embulk.exec.ExecutionResult;
|
18
|
-
import org.embulk.exec.LocalExecutor;
|
19
14
|
import org.embulk.plugin.InjectedPluginSource;
|
20
|
-
import org.embulk.spi.ExecSession;
|
21
15
|
|
22
16
|
import com.google.inject.Binder;
|
23
|
-
import com.google.inject.Injector;
|
24
17
|
import com.google.inject.Module;
|
25
18
|
|
19
|
+
|
26
20
|
public class EmbulkPluginTester {
|
27
|
-
|
28
|
-
private final Class<?> iface;
|
29
|
-
private final String name;
|
30
|
-
private final Class<?> impl;
|
31
|
-
|
32
|
-
|
33
|
-
public EmbulkPluginTester(Class<?> iface, String name, Class<?> impl)
|
34
|
-
{
|
35
|
-
this.iface = iface;
|
36
|
-
this.name = name;
|
37
|
-
this.impl = impl;
|
38
|
-
}
|
39
21
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
22
|
+
private EmbulkEmbed embulk;
|
23
|
+
|
24
|
+
public EmbulkPluginTester(final Class<?> iface, final String name, final Class<?> impl)
|
25
|
+
{
|
26
|
+
Bootstrap bootstrap = new EmbulkEmbed.Bootstrap();
|
27
|
+
bootstrap.addModules(new Module()
|
28
|
+
{
|
29
|
+
@Override
|
30
|
+
public void configure(Binder binder)
|
31
|
+
{
|
32
|
+
InjectedPluginSource.registerPluginTo(binder, iface, name, impl);
|
33
|
+
}
|
34
|
+
});
|
35
|
+
embulk = bootstrap.initializeCloseable();
|
36
|
+
}
|
37
|
+
|
38
|
+
public void run(String ymlPath) throws Exception
|
39
|
+
{
|
40
|
+
ConfigSource config = embulk.newConfigLoader().fromYamlFile(new File(convert(ymlPath)));
|
41
|
+
embulk.run(config);
|
42
|
+
}
|
43
|
+
|
44
|
+
private String convert(String yml) throws Exception
|
45
|
+
{
|
46
|
+
File rootPath = new File(EmbulkPluginTester.class.getResource("/resource.txt").toURI()).getParentFile();
|
47
|
+
File ymlPath = new File(EmbulkPluginTester.class.getResource(yml).toURI());
|
48
|
+
File tempYmlPath = new File(ymlPath.getParentFile(), "temp-" + ymlPath.getName());
|
49
|
+
Pattern pathPrefixPattern = Pattern.compile("^ *path(_prefix)?: '(.*)'$");
|
50
|
+
try (BufferedReader reader = new BufferedReader(new FileReader(ymlPath))) {
|
51
|
+
try (BufferedWriter writer = new BufferedWriter(new FileWriter(tempYmlPath))) {
|
52
|
+
String line;
|
53
|
+
while ((line = reader.readLine()) != null) {
|
54
|
+
Matcher matcher = pathPrefixPattern.matcher(line);
|
55
|
+
if (matcher.matches()) {
|
56
|
+
int group = 2;
|
57
|
+
writer.write(line.substring(0, matcher.start(group)));
|
58
|
+
writer.write(new File(rootPath, matcher.group(group)).getAbsolutePath());
|
59
|
+
writer.write(line.substring(matcher.end(group)));
|
60
|
+
} else {
|
61
|
+
writer.write(line);
|
62
|
+
}
|
63
|
+
writer.newLine();
|
64
|
+
}
|
65
|
+
}
|
66
|
+
}
|
67
|
+
return tempYmlPath.getAbsolutePath();
|
68
|
+
}
|
62
69
|
|
63
|
-
private File convert(String yml) {
|
64
|
-
try {
|
65
|
-
File rootPath = new File(EmbulkPluginTester.class.getResource("/resource.txt").toURI()).getParentFile();
|
66
|
-
File ymlPath = new File(EmbulkPluginTester.class.getResource(yml).toURI());
|
67
|
-
File tempYmlPath = new File(ymlPath.getParentFile(), "temp-" + ymlPath.getName());
|
68
|
-
Pattern pathPrefixPattern = Pattern.compile("^ *path(_prefix)?: '(.*)'$");
|
69
|
-
try (BufferedReader reader = new BufferedReader(new FileReader(ymlPath))) {
|
70
|
-
try (BufferedWriter writer = new BufferedWriter(new FileWriter(tempYmlPath))) {
|
71
|
-
String line;
|
72
|
-
while ((line = reader.readLine()) != null) {
|
73
|
-
Matcher matcher = pathPrefixPattern.matcher(line);
|
74
|
-
if (matcher.matches()) {
|
75
|
-
int group = 2;
|
76
|
-
writer.write(line.substring(0, matcher.start(group)));
|
77
|
-
writer.write(new File(rootPath, matcher.group(group)).getAbsolutePath());
|
78
|
-
writer.write(line.substring(matcher.end(group)));
|
79
|
-
} else {
|
80
|
-
writer.write(line);
|
81
|
-
}
|
82
|
-
writer.newLine();
|
83
|
-
}
|
84
|
-
}
|
85
|
-
}
|
86
|
-
return tempYmlPath.getAbsoluteFile();
|
87
|
-
|
88
|
-
} catch (IOException e) {
|
89
|
-
throw new RuntimeException(e);
|
90
|
-
} catch (URISyntaxException e) {
|
91
|
-
throw new RuntimeException(e);
|
92
|
-
}
|
93
|
-
}
|
94
|
-
|
95
70
|
}
|
@@ -16,33 +16,33 @@ import org.junit.Test;
|
|
16
16
|
import static org.junit.Assert.assertEquals;
|
17
17
|
|
18
18
|
public class LocalFileSplitInputPluginTest {
|
19
|
-
|
20
|
-
private EmbulkPluginTester tester = new EmbulkPluginTester(InputPlugin.class, "filesplit", LocalFileSplitInputPlugin.class);
|
21
|
-
|
19
|
+
|
20
|
+
private static EmbulkPluginTester tester = new EmbulkPluginTester(InputPlugin.class, "filesplit", LocalFileSplitInputPlugin.class);
|
21
|
+
|
22
22
|
@Test
|
23
23
|
public void test() throws Exception
|
24
24
|
{
|
25
|
-
run("/yml/test.yml", "/data/test.csv");
|
25
|
+
run("/yml/test.yml", "/data/test-semicolon.csv");
|
26
26
|
}
|
27
|
-
|
27
|
+
|
28
28
|
@Test
|
29
29
|
public void testTasks() throws Exception
|
30
30
|
{
|
31
|
-
run("/yml/test-tasks.yml", "/data/test.csv");
|
31
|
+
run("/yml/test-tasks.yml", "/data/test-semicolon.csv");
|
32
32
|
}
|
33
|
-
|
33
|
+
|
34
34
|
@Test
|
35
35
|
public void testHeader() throws Exception
|
36
36
|
{
|
37
|
-
run("/yml/test-header.yml", "/data/test.csv");
|
37
|
+
run("/yml/test-header.yml", "/data/test-semicolon.csv");
|
38
38
|
}
|
39
|
-
|
39
|
+
|
40
40
|
@Test
|
41
41
|
public void testOnlyHeader() throws Exception
|
42
42
|
{
|
43
43
|
run("/yml/test-only-header.yml", "/data/empty.csv");
|
44
44
|
}
|
45
|
-
|
45
|
+
|
46
46
|
private void run(String ymlPath, String expectedName) throws Exception
|
47
47
|
{
|
48
48
|
List<String> expected = readAll(expectedName);
|
@@ -50,13 +50,13 @@ public class LocalFileSplitInputPluginTest {
|
|
50
50
|
|
51
51
|
File file = prepare();
|
52
52
|
tester.run(ymlPath);
|
53
|
-
|
53
|
+
|
54
54
|
List<String> actual= readAll(file);
|
55
55
|
Collections.sort(actual);
|
56
|
-
|
56
|
+
|
57
57
|
assertEquals(expected, actual);
|
58
58
|
}
|
59
|
-
|
59
|
+
|
60
60
|
private File prepare() throws URISyntaxException
|
61
61
|
{
|
62
62
|
File file = new File(new File(getClass().getResource("/resource.txt").toURI()).getParentFile(), "temp");
|
@@ -66,20 +66,20 @@ public class LocalFileSplitInputPluginTest {
|
|
66
66
|
}
|
67
67
|
return file;
|
68
68
|
}
|
69
|
-
|
69
|
+
|
70
70
|
private List<String> readAll(String name) throws IOException, URISyntaxException
|
71
71
|
{
|
72
72
|
return readAll(new File(getClass().getResource(name).toURI()));
|
73
73
|
}
|
74
|
-
|
75
|
-
private List<String> readAll(File file) throws IOException
|
74
|
+
|
75
|
+
private List<String> readAll(File file) throws IOException
|
76
76
|
{
|
77
77
|
if (file.isFile()) {
|
78
78
|
FileSystem fs = FileSystems.getDefault();
|
79
79
|
Charset charset = Charset.forName("UTF-8");
|
80
80
|
return Files.readAllLines(fs.getPath(file.getAbsolutePath()), charset);
|
81
81
|
}
|
82
|
-
|
82
|
+
|
83
83
|
if (file.isDirectory()) {
|
84
84
|
List<String> lines = new ArrayList<String>();
|
85
85
|
for (File child : file.listFiles()) {
|
@@ -87,8 +87,8 @@ public class LocalFileSplitInputPluginTest {
|
|
87
87
|
}
|
88
88
|
return lines;
|
89
89
|
}
|
90
|
-
|
90
|
+
|
91
91
|
return Collections.emptyList();
|
92
92
|
}
|
93
|
-
|
93
|
+
|
94
94
|
}
|
@@ -1,6 +1,3 @@
|
|
1
|
-
/*
|
2
|
-
* $Id: typical.epf 2627 2010-03-18 01:40:13Z tiba $
|
3
|
-
*/
|
4
1
|
package org.embulk.input.filesplit;
|
5
2
|
|
6
3
|
import static org.junit.Assert.assertEquals;
|
@@ -16,7 +13,7 @@ import org.embulk.input.filesplit.LocalFileSplitInputPlugin.LocalFileSplitInput.
|
|
16
13
|
import org.junit.Test;
|
17
14
|
|
18
15
|
public class LocalFileSplitInputTest {
|
19
|
-
|
16
|
+
|
20
17
|
@Test
|
21
18
|
public void testHeader() throws Exception
|
22
19
|
{
|
@@ -35,27 +32,27 @@ public class LocalFileSplitInputTest {
|
|
35
32
|
assertEquals("id,name,value", reader.readLine());
|
36
33
|
assertEquals(null, reader.readLine());
|
37
34
|
}
|
38
|
-
|
35
|
+
|
39
36
|
try (BufferedReader reader = new BufferedReader(new InputStreamReader(open("/data/test-header.csv", 1, 20)))) {
|
40
37
|
assertEquals("id,name,value", reader.readLine());
|
41
38
|
assertEquals("1,aaaaa,12345", reader.readLine());
|
42
39
|
assertEquals(null, reader.readLine());
|
43
40
|
}
|
44
|
-
|
41
|
+
|
45
42
|
try (BufferedReader reader = new BufferedReader(new InputStreamReader(open("/data/test-header.csv", 1, 40)))) {
|
46
43
|
assertEquals("id,name,value", reader.readLine());
|
47
44
|
assertEquals("1,aaaaa,12345", reader.readLine());
|
48
45
|
assertEquals("2,bbb,67890", reader.readLine());
|
49
46
|
assertEquals(null, reader.readLine());
|
50
47
|
}
|
51
|
-
|
48
|
+
|
52
49
|
try (BufferedReader reader = new BufferedReader(new InputStreamReader(open("/data/test-header.csv", 20, 40)))) {
|
53
50
|
assertEquals("id,name,value", reader.readLine());
|
54
51
|
assertEquals("2,bbb,67890", reader.readLine());
|
55
52
|
assertEquals(null, reader.readLine());
|
56
53
|
}
|
57
54
|
}
|
58
|
-
|
55
|
+
|
59
56
|
@Test
|
60
57
|
public void testOnlyHeader() throws Exception
|
61
58
|
{
|
@@ -63,19 +60,19 @@ public class LocalFileSplitInputTest {
|
|
63
60
|
assertEquals("id,name,value", reader.readLine());
|
64
61
|
assertEquals(null, reader.readLine());
|
65
62
|
}
|
66
|
-
|
63
|
+
|
67
64
|
try (BufferedReader reader = new BufferedReader(new InputStreamReader(open("/data/test-only-header.csv", 1, 10)))) {
|
68
65
|
assertEquals("id,name,value", reader.readLine());
|
69
66
|
assertEquals(null, reader.readLine());
|
70
67
|
}
|
71
68
|
}
|
72
|
-
|
73
|
-
private InputStream open(String name, int start, int end) throws IOException, URISyntaxException
|
69
|
+
|
70
|
+
private InputStream open(String name, int start, int end) throws IOException, URISyntaxException
|
74
71
|
{
|
75
72
|
File path = new File(getClass().getResource(name).toURI());
|
76
73
|
try (FileSplitProvider provider = new FileSplitProvider(new PartialFile(path.getAbsolutePath(), start, end), true)) {
|
77
74
|
return provider.openNext();
|
78
75
|
}
|
79
76
|
}
|
80
|
-
|
77
|
+
|
81
78
|
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-filesplit
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Hitoshi Tanaka
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-08-31 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email:
|
@@ -23,20 +23,20 @@ files:
|
|
23
23
|
- src/main/java/org/embulk/input/filesplit/PartialFile.java
|
24
24
|
- src/main/java/org/embulk/input/filesplit/PartialFileInputStream.java
|
25
25
|
- src/test/java/org/embulk/input/filesplit/EmbulkPluginTester.java
|
26
|
-
- src/test/java/org/embulk/input/filesplit/EmptyConfigSource.java
|
27
26
|
- src/test/java/org/embulk/input/filesplit/LocalFileSplitInputPluginTest.java
|
28
27
|
- src/test/java/org/embulk/input/filesplit/LocalFileSplitInputTest.java
|
29
28
|
- src/test/java/org/embulk/input/filesplit/PartialFileInputStreamTest.java
|
30
29
|
- src/test/resources/data/empty.csv
|
31
30
|
- src/test/resources/data/test-header.csv
|
32
31
|
- src/test/resources/data/test-only-header.csv
|
32
|
+
- src/test/resources/data/test-semicolon.csv
|
33
33
|
- src/test/resources/data/test.csv
|
34
34
|
- src/test/resources/resource.txt
|
35
35
|
- src/test/resources/yml/test-header.yml
|
36
36
|
- src/test/resources/yml/test-only-header.yml
|
37
37
|
- src/test/resources/yml/test-tasks.yml
|
38
38
|
- src/test/resources/yml/test.yml
|
39
|
-
- classpath/embulk-input-filesplit-0.1.2.jar
|
39
|
+
- classpath/embulk-input-filesplit-0.1.3.jar
|
40
40
|
homepage: https://github.com/hito4t/embulk-input-filesplit
|
41
41
|
licenses:
|
42
42
|
- Apache 2.0
|
Binary file
|
@@ -1,107 +0,0 @@
|
|
1
|
-
package org.embulk.input.filesplit;
|
2
|
-
|
3
|
-
import java.util.Collections;
|
4
|
-
import java.util.List;
|
5
|
-
import java.util.Map.Entry;
|
6
|
-
|
7
|
-
import org.embulk.config.ConfigSource;
|
8
|
-
import org.embulk.config.DataSource;
|
9
|
-
|
10
|
-
import com.fasterxml.jackson.databind.JsonNode;
|
11
|
-
import com.fasterxml.jackson.databind.node.ObjectNode;
|
12
|
-
|
13
|
-
public class EmptyConfigSource implements ConfigSource
|
14
|
-
{
|
15
|
-
|
16
|
-
@Override
|
17
|
-
public <E> E get(Class<E> type, String attrName)
|
18
|
-
{
|
19
|
-
return null;
|
20
|
-
}
|
21
|
-
|
22
|
-
@Override
|
23
|
-
public <E> E get(Class<E> type, String attrName, E defaultValue)
|
24
|
-
{
|
25
|
-
return defaultValue;
|
26
|
-
}
|
27
|
-
|
28
|
-
@Override
|
29
|
-
public List<String> getAttributeNames()
|
30
|
-
{
|
31
|
-
return Collections.emptyList();
|
32
|
-
}
|
33
|
-
|
34
|
-
@Override
|
35
|
-
public Iterable<Entry<String, JsonNode>> getAttributes()
|
36
|
-
{
|
37
|
-
return Collections.emptyList();
|
38
|
-
}
|
39
|
-
|
40
|
-
@Override
|
41
|
-
public ObjectNode getObjectNode()
|
42
|
-
{
|
43
|
-
return null;
|
44
|
-
}
|
45
|
-
|
46
|
-
@Override
|
47
|
-
public boolean isEmpty()
|
48
|
-
{
|
49
|
-
return true;
|
50
|
-
}
|
51
|
-
|
52
|
-
@Override
|
53
|
-
public ConfigSource deepCopy()
|
54
|
-
{
|
55
|
-
return this;
|
56
|
-
}
|
57
|
-
|
58
|
-
@Override
|
59
|
-
public ConfigSource getNested(String s)
|
60
|
-
{
|
61
|
-
// TODO 自動生成されたメソッド・スタブ
|
62
|
-
return null;
|
63
|
-
}
|
64
|
-
|
65
|
-
@Override
|
66
|
-
public ConfigSource getNestedOrSetEmpty(String s)
|
67
|
-
{
|
68
|
-
// TODO 自動生成されたメソッド・スタブ
|
69
|
-
return null;
|
70
|
-
}
|
71
|
-
|
72
|
-
@Override
|
73
|
-
public <T> T loadConfig(Class<T> class1)
|
74
|
-
{
|
75
|
-
// TODO 自動生成されたメソッド・スタブ
|
76
|
-
return null;
|
77
|
-
}
|
78
|
-
|
79
|
-
@Override
|
80
|
-
public ConfigSource merge(DataSource datasource)
|
81
|
-
{
|
82
|
-
// TODO 自動生成されたメソッド・スタブ
|
83
|
-
return null;
|
84
|
-
}
|
85
|
-
|
86
|
-
@Override
|
87
|
-
public ConfigSource set(String s, Object obj)
|
88
|
-
{
|
89
|
-
// TODO 自動生成されたメソッド・スタブ
|
90
|
-
return null;
|
91
|
-
}
|
92
|
-
|
93
|
-
@Override
|
94
|
-
public ConfigSource setAll(DataSource datasource)
|
95
|
-
{
|
96
|
-
// TODO 自動生成されたメソッド・スタブ
|
97
|
-
return null;
|
98
|
-
}
|
99
|
-
|
100
|
-
@Override
|
101
|
-
public ConfigSource setNested(String s, DataSource datasource)
|
102
|
-
{
|
103
|
-
// TODO 自動生成されたメソッド・スタブ
|
104
|
-
return null;
|
105
|
-
}
|
106
|
-
|
107
|
-
}
|