embulk-input-filesplit 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/build.gradle +4 -4
- data/classpath/embulk-input-filesplit-0.1.3.jar +0 -0
- data/src/main/java/org/embulk/input/filesplit/LocalFileSplitInputPlugin.java +187 -187
- data/src/test/java/org/embulk/input/filesplit/EmbulkPluginTester.java +50 -75
- data/src/test/java/org/embulk/input/filesplit/LocalFileSplitInputPluginTest.java +19 -19
- data/src/test/java/org/embulk/input/filesplit/LocalFileSplitInputTest.java +9 -12
- data/src/test/resources/data/test-semicolon.csv +4 -0
- metadata +4 -4
- data/classpath/embulk-input-filesplit-0.1.2.jar +0 -0
- data/src/test/java/org/embulk/input/filesplit/EmptyConfigSource.java +0 -107
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a6c43f390ee5dd1de19e4481dd2443dc3d3d63c0
|
4
|
+
data.tar.gz: 7fb23bbe679ffa5fc730e3f553a1c14c2633f381
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a55da85aa17a112889765c8ec8281e1816c40e00a849032e72ca4618d85c1dda1c759cf178a5bffc5b1f3bd73f918c9eb2e3461f825eedb30b5010810273e833
|
7
|
+
data.tar.gz: eb261354f866e3e4833815fd19c9634a380283dbadf9a47dcb8f12eb6135f502ee0c7d8e13f10c973b91f06fddb77e479f243bf8520e9b2f2338c14b388f35ce
|
data/build.gradle
CHANGED
@@ -10,7 +10,7 @@ apply plugin: 'com.github.jruby-gradle.base'
|
|
10
10
|
|
11
11
|
[compileJava, compileTestJava]*.options*.encoding = 'UTF-8'
|
12
12
|
|
13
|
-
project.version = '0.1.2'
|
13
|
+
project.version = '0.1.3'
|
14
14
|
|
15
15
|
repositories {
|
16
16
|
mavenCentral()
|
@@ -22,9 +22,9 @@ configurations {
|
|
22
22
|
}
|
23
23
|
|
24
24
|
dependencies {
|
25
|
-
compile 'org.embulk:embulk-core:0.
|
26
|
-
provided 'org.embulk:embulk-core:0.
|
27
|
-
testCompile 'org.embulk:embulk-standards:0.
|
25
|
+
compile 'org.embulk:embulk-core:0.7.4'
|
26
|
+
provided 'org.embulk:embulk-core:0.7.4'
|
27
|
+
testCompile 'org.embulk:embulk-standards:0.7.4'
|
28
28
|
testCompile 'junit:junit:4.+'
|
29
29
|
}
|
30
30
|
|
Binary file
|
@@ -1,187 +1,187 @@
|
|
1
|
-
package org.embulk.input.filesplit;
|
2
|
-
|
3
|
-
import java.io.BufferedInputStream;
|
4
|
-
import java.io.ByteArrayInputStream;
|
5
|
-
import java.io.ByteArrayOutputStream;
|
6
|
-
import java.io.File;
|
7
|
-
import java.io.FileInputStream;
|
8
|
-
import java.io.IOException;
|
9
|
-
import java.io.InputStream;
|
10
|
-
import java.io.SequenceInputStream;
|
11
|
-
import java.util.ArrayList;
|
12
|
-
import java.util.List;
|
13
|
-
|
14
|
-
import org.embulk.config.
|
15
|
-
import org.embulk.config.
|
16
|
-
import org.embulk.config.
|
17
|
-
import org.embulk.config.
|
18
|
-
import org.embulk.config.
|
19
|
-
import org.embulk.config.
|
20
|
-
import org.embulk.config.
|
21
|
-
import org.embulk.config.TaskSource;
|
22
|
-
import org.embulk.spi.BufferAllocator;
|
23
|
-
import org.embulk.spi.Exec;
|
24
|
-
import org.embulk.spi.FileInputPlugin;
|
25
|
-
import org.embulk.spi.TransactionalFileInput;
|
26
|
-
import org.embulk.spi.util.InputStreamFileInput;
|
27
|
-
|
28
|
-
import com.google.common.base.Optional;
|
29
|
-
|
30
|
-
|
31
|
-
public class LocalFileSplitInputPlugin
|
32
|
-
implements FileInputPlugin
|
33
|
-
{
|
34
|
-
public interface PluginTask
|
35
|
-
extends Task
|
36
|
-
{
|
37
|
-
@Config("path")
|
38
|
-
public String getPath();
|
39
|
-
|
40
|
-
@Config("tasks")
|
41
|
-
@ConfigDefault("null")
|
42
|
-
public Optional<Integer> getTasks();
|
43
|
-
|
44
|
-
@Config("header_line")
|
45
|
-
@ConfigDefault("false")
|
46
|
-
public boolean getHeaderLine();
|
47
|
-
|
48
|
-
public List<PartialFile> getFiles();
|
49
|
-
public void setFiles(List<PartialFile> files);
|
50
|
-
|
51
|
-
@ConfigInject
|
52
|
-
public BufferAllocator getBufferAllocator();
|
53
|
-
}
|
54
|
-
|
55
|
-
@Override
|
56
|
-
public ConfigDiff transaction(ConfigSource config, FileInputPlugin.Control control)
|
57
|
-
{
|
58
|
-
PluginTask task = config.loadConfig(PluginTask.class);
|
59
|
-
|
60
|
-
int tasks;
|
61
|
-
if (task.getTasks().isPresent()) {
|
62
|
-
tasks = task.getTasks().get();
|
63
|
-
if (tasks <= 0) {
|
64
|
-
throw new IllegalArgumentException(String.format("'tasks' is %d but must be greater than 0", tasks));
|
65
|
-
}
|
66
|
-
} else {
|
67
|
-
tasks = Runtime.getRuntime().availableProcessors() * 2;
|
68
|
-
}
|
69
|
-
|
70
|
-
long size = new File(task.getPath()).length();
|
71
|
-
List<PartialFile> files = new ArrayList<PartialFile>();
|
72
|
-
for (int i = 0; i < tasks; i++) {
|
73
|
-
long start = size * i / tasks;
|
74
|
-
long end = size * (i + 1) / tasks;
|
75
|
-
if (start < end) {
|
76
|
-
files.add(new PartialFile(task.getPath(), start, end));
|
77
|
-
}
|
78
|
-
}
|
79
|
-
|
80
|
-
task.setFiles(files);
|
81
|
-
|
82
|
-
return resume(task.dump(), task.getFiles().size(), control);
|
83
|
-
}
|
84
|
-
|
85
|
-
@Override
|
86
|
-
public ConfigDiff resume(TaskSource taskSource,
|
87
|
-
int taskCount,
|
88
|
-
FileInputPlugin.Control control)
|
89
|
-
{
|
90
|
-
control.run(taskSource, taskCount);
|
91
|
-
|
92
|
-
return Exec.newConfigDiff();
|
93
|
-
}
|
94
|
-
|
95
|
-
@Override
|
96
|
-
public void cleanup(TaskSource taskSource,
|
97
|
-
int taskCount,
|
98
|
-
List<
|
99
|
-
{ }
|
100
|
-
|
101
|
-
@Override
|
102
|
-
public TransactionalFileInput open(TaskSource taskSource, int taskIndex)
|
103
|
-
{
|
104
|
-
PluginTask task = taskSource.loadTask(PluginTask.class);
|
105
|
-
return new LocalFileSplitInput(task, taskIndex);
|
106
|
-
}
|
107
|
-
|
108
|
-
public static class LocalFileSplitInput
|
109
|
-
extends InputStreamFileInput
|
110
|
-
implements TransactionalFileInput
|
111
|
-
{
|
112
|
-
public static class FileSplitProvider
|
113
|
-
implements InputStreamFileInput.Provider
|
114
|
-
{
|
115
|
-
private final PartialFile file;
|
116
|
-
private final boolean hasHeader;
|
117
|
-
private boolean opened = false;
|
118
|
-
|
119
|
-
public FileSplitProvider(PartialFile file, boolean hasHeader)
|
120
|
-
{
|
121
|
-
this.file = file;
|
122
|
-
this.hasHeader = hasHeader;
|
123
|
-
}
|
124
|
-
|
125
|
-
@Override
|
126
|
-
public InputStream openNext() throws IOException
|
127
|
-
{
|
128
|
-
if (opened) {
|
129
|
-
return null;
|
130
|
-
}
|
131
|
-
opened = true;
|
132
|
-
|
133
|
-
InputStream in = new PartialFileInputStream(new FileInputStream(file.getPath()), file.getStart(), file.getEnd());
|
134
|
-
if (file.getStart() > 0 && hasHeader) {
|
135
|
-
in = new SequenceInputStream(openHeader(file.getPath()), in);
|
136
|
-
}
|
137
|
-
return in;
|
138
|
-
}
|
139
|
-
|
140
|
-
@Override
|
141
|
-
public void close() { }
|
142
|
-
|
143
|
-
private InputStream openHeader(String path) throws IOException
|
144
|
-
{
|
145
|
-
ByteArrayOutputStream header = new ByteArrayOutputStream();
|
146
|
-
try (BufferedInputStream in = new BufferedInputStream(new FileInputStream(path))) {
|
147
|
-
while (true) {
|
148
|
-
int c = in.read();
|
149
|
-
if (c < 0) {
|
150
|
-
break;
|
151
|
-
}
|
152
|
-
|
153
|
-
header.write(c);
|
154
|
-
|
155
|
-
if (c == '\n') {
|
156
|
-
break;
|
157
|
-
}
|
158
|
-
|
159
|
-
if (c == '\r') {
|
160
|
-
int c2 = in.read();
|
161
|
-
if (c2 == '\n') {
|
162
|
-
header.write(c2);
|
163
|
-
}
|
164
|
-
break;
|
165
|
-
}
|
166
|
-
}
|
167
|
-
}
|
168
|
-
header.close();
|
169
|
-
return new ByteArrayInputStream(header.toByteArray());
|
170
|
-
}
|
171
|
-
}
|
172
|
-
|
173
|
-
public LocalFileSplitInput(PluginTask task, int taskIndex)
|
174
|
-
{
|
175
|
-
super(task.getBufferAllocator(), new FileSplitProvider(task.getFiles().get(taskIndex), task.getHeaderLine()));
|
176
|
-
}
|
177
|
-
|
178
|
-
@Override
|
179
|
-
public void abort() { }
|
180
|
-
|
181
|
-
@Override
|
182
|
-
public
|
183
|
-
{
|
184
|
-
return Exec.
|
185
|
-
}
|
186
|
-
}
|
187
|
-
}
|
1
|
+
package org.embulk.input.filesplit;
|
2
|
+
|
3
|
+
import java.io.BufferedInputStream;
|
4
|
+
import java.io.ByteArrayInputStream;
|
5
|
+
import java.io.ByteArrayOutputStream;
|
6
|
+
import java.io.File;
|
7
|
+
import java.io.FileInputStream;
|
8
|
+
import java.io.IOException;
|
9
|
+
import java.io.InputStream;
|
10
|
+
import java.io.SequenceInputStream;
|
11
|
+
import java.util.ArrayList;
|
12
|
+
import java.util.List;
|
13
|
+
|
14
|
+
import org.embulk.config.Config;
|
15
|
+
import org.embulk.config.ConfigDefault;
|
16
|
+
import org.embulk.config.ConfigDiff;
|
17
|
+
import org.embulk.config.ConfigInject;
|
18
|
+
import org.embulk.config.ConfigSource;
|
19
|
+
import org.embulk.config.Task;
|
20
|
+
import org.embulk.config.TaskReport;
|
21
|
+
import org.embulk.config.TaskSource;
|
22
|
+
import org.embulk.spi.BufferAllocator;
|
23
|
+
import org.embulk.spi.Exec;
|
24
|
+
import org.embulk.spi.FileInputPlugin;
|
25
|
+
import org.embulk.spi.TransactionalFileInput;
|
26
|
+
import org.embulk.spi.util.InputStreamFileInput;
|
27
|
+
|
28
|
+
import com.google.common.base.Optional;
|
29
|
+
|
30
|
+
|
31
|
+
public class LocalFileSplitInputPlugin
|
32
|
+
implements FileInputPlugin
|
33
|
+
{
|
34
|
+
public interface PluginTask
|
35
|
+
extends Task
|
36
|
+
{
|
37
|
+
@Config("path")
|
38
|
+
public String getPath();
|
39
|
+
|
40
|
+
@Config("tasks")
|
41
|
+
@ConfigDefault("null")
|
42
|
+
public Optional<Integer> getTasks();
|
43
|
+
|
44
|
+
@Config("header_line")
|
45
|
+
@ConfigDefault("false")
|
46
|
+
public boolean getHeaderLine();
|
47
|
+
|
48
|
+
public List<PartialFile> getFiles();
|
49
|
+
public void setFiles(List<PartialFile> files);
|
50
|
+
|
51
|
+
@ConfigInject
|
52
|
+
public BufferAllocator getBufferAllocator();
|
53
|
+
}
|
54
|
+
|
55
|
+
@Override
|
56
|
+
public ConfigDiff transaction(ConfigSource config, FileInputPlugin.Control control)
|
57
|
+
{
|
58
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
59
|
+
|
60
|
+
int tasks;
|
61
|
+
if (task.getTasks().isPresent()) {
|
62
|
+
tasks = task.getTasks().get();
|
63
|
+
if (tasks <= 0) {
|
64
|
+
throw new IllegalArgumentException(String.format("'tasks' is %d but must be greater than 0", tasks));
|
65
|
+
}
|
66
|
+
} else {
|
67
|
+
tasks = Runtime.getRuntime().availableProcessors() * 2;
|
68
|
+
}
|
69
|
+
|
70
|
+
long size = new File(task.getPath()).length();
|
71
|
+
List<PartialFile> files = new ArrayList<PartialFile>();
|
72
|
+
for (int i = 0; i < tasks; i++) {
|
73
|
+
long start = size * i / tasks;
|
74
|
+
long end = size * (i + 1) / tasks;
|
75
|
+
if (start < end) {
|
76
|
+
files.add(new PartialFile(task.getPath(), start, end));
|
77
|
+
}
|
78
|
+
}
|
79
|
+
|
80
|
+
task.setFiles(files);
|
81
|
+
|
82
|
+
return resume(task.dump(), task.getFiles().size(), control);
|
83
|
+
}
|
84
|
+
|
85
|
+
@Override
|
86
|
+
public ConfigDiff resume(TaskSource taskSource,
|
87
|
+
int taskCount,
|
88
|
+
FileInputPlugin.Control control)
|
89
|
+
{
|
90
|
+
control.run(taskSource, taskCount);
|
91
|
+
|
92
|
+
return Exec.newConfigDiff();
|
93
|
+
}
|
94
|
+
|
95
|
+
@Override
|
96
|
+
public void cleanup(TaskSource taskSource,
|
97
|
+
int taskCount,
|
98
|
+
List<TaskReport> successTaskReports)
|
99
|
+
{ }
|
100
|
+
|
101
|
+
@Override
|
102
|
+
public TransactionalFileInput open(TaskSource taskSource, int taskIndex)
|
103
|
+
{
|
104
|
+
PluginTask task = taskSource.loadTask(PluginTask.class);
|
105
|
+
return new LocalFileSplitInput(task, taskIndex);
|
106
|
+
}
|
107
|
+
|
108
|
+
public static class LocalFileSplitInput
|
109
|
+
extends InputStreamFileInput
|
110
|
+
implements TransactionalFileInput
|
111
|
+
{
|
112
|
+
public static class FileSplitProvider
|
113
|
+
implements InputStreamFileInput.Provider
|
114
|
+
{
|
115
|
+
private final PartialFile file;
|
116
|
+
private final boolean hasHeader;
|
117
|
+
private boolean opened = false;
|
118
|
+
|
119
|
+
public FileSplitProvider(PartialFile file, boolean hasHeader)
|
120
|
+
{
|
121
|
+
this.file = file;
|
122
|
+
this.hasHeader = hasHeader;
|
123
|
+
}
|
124
|
+
|
125
|
+
@Override
|
126
|
+
public InputStream openNext() throws IOException
|
127
|
+
{
|
128
|
+
if (opened) {
|
129
|
+
return null;
|
130
|
+
}
|
131
|
+
opened = true;
|
132
|
+
|
133
|
+
InputStream in = new PartialFileInputStream(new FileInputStream(file.getPath()), file.getStart(), file.getEnd());
|
134
|
+
if (file.getStart() > 0 && hasHeader) {
|
135
|
+
in = new SequenceInputStream(openHeader(file.getPath()), in);
|
136
|
+
}
|
137
|
+
return in;
|
138
|
+
}
|
139
|
+
|
140
|
+
@Override
|
141
|
+
public void close() { }
|
142
|
+
|
143
|
+
private InputStream openHeader(String path) throws IOException
|
144
|
+
{
|
145
|
+
ByteArrayOutputStream header = new ByteArrayOutputStream();
|
146
|
+
try (BufferedInputStream in = new BufferedInputStream(new FileInputStream(path))) {
|
147
|
+
while (true) {
|
148
|
+
int c = in.read();
|
149
|
+
if (c < 0) {
|
150
|
+
break;
|
151
|
+
}
|
152
|
+
|
153
|
+
header.write(c);
|
154
|
+
|
155
|
+
if (c == '\n') {
|
156
|
+
break;
|
157
|
+
}
|
158
|
+
|
159
|
+
if (c == '\r') {
|
160
|
+
int c2 = in.read();
|
161
|
+
if (c2 == '\n') {
|
162
|
+
header.write(c2);
|
163
|
+
}
|
164
|
+
break;
|
165
|
+
}
|
166
|
+
}
|
167
|
+
}
|
168
|
+
header.close();
|
169
|
+
return new ByteArrayInputStream(header.toByteArray());
|
170
|
+
}
|
171
|
+
}
|
172
|
+
|
173
|
+
public LocalFileSplitInput(PluginTask task, int taskIndex)
|
174
|
+
{
|
175
|
+
super(task.getBufferAllocator(), new FileSplitProvider(task.getFiles().get(taskIndex), task.getHeaderLine()));
|
176
|
+
}
|
177
|
+
|
178
|
+
@Override
|
179
|
+
public void abort() { }
|
180
|
+
|
181
|
+
@Override
|
182
|
+
public TaskReport commit()
|
183
|
+
{
|
184
|
+
return Exec.newTaskReport();
|
185
|
+
}
|
186
|
+
}
|
187
|
+
}
|
@@ -5,91 +5,66 @@ import java.io.BufferedWriter;
|
|
5
5
|
import java.io.File;
|
6
6
|
import java.io.FileReader;
|
7
7
|
import java.io.FileWriter;
|
8
|
-
import java.io.IOException;
|
9
|
-
import java.net.URISyntaxException;
|
10
|
-
import java.util.Arrays;
|
11
8
|
import java.util.regex.Matcher;
|
12
9
|
import java.util.regex.Pattern;
|
13
10
|
|
14
|
-
import org.embulk.
|
15
|
-
import org.embulk.
|
11
|
+
import org.embulk.EmbulkEmbed;
|
12
|
+
import org.embulk.EmbulkEmbed.Bootstrap;
|
16
13
|
import org.embulk.config.ConfigSource;
|
17
|
-
import org.embulk.exec.ExecutionResult;
|
18
|
-
import org.embulk.exec.LocalExecutor;
|
19
14
|
import org.embulk.plugin.InjectedPluginSource;
|
20
|
-
import org.embulk.spi.ExecSession;
|
21
15
|
|
22
16
|
import com.google.inject.Binder;
|
23
|
-
import com.google.inject.Injector;
|
24
17
|
import com.google.inject.Module;
|
25
18
|
|
19
|
+
|
26
20
|
public class EmbulkPluginTester {
|
27
|
-
|
28
|
-
private final Class<?> iface;
|
29
|
-
private final String name;
|
30
|
-
private final Class<?> impl;
|
31
|
-
|
32
|
-
|
33
|
-
public EmbulkPluginTester(Class<?> iface, String name, Class<?> impl)
|
34
|
-
{
|
35
|
-
this.iface = iface;
|
36
|
-
this.name = name;
|
37
|
-
this.impl = impl;
|
38
|
-
}
|
39
21
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
22
|
+
private EmbulkEmbed embulk;
|
23
|
+
|
24
|
+
public EmbulkPluginTester(final Class<?> iface, final String name, final Class<?> impl)
|
25
|
+
{
|
26
|
+
Bootstrap bootstrap = new EmbulkEmbed.Bootstrap();
|
27
|
+
bootstrap.addModules(new Module()
|
28
|
+
{
|
29
|
+
@Override
|
30
|
+
public void configure(Binder binder)
|
31
|
+
{
|
32
|
+
InjectedPluginSource.registerPluginTo(binder, iface, name, impl);
|
33
|
+
}
|
34
|
+
});
|
35
|
+
embulk = bootstrap.initializeCloseable();
|
36
|
+
}
|
37
|
+
|
38
|
+
public void run(String ymlPath) throws Exception
|
39
|
+
{
|
40
|
+
ConfigSource config = embulk.newConfigLoader().fromYamlFile(new File(convert(ymlPath)));
|
41
|
+
embulk.run(config);
|
42
|
+
}
|
43
|
+
|
44
|
+
private String convert(String yml) throws Exception
|
45
|
+
{
|
46
|
+
File rootPath = new File(EmbulkPluginTester.class.getResource("/resource.txt").toURI()).getParentFile();
|
47
|
+
File ymlPath = new File(EmbulkPluginTester.class.getResource(yml).toURI());
|
48
|
+
File tempYmlPath = new File(ymlPath.getParentFile(), "temp-" + ymlPath.getName());
|
49
|
+
Pattern pathPrefixPattern = Pattern.compile("^ *path(_prefix)?: '(.*)'$");
|
50
|
+
try (BufferedReader reader = new BufferedReader(new FileReader(ymlPath))) {
|
51
|
+
try (BufferedWriter writer = new BufferedWriter(new FileWriter(tempYmlPath))) {
|
52
|
+
String line;
|
53
|
+
while ((line = reader.readLine()) != null) {
|
54
|
+
Matcher matcher = pathPrefixPattern.matcher(line);
|
55
|
+
if (matcher.matches()) {
|
56
|
+
int group = 2;
|
57
|
+
writer.write(line.substring(0, matcher.start(group)));
|
58
|
+
writer.write(new File(rootPath, matcher.group(group)).getAbsolutePath());
|
59
|
+
writer.write(line.substring(matcher.end(group)));
|
60
|
+
} else {
|
61
|
+
writer.write(line);
|
62
|
+
}
|
63
|
+
writer.newLine();
|
64
|
+
}
|
65
|
+
}
|
66
|
+
}
|
67
|
+
return tempYmlPath.getAbsolutePath();
|
68
|
+
}
|
62
69
|
|
63
|
-
private File convert(String yml) {
|
64
|
-
try {
|
65
|
-
File rootPath = new File(EmbulkPluginTester.class.getResource("/resource.txt").toURI()).getParentFile();
|
66
|
-
File ymlPath = new File(EmbulkPluginTester.class.getResource(yml).toURI());
|
67
|
-
File tempYmlPath = new File(ymlPath.getParentFile(), "temp-" + ymlPath.getName());
|
68
|
-
Pattern pathPrefixPattern = Pattern.compile("^ *path(_prefix)?: '(.*)'$");
|
69
|
-
try (BufferedReader reader = new BufferedReader(new FileReader(ymlPath))) {
|
70
|
-
try (BufferedWriter writer = new BufferedWriter(new FileWriter(tempYmlPath))) {
|
71
|
-
String line;
|
72
|
-
while ((line = reader.readLine()) != null) {
|
73
|
-
Matcher matcher = pathPrefixPattern.matcher(line);
|
74
|
-
if (matcher.matches()) {
|
75
|
-
int group = 2;
|
76
|
-
writer.write(line.substring(0, matcher.start(group)));
|
77
|
-
writer.write(new File(rootPath, matcher.group(group)).getAbsolutePath());
|
78
|
-
writer.write(line.substring(matcher.end(group)));
|
79
|
-
} else {
|
80
|
-
writer.write(line);
|
81
|
-
}
|
82
|
-
writer.newLine();
|
83
|
-
}
|
84
|
-
}
|
85
|
-
}
|
86
|
-
return tempYmlPath.getAbsoluteFile();
|
87
|
-
|
88
|
-
} catch (IOException e) {
|
89
|
-
throw new RuntimeException(e);
|
90
|
-
} catch (URISyntaxException e) {
|
91
|
-
throw new RuntimeException(e);
|
92
|
-
}
|
93
|
-
}
|
94
|
-
|
95
70
|
}
|
@@ -16,33 +16,33 @@ import org.junit.Test;
|
|
16
16
|
import static org.junit.Assert.assertEquals;
|
17
17
|
|
18
18
|
public class LocalFileSplitInputPluginTest {
|
19
|
-
|
20
|
-
private EmbulkPluginTester tester = new EmbulkPluginTester(InputPlugin.class, "filesplit", LocalFileSplitInputPlugin.class);
|
21
|
-
|
19
|
+
|
20
|
+
private static EmbulkPluginTester tester = new EmbulkPluginTester(InputPlugin.class, "filesplit", LocalFileSplitInputPlugin.class);
|
21
|
+
|
22
22
|
@Test
|
23
23
|
public void test() throws Exception
|
24
24
|
{
|
25
|
-
run("/yml/test.yml", "/data/test.csv");
|
25
|
+
run("/yml/test.yml", "/data/test-semicolon.csv");
|
26
26
|
}
|
27
|
-
|
27
|
+
|
28
28
|
@Test
|
29
29
|
public void testTasks() throws Exception
|
30
30
|
{
|
31
|
-
run("/yml/test-tasks.yml", "/data/test.csv");
|
31
|
+
run("/yml/test-tasks.yml", "/data/test-semicolon.csv");
|
32
32
|
}
|
33
|
-
|
33
|
+
|
34
34
|
@Test
|
35
35
|
public void testHeader() throws Exception
|
36
36
|
{
|
37
|
-
run("/yml/test-header.yml", "/data/test.csv");
|
37
|
+
run("/yml/test-header.yml", "/data/test-semicolon.csv");
|
38
38
|
}
|
39
|
-
|
39
|
+
|
40
40
|
@Test
|
41
41
|
public void testOnlyHeader() throws Exception
|
42
42
|
{
|
43
43
|
run("/yml/test-only-header.yml", "/data/empty.csv");
|
44
44
|
}
|
45
|
-
|
45
|
+
|
46
46
|
private void run(String ymlPath, String expectedName) throws Exception
|
47
47
|
{
|
48
48
|
List<String> expected = readAll(expectedName);
|
@@ -50,13 +50,13 @@ public class LocalFileSplitInputPluginTest {
|
|
50
50
|
|
51
51
|
File file = prepare();
|
52
52
|
tester.run(ymlPath);
|
53
|
-
|
53
|
+
|
54
54
|
List<String> actual= readAll(file);
|
55
55
|
Collections.sort(actual);
|
56
|
-
|
56
|
+
|
57
57
|
assertEquals(expected, actual);
|
58
58
|
}
|
59
|
-
|
59
|
+
|
60
60
|
private File prepare() throws URISyntaxException
|
61
61
|
{
|
62
62
|
File file = new File(new File(getClass().getResource("/resource.txt").toURI()).getParentFile(), "temp");
|
@@ -66,20 +66,20 @@ public class LocalFileSplitInputPluginTest {
|
|
66
66
|
}
|
67
67
|
return file;
|
68
68
|
}
|
69
|
-
|
69
|
+
|
70
70
|
private List<String> readAll(String name) throws IOException, URISyntaxException
|
71
71
|
{
|
72
72
|
return readAll(new File(getClass().getResource(name).toURI()));
|
73
73
|
}
|
74
|
-
|
75
|
-
private List<String> readAll(File file) throws IOException
|
74
|
+
|
75
|
+
private List<String> readAll(File file) throws IOException
|
76
76
|
{
|
77
77
|
if (file.isFile()) {
|
78
78
|
FileSystem fs = FileSystems.getDefault();
|
79
79
|
Charset charset = Charset.forName("UTF-8");
|
80
80
|
return Files.readAllLines(fs.getPath(file.getAbsolutePath()), charset);
|
81
81
|
}
|
82
|
-
|
82
|
+
|
83
83
|
if (file.isDirectory()) {
|
84
84
|
List<String> lines = new ArrayList<String>();
|
85
85
|
for (File child : file.listFiles()) {
|
@@ -87,8 +87,8 @@ public class LocalFileSplitInputPluginTest {
|
|
87
87
|
}
|
88
88
|
return lines;
|
89
89
|
}
|
90
|
-
|
90
|
+
|
91
91
|
return Collections.emptyList();
|
92
92
|
}
|
93
|
-
|
93
|
+
|
94
94
|
}
|
@@ -1,6 +1,3 @@
|
|
1
|
-
/*
|
2
|
-
* $Id: typical.epf 2627 2010-03-18 01:40:13Z tiba $
|
3
|
-
*/
|
4
1
|
package org.embulk.input.filesplit;
|
5
2
|
|
6
3
|
import static org.junit.Assert.assertEquals;
|
@@ -16,7 +13,7 @@ import org.embulk.input.filesplit.LocalFileSplitInputPlugin.LocalFileSplitInput.
|
|
16
13
|
import org.junit.Test;
|
17
14
|
|
18
15
|
public class LocalFileSplitInputTest {
|
19
|
-
|
16
|
+
|
20
17
|
@Test
|
21
18
|
public void testHeader() throws Exception
|
22
19
|
{
|
@@ -35,27 +32,27 @@ public class LocalFileSplitInputTest {
|
|
35
32
|
assertEquals("id,name,value", reader.readLine());
|
36
33
|
assertEquals(null, reader.readLine());
|
37
34
|
}
|
38
|
-
|
35
|
+
|
39
36
|
try (BufferedReader reader = new BufferedReader(new InputStreamReader(open("/data/test-header.csv", 1, 20)))) {
|
40
37
|
assertEquals("id,name,value", reader.readLine());
|
41
38
|
assertEquals("1,aaaaa,12345", reader.readLine());
|
42
39
|
assertEquals(null, reader.readLine());
|
43
40
|
}
|
44
|
-
|
41
|
+
|
45
42
|
try (BufferedReader reader = new BufferedReader(new InputStreamReader(open("/data/test-header.csv", 1, 40)))) {
|
46
43
|
assertEquals("id,name,value", reader.readLine());
|
47
44
|
assertEquals("1,aaaaa,12345", reader.readLine());
|
48
45
|
assertEquals("2,bbb,67890", reader.readLine());
|
49
46
|
assertEquals(null, reader.readLine());
|
50
47
|
}
|
51
|
-
|
48
|
+
|
52
49
|
try (BufferedReader reader = new BufferedReader(new InputStreamReader(open("/data/test-header.csv", 20, 40)))) {
|
53
50
|
assertEquals("id,name,value", reader.readLine());
|
54
51
|
assertEquals("2,bbb,67890", reader.readLine());
|
55
52
|
assertEquals(null, reader.readLine());
|
56
53
|
}
|
57
54
|
}
|
58
|
-
|
55
|
+
|
59
56
|
@Test
|
60
57
|
public void testOnlyHeader() throws Exception
|
61
58
|
{
|
@@ -63,19 +60,19 @@ public class LocalFileSplitInputTest {
|
|
63
60
|
assertEquals("id,name,value", reader.readLine());
|
64
61
|
assertEquals(null, reader.readLine());
|
65
62
|
}
|
66
|
-
|
63
|
+
|
67
64
|
try (BufferedReader reader = new BufferedReader(new InputStreamReader(open("/data/test-only-header.csv", 1, 10)))) {
|
68
65
|
assertEquals("id,name,value", reader.readLine());
|
69
66
|
assertEquals(null, reader.readLine());
|
70
67
|
}
|
71
68
|
}
|
72
|
-
|
73
|
-
private InputStream open(String name, int start, int end) throws IOException, URISyntaxException
|
69
|
+
|
70
|
+
private InputStream open(String name, int start, int end) throws IOException, URISyntaxException
|
74
71
|
{
|
75
72
|
File path = new File(getClass().getResource(name).toURI());
|
76
73
|
try (FileSplitProvider provider = new FileSplitProvider(new PartialFile(path.getAbsolutePath(), start, end), true)) {
|
77
74
|
return provider.openNext();
|
78
75
|
}
|
79
76
|
}
|
80
|
-
|
77
|
+
|
81
78
|
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-filesplit
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Hitoshi Tanaka
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-08-31 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email:
|
@@ -23,20 +23,20 @@ files:
|
|
23
23
|
- src/main/java/org/embulk/input/filesplit/PartialFile.java
|
24
24
|
- src/main/java/org/embulk/input/filesplit/PartialFileInputStream.java
|
25
25
|
- src/test/java/org/embulk/input/filesplit/EmbulkPluginTester.java
|
26
|
-
- src/test/java/org/embulk/input/filesplit/EmptyConfigSource.java
|
27
26
|
- src/test/java/org/embulk/input/filesplit/LocalFileSplitInputPluginTest.java
|
28
27
|
- src/test/java/org/embulk/input/filesplit/LocalFileSplitInputTest.java
|
29
28
|
- src/test/java/org/embulk/input/filesplit/PartialFileInputStreamTest.java
|
30
29
|
- src/test/resources/data/empty.csv
|
31
30
|
- src/test/resources/data/test-header.csv
|
32
31
|
- src/test/resources/data/test-only-header.csv
|
32
|
+
- src/test/resources/data/test-semicolon.csv
|
33
33
|
- src/test/resources/data/test.csv
|
34
34
|
- src/test/resources/resource.txt
|
35
35
|
- src/test/resources/yml/test-header.yml
|
36
36
|
- src/test/resources/yml/test-only-header.yml
|
37
37
|
- src/test/resources/yml/test-tasks.yml
|
38
38
|
- src/test/resources/yml/test.yml
|
39
|
-
- classpath/embulk-input-filesplit-0.1.2.jar
|
39
|
+
- classpath/embulk-input-filesplit-0.1.3.jar
|
40
40
|
homepage: https://github.com/hito4t/embulk-input-filesplit
|
41
41
|
licenses:
|
42
42
|
- Apache 2.0
|
Binary file
|
@@ -1,107 +0,0 @@
|
|
1
|
-
package org.embulk.input.filesplit;
|
2
|
-
|
3
|
-
import java.util.Collections;
|
4
|
-
import java.util.List;
|
5
|
-
import java.util.Map.Entry;
|
6
|
-
|
7
|
-
import org.embulk.config.ConfigSource;
|
8
|
-
import org.embulk.config.DataSource;
|
9
|
-
|
10
|
-
import com.fasterxml.jackson.databind.JsonNode;
|
11
|
-
import com.fasterxml.jackson.databind.node.ObjectNode;
|
12
|
-
|
13
|
-
public class EmptyConfigSource implements ConfigSource
|
14
|
-
{
|
15
|
-
|
16
|
-
@Override
|
17
|
-
public <E> E get(Class<E> type, String attrName)
|
18
|
-
{
|
19
|
-
return null;
|
20
|
-
}
|
21
|
-
|
22
|
-
@Override
|
23
|
-
public <E> E get(Class<E> type, String attrName, E defaultValue)
|
24
|
-
{
|
25
|
-
return defaultValue;
|
26
|
-
}
|
27
|
-
|
28
|
-
@Override
|
29
|
-
public List<String> getAttributeNames()
|
30
|
-
{
|
31
|
-
return Collections.emptyList();
|
32
|
-
}
|
33
|
-
|
34
|
-
@Override
|
35
|
-
public Iterable<Entry<String, JsonNode>> getAttributes()
|
36
|
-
{
|
37
|
-
return Collections.emptyList();
|
38
|
-
}
|
39
|
-
|
40
|
-
@Override
|
41
|
-
public ObjectNode getObjectNode()
|
42
|
-
{
|
43
|
-
return null;
|
44
|
-
}
|
45
|
-
|
46
|
-
@Override
|
47
|
-
public boolean isEmpty()
|
48
|
-
{
|
49
|
-
return true;
|
50
|
-
}
|
51
|
-
|
52
|
-
@Override
|
53
|
-
public ConfigSource deepCopy()
|
54
|
-
{
|
55
|
-
return this;
|
56
|
-
}
|
57
|
-
|
58
|
-
@Override
|
59
|
-
public ConfigSource getNested(String s)
|
60
|
-
{
|
61
|
-
// TODO 自動生成されたメソッド・スタブ
|
62
|
-
return null;
|
63
|
-
}
|
64
|
-
|
65
|
-
@Override
|
66
|
-
public ConfigSource getNestedOrSetEmpty(String s)
|
67
|
-
{
|
68
|
-
// TODO 自動生成されたメソッド・スタブ
|
69
|
-
return null;
|
70
|
-
}
|
71
|
-
|
72
|
-
@Override
|
73
|
-
public <T> T loadConfig(Class<T> class1)
|
74
|
-
{
|
75
|
-
// TODO 自動生成されたメソッド・スタブ
|
76
|
-
return null;
|
77
|
-
}
|
78
|
-
|
79
|
-
@Override
|
80
|
-
public ConfigSource merge(DataSource datasource)
|
81
|
-
{
|
82
|
-
// TODO 自動生成されたメソッド・スタブ
|
83
|
-
return null;
|
84
|
-
}
|
85
|
-
|
86
|
-
@Override
|
87
|
-
public ConfigSource set(String s, Object obj)
|
88
|
-
{
|
89
|
-
// TODO 自動生成されたメソッド・スタブ
|
90
|
-
return null;
|
91
|
-
}
|
92
|
-
|
93
|
-
@Override
|
94
|
-
public ConfigSource setAll(DataSource datasource)
|
95
|
-
{
|
96
|
-
// TODO 自動生成されたメソッド・スタブ
|
97
|
-
return null;
|
98
|
-
}
|
99
|
-
|
100
|
-
@Override
|
101
|
-
public ConfigSource setNested(String s, DataSource datasource)
|
102
|
-
{
|
103
|
-
// TODO 自動生成されたメソッド・スタブ
|
104
|
-
return null;
|
105
|
-
}
|
106
|
-
|
107
|
-
}
|