embulk 0.4.10 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/build.gradle +4 -3
- data/embulk-core/src/main/java/org/embulk/command/Runner.java +22 -3
- data/embulk-core/src/main/java/org/embulk/exec/ForGuess.java +16 -0
- data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +57 -31
- data/embulk-core/src/main/java/org/embulk/exec/LoggerProvider.java +1 -1
- data/embulk-core/src/main/java/org/embulk/exec/SamplingParserPlugin.java +6 -5
- data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +14 -10
- data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +16 -0
- data/embulk-core/src/main/java/org/embulk/spi/InputPlugin.java +2 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/FileInputInputStream.java +2 -1
- data/embulk-core/src/test/java/org/embulk/spi/TestFileInputInputStream.java +22 -0
- data/embulk-docs/plugins/index.html.erb +2 -2
- data/embulk-docs/src/recipe/scheduled-csv-load-to-elasticsearch-kibana4.rst +3 -3
- data/embulk-docs/src/release.rst +1 -0
- data/embulk-docs/src/release/release-0.5.0.rst +81 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvTokenizer.java +13 -1
- data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginModule.java +9 -0
- data/embulk-standards/src/test/java/org/embulk/standards/TestCsvTokenizer.java +68 -11
- data/lib/embulk/column.rb +31 -8
- data/lib/embulk/command/embulk_new_plugin.rb +30 -22
- data/lib/embulk/command/embulk_run.rb +16 -3
- data/lib/embulk/data/new/README.md.erb +37 -2
- data/lib/embulk/data/new/java/input.java.erb +14 -0
- data/lib/embulk/data/new/java/output.java.erb +4 -0
- data/lib/embulk/data/new/ruby/decoder_guess.rb.erb +25 -0
- data/lib/embulk/data/new/ruby/input.rb.erb +11 -1
- data/lib/embulk/data/new/ruby/parser_guess.rb.erb +65 -0
- data/lib/embulk/guess/csv.rb +7 -81
- data/lib/embulk/guess/schema_guess.rb +107 -0
- data/lib/embulk/guess/time_format_guess.rb +2 -1
- data/lib/embulk/guess_plugin.rb +20 -0
- data/lib/embulk/input_plugin.rb +10 -0
- data/lib/embulk/schema.rb +9 -2
- data/lib/embulk/version.rb +1 -1
- data/test/guess/test_schema_guess.rb +11 -0
- data/test/helper.rb +1 -2
- metadata +11 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5579fdedc41918d4b640030c9f2c8b088e3687ce
|
4
|
+
data.tar.gz: 0c01cabaca3edfee8c64a4add44a824b000d7a10
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d4c53e8d4452494e615d6cf0e8600a9874512fa2ca7ed7e024621438fb8a88a358fe91d800c4c8fe6516cc6cf5cd0bb29f3499fe431007deda40e0a9b1fadece
|
7
|
+
data.tar.gz: eea74616fed84cbbefd494a457748c533f9f8551ae240417b7741cba10497cd7a7e1c319410b7738e3f790785527c025c627390593cfcdeb132506359fa9eea9
|
data/README.md
CHANGED
@@ -28,7 +28,7 @@ The single-file package is the simplest way to try Embulk. You can download the
|
|
28
28
|
Following 4 commands install embulk to your home directory:
|
29
29
|
|
30
30
|
```
|
31
|
-
curl --create-dirs -o ~/.embulk/bin/embulk -L https://bintray.com/artifact/download/embulk/maven/embulk-0.4.10.jar
|
31
|
+
curl --create-dirs -o ~/.embulk/bin/embulk -L https://bintray.com/artifact/download/embulk/maven/embulk-0.5.0.jar
|
32
32
|
chmod +x ~/.embulk/bin/embulk
|
33
33
|
echo 'export PATH="$HOME/.embulk/bin:$PATH"' >> ~/.bashrc
|
34
34
|
source ~/.bashrc
|
@@ -39,7 +39,7 @@ source ~/.bashrc
|
|
39
39
|
You can assume the jar file is a .bat file.
|
40
40
|
|
41
41
|
```
|
42
|
-
curl -o embulk.bat -L https://bintray.com/artifact/download/embulk/maven/embulk-0.4.10.jar
|
42
|
+
curl -o embulk.bat -L https://bintray.com/artifact/download/embulk/maven/embulk-0.5.0.jar
|
43
43
|
```
|
44
44
|
|
45
45
|
### Trying examples
|
data/build.gradle
CHANGED
@@ -12,7 +12,7 @@ def release_projects = [project(":embulk-core"), project(":embulk-standards")]
|
|
12
12
|
|
13
13
|
allprojects {
|
14
14
|
group = 'org.embulk'
|
15
|
-
version = '0.4.10'
|
15
|
+
version = '0.5.0'
|
16
16
|
|
17
17
|
apply plugin: 'java'
|
18
18
|
apply plugin: 'maven-publish'
|
@@ -171,9 +171,10 @@ project(':embulk-cli') {
|
|
171
171
|
}
|
172
172
|
|
173
173
|
task classpath(type: Copy) {
|
174
|
-
|
174
|
+
File dest = file("${rootProject.projectDir}/classpath")
|
175
|
+
doFirst { dest.deleteDir() }
|
175
176
|
from configurations.runtime
|
176
|
-
into
|
177
|
+
into dest
|
177
178
|
}
|
178
179
|
}
|
179
180
|
|
@@ -1,6 +1,7 @@
|
|
1
1
|
package org.embulk.command;
|
2
2
|
|
3
3
|
import java.util.List;
|
4
|
+
import java.util.ArrayList;
|
4
5
|
import java.util.Map;
|
5
6
|
import java.util.HashMap;
|
6
7
|
import java.io.File;
|
@@ -19,6 +20,7 @@ import org.embulk.config.ConfigLoader;
|
|
19
20
|
import org.embulk.config.ConfigDiff;
|
20
21
|
import org.embulk.config.ModelManager;
|
21
22
|
import org.embulk.config.ConfigException;
|
23
|
+
import org.embulk.plugin.PluginType;
|
22
24
|
import org.embulk.exec.LocalExecutor;
|
23
25
|
import org.embulk.exec.ExecutionResult;
|
24
26
|
import org.embulk.exec.GuessExecutor;
|
@@ -45,6 +47,9 @@ public class Runner
|
|
45
47
|
|
46
48
|
private String previewOutputFormat;
|
47
49
|
public String getPreviewOutputFormat() { return previewOutputFormat; };
|
50
|
+
|
51
|
+
private List<PluginType> guessPlugins;
|
52
|
+
public List<PluginType> getGuessPlugins() { return guessPlugins; }
|
48
53
|
}
|
49
54
|
|
50
55
|
private final Options options;
|
@@ -57,12 +62,26 @@ public class Runner
|
|
57
62
|
ModelManager bootstrapModelManager = new ModelManager(null, new ObjectMapper());
|
58
63
|
this.options = bootstrapModelManager.readObject(Options.class, optionJson);
|
59
64
|
this.systemConfig = new ConfigLoader(bootstrapModelManager).fromPropertiesYamlLiteral(System.getProperties(), "embulk.");
|
65
|
+
mergeOptionsToSystemConfig(options, systemConfig);
|
66
|
+
this.service = new EmbulkService(systemConfig);
|
67
|
+
this.injector = service.getInjector();
|
68
|
+
}
|
69
|
+
|
70
|
+
@SuppressWarnings("unchecked")
|
71
|
+
private void mergeOptionsToSystemConfig(Options options, ConfigSource systemConfig)
|
72
|
+
{
|
60
73
|
String logLevel = options.getLogLevel();
|
61
74
|
if (logLevel != null) {
|
62
|
-
systemConfig.set("
|
75
|
+
systemConfig.set("log_level", logLevel);
|
76
|
+
}
|
77
|
+
|
78
|
+
List<PluginType> guessPlugins = options.getGuessPlugins();
|
79
|
+
if (guessPlugins != null && !guessPlugins.isEmpty()) {
|
80
|
+
List<PluginType> list = new ArrayList<PluginType>() { };
|
81
|
+
list = systemConfig.get((Class<List<PluginType>>) list.getClass(), "guess_plugins", list);
|
82
|
+
list.addAll(guessPlugins);
|
83
|
+
systemConfig.set("guess_plugins", list);
|
63
84
|
}
|
64
|
-
this.service = new EmbulkService(systemConfig);
|
65
|
-
this.injector = service.getInjector();
|
66
85
|
}
|
67
86
|
|
68
87
|
public void main(String command, String[] args)
|
@@ -0,0 +1,16 @@
|
|
1
|
+
package org.embulk.exec;
|
2
|
+
|
3
|
+
import javax.inject.Qualifier;
|
4
|
+
import java.lang.annotation.Retention;
|
5
|
+
import java.lang.annotation.Target;
|
6
|
+
import static java.lang.annotation.ElementType.FIELD;
|
7
|
+
import static java.lang.annotation.ElementType.METHOD;
|
8
|
+
import static java.lang.annotation.ElementType.PARAMETER;
|
9
|
+
import static java.lang.annotation.RetentionPolicy.RUNTIME;
|
10
|
+
|
11
|
+
@Retention(RUNTIME)
|
12
|
+
@Target({FIELD, PARAMETER, METHOD})
|
13
|
+
@Qualifier
|
14
|
+
public @interface ForGuess
|
15
|
+
{
|
16
|
+
}
|
@@ -1,11 +1,15 @@
|
|
1
1
|
package org.embulk.exec;
|
2
2
|
|
3
3
|
import java.util.List;
|
4
|
+
import java.util.Set;
|
4
5
|
import java.util.ArrayList;
|
5
6
|
import com.google.common.collect.ImmutableList;
|
7
|
+
import com.google.common.base.Throwables;
|
6
8
|
import com.google.inject.Inject;
|
7
9
|
import com.google.inject.Injector;
|
8
|
-
import com.google.
|
10
|
+
import com.google.inject.Binder;
|
11
|
+
import com.google.inject.multibindings.Multibinder;
|
12
|
+
import org.embulk.plugin.PluginType;
|
9
13
|
import org.embulk.config.Config;
|
10
14
|
import org.embulk.config.ConfigDefault;
|
11
15
|
import org.embulk.config.ConfigDiff;
|
@@ -14,7 +18,6 @@ import org.embulk.config.Task;
|
|
14
18
|
import org.embulk.config.TaskSource;
|
15
19
|
import org.embulk.config.ConfigSource;
|
16
20
|
import org.embulk.config.CommitReport;
|
17
|
-
import org.embulk.plugin.PluginType;
|
18
21
|
import org.embulk.spi.Schema;
|
19
22
|
import org.embulk.spi.Column;
|
20
23
|
import org.embulk.spi.Page;
|
@@ -33,10 +36,16 @@ import org.embulk.spi.FileInputRunner;
|
|
33
36
|
|
34
37
|
public class GuessExecutor
|
35
38
|
{
|
36
|
-
private final Injector injector;
|
37
|
-
private final ConfigSource systemConfig;
|
38
39
|
private final List<PluginType> defaultGuessPlugins;
|
39
40
|
|
41
|
+
private interface GuessExecutorSystemTask
|
42
|
+
extends Task
|
43
|
+
{
|
44
|
+
@Config("guess_plugins")
|
45
|
+
@ConfigDefault("[]")
|
46
|
+
public List<PluginType> getGuessPlugins();
|
47
|
+
}
|
48
|
+
|
40
49
|
private interface GuessExecutorTask
|
41
50
|
extends Task
|
42
51
|
{
|
@@ -49,19 +58,22 @@ public class GuessExecutor
|
|
49
58
|
public List<PluginType> getExcludeGuessPlugins();
|
50
59
|
}
|
51
60
|
|
61
|
+
public static void registerDefaultGuessPluginTo(Binder binder, PluginType type)
|
62
|
+
{
|
63
|
+
Multibinder<PluginType> multibinder = Multibinder.newSetBinder(binder, PluginType.class, ForGuess.class);
|
64
|
+
multibinder.addBinding().toInstance(type);
|
65
|
+
}
|
66
|
+
|
52
67
|
@Inject
|
53
|
-
public GuessExecutor(
|
54
|
-
@
|
68
|
+
public GuessExecutor(@ForSystemConfig ConfigSource systemConfig,
|
69
|
+
@ForGuess Set<PluginType> defaultGuessPlugins)
|
55
70
|
{
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
new PluginType("charset"),
|
63
|
-
new PluginType("newline"),
|
64
|
-
new PluginType("csv"));
|
71
|
+
GuessExecutorSystemTask systemTask = systemConfig.loadConfig(GuessExecutorSystemTask.class);
|
72
|
+
|
73
|
+
ImmutableList.Builder<PluginType> list = ImmutableList.builder();
|
74
|
+
list.addAll(defaultGuessPlugins);
|
75
|
+
list.addAll(systemTask.getGuessPlugins());
|
76
|
+
this.defaultGuessPlugins = list.build();
|
65
77
|
}
|
66
78
|
|
67
79
|
public ConfigDiff guess(ExecSession exec, final ConfigSource config)
|
@@ -80,29 +92,50 @@ public class GuessExecutor
|
|
80
92
|
}
|
81
93
|
}
|
82
94
|
|
95
|
+
protected InputPlugin newInputPlugin(ConfigSource inputConfig)
|
96
|
+
{
|
97
|
+
return Exec.newPlugin(InputPlugin.class, inputConfig.get(PluginType.class, "type"));
|
98
|
+
}
|
99
|
+
|
83
100
|
private ConfigDiff doGuess(ConfigSource config)
|
84
101
|
{
|
85
|
-
|
86
|
-
|
87
|
-
|
102
|
+
ConfigSource inputConfig = config.getNested("in");
|
103
|
+
|
104
|
+
InputPlugin input = newInputPlugin(inputConfig);
|
105
|
+
|
106
|
+
ConfigDiff inputGuessed;
|
107
|
+
try {
|
108
|
+
inputGuessed = input.guess(inputConfig);
|
109
|
+
} catch (AbstractMethodError ex) {
|
110
|
+
// for backward compatibility with embulk v0.4 interface
|
111
|
+
throw new UnsupportedOperationException(input.getClass().getSimpleName()+".guess(ConfigSource) is not implemented. This input plugin does not support guessing.");
|
88
112
|
}
|
89
113
|
|
114
|
+
ConfigDiff wrapped = Exec.newConfigDiff();
|
115
|
+
wrapped.getNestedOrSetEmpty("in").merge(inputGuessed);
|
116
|
+
return wrapped;
|
117
|
+
}
|
118
|
+
|
119
|
+
// called by FileInputRunner
|
120
|
+
public ConfigDiff guessParserConfig(Buffer sample, ConfigSource inputConfig, ConfigSource execConfig)
|
121
|
+
{
|
90
122
|
List<PluginType> guessPlugins = new ArrayList<PluginType>(defaultGuessPlugins);
|
91
|
-
|
123
|
+
|
124
|
+
GuessExecutorTask task = execConfig.loadConfig(GuessExecutorTask.class);
|
92
125
|
guessPlugins.addAll(task.getGuessPlugins());
|
93
126
|
guessPlugins.removeAll(task.getExcludeGuessPlugins());
|
94
127
|
|
95
|
-
return
|
128
|
+
return guessParserConfig(sample, inputConfig, guessPlugins);
|
96
129
|
}
|
97
130
|
|
98
|
-
private ConfigDiff
|
131
|
+
private ConfigDiff guessParserConfig(Buffer sample,
|
99
132
|
ConfigSource config, List<PluginType> guessPlugins)
|
100
133
|
{
|
101
134
|
// repeat guessing upto 10 times
|
102
135
|
ConfigDiff lastGuessed = Exec.newConfigDiff();
|
103
136
|
for (int i=0; i < 10; i++) {
|
104
137
|
// include last-guessed config to run guess input
|
105
|
-
ConfigSource originalConfig = config.
|
138
|
+
ConfigSource originalConfig = config.deepCopy().merge(lastGuessed);
|
106
139
|
ConfigSource guessInputConfig = originalConfig.deepCopy();
|
107
140
|
guessInputConfig.getNestedOrSetEmpty("parser")
|
108
141
|
.set("type", "system_guess") // override in.parser.type so that FileInputPlugin creates GuessParserPlugin
|
@@ -142,19 +175,12 @@ public class GuessExecutor
|
|
142
175
|
// merge to the last-guessed config
|
143
176
|
if (lastGuessed.equals(guessed)) {
|
144
177
|
// not changed
|
145
|
-
return
|
178
|
+
return lastGuessed;
|
146
179
|
}
|
147
180
|
lastGuessed = guessed;
|
148
181
|
}
|
149
182
|
|
150
|
-
return
|
151
|
-
}
|
152
|
-
|
153
|
-
private static ConfigDiff wrapInIn(ConfigDiff lastGuessed)
|
154
|
-
{
|
155
|
-
ConfigDiff wrapped = Exec.newConfigDiff();
|
156
|
-
wrapped.getNestedOrSetEmpty("in").merge(lastGuessed);
|
157
|
-
return wrapped;
|
183
|
+
return lastGuessed;
|
158
184
|
}
|
159
185
|
|
160
186
|
private static class BufferFileInputPlugin
|
@@ -18,7 +18,7 @@ public class LoggerProvider
|
|
18
18
|
Properties prop = new Properties();
|
19
19
|
|
20
20
|
final String level;
|
21
|
-
String logLevel = systemConfig.get(String.class, "
|
21
|
+
String logLevel = systemConfig.get(String.class, "log_level", "info"); // here can't use loadConfig because ModelManager uses LoggerProvider
|
22
22
|
switch (logLevel) {
|
23
23
|
case "fatal": level = "FATAL"; break;
|
24
24
|
case "error": level = "ERROR"; break;
|
@@ -13,13 +13,15 @@ import org.embulk.spi.Buffer;
|
|
13
13
|
import org.embulk.spi.InputPlugin;
|
14
14
|
import org.embulk.spi.ParserPlugin;
|
15
15
|
import org.embulk.spi.FileInput;
|
16
|
+
import org.embulk.spi.FileInputRunner;
|
16
17
|
import org.embulk.spi.PageOutput;
|
18
|
+
import org.embulk.exec.ForSystemConfig;
|
17
19
|
import static org.embulk.spi.util.Inputs.each;
|
18
20
|
|
19
21
|
/*
|
20
|
-
* Used by
|
22
|
+
* Used by FileInputRunner.guess
|
21
23
|
*/
|
22
|
-
class SamplingParserPlugin
|
24
|
+
public class SamplingParserPlugin
|
23
25
|
implements ParserPlugin
|
24
26
|
{
|
25
27
|
private final int maxSampleSize;
|
@@ -44,13 +46,12 @@ class SamplingParserPlugin
|
|
44
46
|
throw new SampledNoticeError(buffer);
|
45
47
|
}
|
46
48
|
|
47
|
-
static Buffer runFileInputSampling(ConfigSource
|
49
|
+
public static Buffer runFileInputSampling(final FileInputRunner input, ConfigSource inputConfig)
|
48
50
|
{
|
49
51
|
// override in.parser.type so that FileInputRunner creates GuessParserPlugin
|
50
|
-
ConfigSource samplingInputConfig =
|
52
|
+
ConfigSource samplingInputConfig = inputConfig.deepCopy();
|
51
53
|
samplingInputConfig.getNestedOrSetEmpty("parser").set("type", "system_sampling");
|
52
54
|
|
53
|
-
final InputPlugin input = Exec.newPlugin(InputPlugin.class, samplingInputConfig.get(PluginType.class, "type"));
|
54
55
|
try {
|
55
56
|
input.transaction(samplingInputConfig, new InputPlugin.Control() {
|
56
57
|
public List<CommitReport> run(TaskSource taskSource, Schema schema, int taskCount)
|
@@ -27,8 +27,11 @@ public class ExecSession
|
|
27
27
|
private final ModelManager modelManager;
|
28
28
|
private final PluginManager pluginManager;
|
29
29
|
private final BufferAllocator bufferAllocator;
|
30
|
+
|
31
|
+
private final ConfigSource execConfig;
|
30
32
|
private final Timestamp transactionTime;
|
31
33
|
private final DateTimeZone transactionTimeZone;
|
34
|
+
|
32
35
|
private final boolean preview;
|
33
36
|
|
34
37
|
public interface SessionTask
|
@@ -44,16 +47,6 @@ public class ExecSession
|
|
44
47
|
}
|
45
48
|
|
46
49
|
public ExecSession(Injector injector, ConfigSource execConfig)
|
47
|
-
{
|
48
|
-
this(injector, execConfig.loadConfig(SessionTask.class));
|
49
|
-
}
|
50
|
-
|
51
|
-
public ExecSession(Injector injector, TaskSource taskSource)
|
52
|
-
{
|
53
|
-
this(injector, taskSource.loadTask(SessionTask.class));
|
54
|
-
}
|
55
|
-
|
56
|
-
public ExecSession(Injector injector, SessionTask task)
|
57
50
|
{
|
58
51
|
this.injector = injector;
|
59
52
|
this.loggerFactory = injector.getInstance(ILoggerFactory.class);
|
@@ -61,8 +54,11 @@ public class ExecSession
|
|
61
54
|
this.pluginManager = injector.getInstance(PluginManager.class);
|
62
55
|
this.bufferAllocator = injector.getInstance(BufferAllocator.class);
|
63
56
|
|
57
|
+
this.execConfig = execConfig.deepCopy();
|
58
|
+
SessionTask task = execConfig.loadConfig(SessionTask.class);
|
64
59
|
this.transactionTime = task.getTransactionTime().or(Timestamp.ofEpochMilli(System.currentTimeMillis())); // TODO get nanoseconds for default
|
65
60
|
this.transactionTimeZone = task.getTransactionTimeZone();
|
61
|
+
|
66
62
|
this.preview = false;
|
67
63
|
}
|
68
64
|
|
@@ -73,8 +69,11 @@ public class ExecSession
|
|
73
69
|
this.modelManager = copy.modelManager;
|
74
70
|
this.pluginManager = copy.pluginManager;
|
75
71
|
this.bufferAllocator = copy.bufferAllocator;
|
72
|
+
|
73
|
+
this.execConfig = copy.execConfig;
|
76
74
|
this.transactionTime = copy.transactionTime;
|
77
75
|
this.transactionTimeZone = copy.transactionTimeZone;
|
76
|
+
|
78
77
|
this.preview = preview;
|
79
78
|
}
|
80
79
|
|
@@ -120,6 +119,11 @@ public class ExecSession
|
|
120
119
|
return bufferAllocator;
|
121
120
|
}
|
122
121
|
|
122
|
+
public ConfigSource getExecConfig()
|
123
|
+
{
|
124
|
+
return execConfig;
|
125
|
+
}
|
126
|
+
|
123
127
|
public <T> T newPlugin(Class<T> iface, PluginType type)
|
124
128
|
{
|
125
129
|
return pluginManager.newPlugin(iface, type);
|
@@ -11,6 +11,9 @@ import org.embulk.config.Config;
|
|
11
11
|
import org.embulk.config.ConfigDefault;
|
12
12
|
import org.embulk.plugin.PluginType;
|
13
13
|
import org.embulk.spi.util.Decoders;
|
14
|
+
import org.embulk.exec.GuessExecutor;
|
15
|
+
import org.embulk.exec.SamplingParserPlugin;
|
16
|
+
import org.embulk.exec.NoSampleException;
|
14
17
|
|
15
18
|
public class FileInputRunner
|
16
19
|
implements InputPlugin
|
@@ -60,6 +63,7 @@ public class FileInputRunner
|
|
60
63
|
return fileInputPlugin.transaction(config, new RunnerControl(task, control));
|
61
64
|
}
|
62
65
|
|
66
|
+
@Override
|
63
67
|
public ConfigDiff resume(TaskSource taskSource,
|
64
68
|
Schema schema, int taskCount,
|
65
69
|
InputPlugin.Control control)
|
@@ -68,6 +72,18 @@ public class FileInputRunner
|
|
68
72
|
return fileInputPlugin.resume(task.getFileInputTaskSource(), taskCount, new RunnerControl(task, control));
|
69
73
|
}
|
70
74
|
|
75
|
+
@Override
|
76
|
+
public ConfigDiff guess(ConfigSource config)
|
77
|
+
{
|
78
|
+
Buffer sample = SamplingParserPlugin.runFileInputSampling(this, config);
|
79
|
+
if (sample.limit() == 0) {
|
80
|
+
throw new NoSampleException("Can't get sample data because the first input file is empty");
|
81
|
+
}
|
82
|
+
|
83
|
+
GuessExecutor guessExecutor = Exec.session().getInjector().getInstance(GuessExecutor.class);
|
84
|
+
return guessExecutor.guessParserConfig(sample, config, Exec.session().getExecConfig());
|
85
|
+
}
|
86
|
+
|
71
87
|
private class RunnerControl
|
72
88
|
implements FileInputPlugin.Control
|
73
89
|
{
|
@@ -80,7 +80,8 @@ public class FileInputInputStream
|
|
80
80
|
@Override
|
81
81
|
public long skip(long len)
|
82
82
|
{
|
83
|
-
|
83
|
+
int skipped = read(null, 0, (int) Math.min(len, Integer.MAX_VALUE));
|
84
|
+
return skipped > 0 ? skipped : 0;
|
84
85
|
}
|
85
86
|
|
86
87
|
private boolean nextBuffer()
|
@@ -64,4 +64,26 @@ public class TestFileInputInputStream
|
|
64
64
|
assertEquals(expected.length, pos);
|
65
65
|
assertArrayEquals(expected, actual);
|
66
66
|
}
|
67
|
+
|
68
|
+
@Test
|
69
|
+
public void testSkipReturnsZeroForNoData() {
|
70
|
+
FileInputInputStream in = new FileInputInputStream(new MockFileInput());
|
71
|
+
assertEquals("Verify skip() returns 0 when there is no data.", 0L, in.skip(1));
|
72
|
+
}
|
73
|
+
|
74
|
+
private static class MockFileInput implements FileInput {
|
75
|
+
@Override
|
76
|
+
public boolean nextFile() {
|
77
|
+
return false;
|
78
|
+
}
|
79
|
+
|
80
|
+
@Override
|
81
|
+
public Buffer poll() {
|
82
|
+
return null;
|
83
|
+
}
|
84
|
+
|
85
|
+
@Override
|
86
|
+
public void close() {
|
87
|
+
}
|
88
|
+
}
|
67
89
|
}
|