embulk 0.4.10 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/build.gradle +4 -3
- data/embulk-core/src/main/java/org/embulk/command/Runner.java +22 -3
- data/embulk-core/src/main/java/org/embulk/exec/ForGuess.java +16 -0
- data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +57 -31
- data/embulk-core/src/main/java/org/embulk/exec/LoggerProvider.java +1 -1
- data/embulk-core/src/main/java/org/embulk/exec/SamplingParserPlugin.java +6 -5
- data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +14 -10
- data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +16 -0
- data/embulk-core/src/main/java/org/embulk/spi/InputPlugin.java +2 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/FileInputInputStream.java +2 -1
- data/embulk-core/src/test/java/org/embulk/spi/TestFileInputInputStream.java +22 -0
- data/embulk-docs/plugins/index.html.erb +2 -2
- data/embulk-docs/src/recipe/scheduled-csv-load-to-elasticsearch-kibana4.rst +3 -3
- data/embulk-docs/src/release.rst +1 -0
- data/embulk-docs/src/release/release-0.5.0.rst +81 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvTokenizer.java +13 -1
- data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginModule.java +9 -0
- data/embulk-standards/src/test/java/org/embulk/standards/TestCsvTokenizer.java +68 -11
- data/lib/embulk/column.rb +31 -8
- data/lib/embulk/command/embulk_new_plugin.rb +30 -22
- data/lib/embulk/command/embulk_run.rb +16 -3
- data/lib/embulk/data/new/README.md.erb +37 -2
- data/lib/embulk/data/new/java/input.java.erb +14 -0
- data/lib/embulk/data/new/java/output.java.erb +4 -0
- data/lib/embulk/data/new/ruby/decoder_guess.rb.erb +25 -0
- data/lib/embulk/data/new/ruby/input.rb.erb +11 -1
- data/lib/embulk/data/new/ruby/parser_guess.rb.erb +65 -0
- data/lib/embulk/guess/csv.rb +7 -81
- data/lib/embulk/guess/schema_guess.rb +107 -0
- data/lib/embulk/guess/time_format_guess.rb +2 -1
- data/lib/embulk/guess_plugin.rb +20 -0
- data/lib/embulk/input_plugin.rb +10 -0
- data/lib/embulk/schema.rb +9 -2
- data/lib/embulk/version.rb +1 -1
- data/test/guess/test_schema_guess.rb +11 -0
- data/test/helper.rb +1 -2
- metadata +11 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5579fdedc41918d4b640030c9f2c8b088e3687ce
|
4
|
+
data.tar.gz: 0c01cabaca3edfee8c64a4add44a824b000d7a10
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d4c53e8d4452494e615d6cf0e8600a9874512fa2ca7ed7e024621438fb8a88a358fe91d800c4c8fe6516cc6cf5cd0bb29f3499fe431007deda40e0a9b1fadece
|
7
|
+
data.tar.gz: eea74616fed84cbbefd494a457748c533f9f8551ae240417b7741cba10497cd7a7e1c319410b7738e3f790785527c025c627390593cfcdeb132506359fa9eea9
|
data/README.md
CHANGED
@@ -28,7 +28,7 @@ The single-file package is the simplest way to try Embulk. You can download the
|
|
28
28
|
Following 4 commands install embulk to your home directory:
|
29
29
|
|
30
30
|
```
|
31
|
-
curl --create-dirs -o ~/.embulk/bin/embulk -L https://bintray.com/artifact/download/embulk/maven/embulk-0.4.10.jar
|
31
|
+
curl --create-dirs -o ~/.embulk/bin/embulk -L https://bintray.com/artifact/download/embulk/maven/embulk-0.5.0.jar
|
32
32
|
chmod +x ~/.embulk/bin/embulk
|
33
33
|
echo 'export PATH="$HOME/.embulk/bin:$PATH"' >> ~/.bashrc
|
34
34
|
source ~/.bashrc
|
@@ -39,7 +39,7 @@ source ~/.bashrc
|
|
39
39
|
You can assume the jar file is a .bat file.
|
40
40
|
|
41
41
|
```
|
42
|
-
curl -o embulk.bat -L https://bintray.com/artifact/download/embulk/maven/embulk-0.4.10.jar
|
42
|
+
curl -o embulk.bat -L https://bintray.com/artifact/download/embulk/maven/embulk-0.5.0.jar
|
43
43
|
```
|
44
44
|
|
45
45
|
### Trying examples
|
data/build.gradle
CHANGED
@@ -12,7 +12,7 @@ def release_projects = [project(":embulk-core"), project(":embulk-standards")]
|
|
12
12
|
|
13
13
|
allprojects {
|
14
14
|
group = 'org.embulk'
|
15
|
-
version = '0.4.10'
|
15
|
+
version = '0.5.0'
|
16
16
|
|
17
17
|
apply plugin: 'java'
|
18
18
|
apply plugin: 'maven-publish'
|
@@ -171,9 +171,10 @@ project(':embulk-cli') {
|
|
171
171
|
}
|
172
172
|
|
173
173
|
task classpath(type: Copy) {
|
174
|
-
|
174
|
+
File dest = file("${rootProject.projectDir}/classpath")
|
175
|
+
doFirst { dest.deleteDir() }
|
175
176
|
from configurations.runtime
|
176
|
-
into
|
177
|
+
into dest
|
177
178
|
}
|
178
179
|
}
|
179
180
|
|
@@ -1,6 +1,7 @@
|
|
1
1
|
package org.embulk.command;
|
2
2
|
|
3
3
|
import java.util.List;
|
4
|
+
import java.util.ArrayList;
|
4
5
|
import java.util.Map;
|
5
6
|
import java.util.HashMap;
|
6
7
|
import java.io.File;
|
@@ -19,6 +20,7 @@ import org.embulk.config.ConfigLoader;
|
|
19
20
|
import org.embulk.config.ConfigDiff;
|
20
21
|
import org.embulk.config.ModelManager;
|
21
22
|
import org.embulk.config.ConfigException;
|
23
|
+
import org.embulk.plugin.PluginType;
|
22
24
|
import org.embulk.exec.LocalExecutor;
|
23
25
|
import org.embulk.exec.ExecutionResult;
|
24
26
|
import org.embulk.exec.GuessExecutor;
|
@@ -45,6 +47,9 @@ public class Runner
|
|
45
47
|
|
46
48
|
private String previewOutputFormat;
|
47
49
|
public String getPreviewOutputFormat() { return previewOutputFormat; };
|
50
|
+
|
51
|
+
private List<PluginType> guessPlugins;
|
52
|
+
public List<PluginType> getGuessPlugins() { return guessPlugins; }
|
48
53
|
}
|
49
54
|
|
50
55
|
private final Options options;
|
@@ -57,12 +62,26 @@ public class Runner
|
|
57
62
|
ModelManager bootstrapModelManager = new ModelManager(null, new ObjectMapper());
|
58
63
|
this.options = bootstrapModelManager.readObject(Options.class, optionJson);
|
59
64
|
this.systemConfig = new ConfigLoader(bootstrapModelManager).fromPropertiesYamlLiteral(System.getProperties(), "embulk.");
|
65
|
+
mergeOptionsToSystemConfig(options, systemConfig);
|
66
|
+
this.service = new EmbulkService(systemConfig);
|
67
|
+
this.injector = service.getInjector();
|
68
|
+
}
|
69
|
+
|
70
|
+
@SuppressWarnings("unchecked")
|
71
|
+
private void mergeOptionsToSystemConfig(Options options, ConfigSource systemConfig)
|
72
|
+
{
|
60
73
|
String logLevel = options.getLogLevel();
|
61
74
|
if (logLevel != null) {
|
62
|
-
systemConfig.set("
|
75
|
+
systemConfig.set("log_level", logLevel);
|
76
|
+
}
|
77
|
+
|
78
|
+
List<PluginType> guessPlugins = options.getGuessPlugins();
|
79
|
+
if (guessPlugins != null && !guessPlugins.isEmpty()) {
|
80
|
+
List<PluginType> list = new ArrayList<PluginType>() { };
|
81
|
+
list = systemConfig.get((Class<List<PluginType>>) list.getClass(), "guess_plugins", list);
|
82
|
+
list.addAll(guessPlugins);
|
83
|
+
systemConfig.set("guess_plugins", list);
|
63
84
|
}
|
64
|
-
this.service = new EmbulkService(systemConfig);
|
65
|
-
this.injector = service.getInjector();
|
66
85
|
}
|
67
86
|
|
68
87
|
public void main(String command, String[] args)
|
@@ -0,0 +1,16 @@
|
|
1
|
+
package org.embulk.exec;
|
2
|
+
|
3
|
+
import javax.inject.Qualifier;
|
4
|
+
import java.lang.annotation.Retention;
|
5
|
+
import java.lang.annotation.Target;
|
6
|
+
import static java.lang.annotation.ElementType.FIELD;
|
7
|
+
import static java.lang.annotation.ElementType.METHOD;
|
8
|
+
import static java.lang.annotation.ElementType.PARAMETER;
|
9
|
+
import static java.lang.annotation.RetentionPolicy.RUNTIME;
|
10
|
+
|
11
|
+
@Retention(RUNTIME)
|
12
|
+
@Target({FIELD, PARAMETER, METHOD})
|
13
|
+
@Qualifier
|
14
|
+
public @interface ForGuess
|
15
|
+
{
|
16
|
+
}
|
@@ -1,11 +1,15 @@
|
|
1
1
|
package org.embulk.exec;
|
2
2
|
|
3
3
|
import java.util.List;
|
4
|
+
import java.util.Set;
|
4
5
|
import java.util.ArrayList;
|
5
6
|
import com.google.common.collect.ImmutableList;
|
7
|
+
import com.google.common.base.Throwables;
|
6
8
|
import com.google.inject.Inject;
|
7
9
|
import com.google.inject.Injector;
|
8
|
-
import com.google.
|
10
|
+
import com.google.inject.Binder;
|
11
|
+
import com.google.inject.multibindings.Multibinder;
|
12
|
+
import org.embulk.plugin.PluginType;
|
9
13
|
import org.embulk.config.Config;
|
10
14
|
import org.embulk.config.ConfigDefault;
|
11
15
|
import org.embulk.config.ConfigDiff;
|
@@ -14,7 +18,6 @@ import org.embulk.config.Task;
|
|
14
18
|
import org.embulk.config.TaskSource;
|
15
19
|
import org.embulk.config.ConfigSource;
|
16
20
|
import org.embulk.config.CommitReport;
|
17
|
-
import org.embulk.plugin.PluginType;
|
18
21
|
import org.embulk.spi.Schema;
|
19
22
|
import org.embulk.spi.Column;
|
20
23
|
import org.embulk.spi.Page;
|
@@ -33,10 +36,16 @@ import org.embulk.spi.FileInputRunner;
|
|
33
36
|
|
34
37
|
public class GuessExecutor
|
35
38
|
{
|
36
|
-
private final Injector injector;
|
37
|
-
private final ConfigSource systemConfig;
|
38
39
|
private final List<PluginType> defaultGuessPlugins;
|
39
40
|
|
41
|
+
private interface GuessExecutorSystemTask
|
42
|
+
extends Task
|
43
|
+
{
|
44
|
+
@Config("guess_plugins")
|
45
|
+
@ConfigDefault("[]")
|
46
|
+
public List<PluginType> getGuessPlugins();
|
47
|
+
}
|
48
|
+
|
40
49
|
private interface GuessExecutorTask
|
41
50
|
extends Task
|
42
51
|
{
|
@@ -49,19 +58,22 @@ public class GuessExecutor
|
|
49
58
|
public List<PluginType> getExcludeGuessPlugins();
|
50
59
|
}
|
51
60
|
|
61
|
+
public static void registerDefaultGuessPluginTo(Binder binder, PluginType type)
|
62
|
+
{
|
63
|
+
Multibinder<PluginType> multibinder = Multibinder.newSetBinder(binder, PluginType.class, ForGuess.class);
|
64
|
+
multibinder.addBinding().toInstance(type);
|
65
|
+
}
|
66
|
+
|
52
67
|
@Inject
|
53
|
-
public GuessExecutor(
|
54
|
-
@
|
68
|
+
public GuessExecutor(@ForSystemConfig ConfigSource systemConfig,
|
69
|
+
@ForGuess Set<PluginType> defaultGuessPlugins)
|
55
70
|
{
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
new PluginType("charset"),
|
63
|
-
new PluginType("newline"),
|
64
|
-
new PluginType("csv"));
|
71
|
+
GuessExecutorSystemTask systemTask = systemConfig.loadConfig(GuessExecutorSystemTask.class);
|
72
|
+
|
73
|
+
ImmutableList.Builder<PluginType> list = ImmutableList.builder();
|
74
|
+
list.addAll(defaultGuessPlugins);
|
75
|
+
list.addAll(systemTask.getGuessPlugins());
|
76
|
+
this.defaultGuessPlugins = list.build();
|
65
77
|
}
|
66
78
|
|
67
79
|
public ConfigDiff guess(ExecSession exec, final ConfigSource config)
|
@@ -80,29 +92,50 @@ public class GuessExecutor
|
|
80
92
|
}
|
81
93
|
}
|
82
94
|
|
95
|
+
protected InputPlugin newInputPlugin(ConfigSource inputConfig)
|
96
|
+
{
|
97
|
+
return Exec.newPlugin(InputPlugin.class, inputConfig.get(PluginType.class, "type"));
|
98
|
+
}
|
99
|
+
|
83
100
|
private ConfigDiff doGuess(ConfigSource config)
|
84
101
|
{
|
85
|
-
|
86
|
-
|
87
|
-
|
102
|
+
ConfigSource inputConfig = config.getNested("in");
|
103
|
+
|
104
|
+
InputPlugin input = newInputPlugin(inputConfig);
|
105
|
+
|
106
|
+
ConfigDiff inputGuessed;
|
107
|
+
try {
|
108
|
+
inputGuessed = input.guess(inputConfig);
|
109
|
+
} catch (AbstractMethodError ex) {
|
110
|
+
// for backward compatibility with embulk v0.4 interface
|
111
|
+
throw new UnsupportedOperationException(input.getClass().getSimpleName()+".guess(ConfigSource) is not implemented. This input plugin does not support guessing.");
|
88
112
|
}
|
89
113
|
|
114
|
+
ConfigDiff wrapped = Exec.newConfigDiff();
|
115
|
+
wrapped.getNestedOrSetEmpty("in").merge(inputGuessed);
|
116
|
+
return wrapped;
|
117
|
+
}
|
118
|
+
|
119
|
+
// called by FileInputRunner
|
120
|
+
public ConfigDiff guessParserConfig(Buffer sample, ConfigSource inputConfig, ConfigSource execConfig)
|
121
|
+
{
|
90
122
|
List<PluginType> guessPlugins = new ArrayList<PluginType>(defaultGuessPlugins);
|
91
|
-
|
123
|
+
|
124
|
+
GuessExecutorTask task = execConfig.loadConfig(GuessExecutorTask.class);
|
92
125
|
guessPlugins.addAll(task.getGuessPlugins());
|
93
126
|
guessPlugins.removeAll(task.getExcludeGuessPlugins());
|
94
127
|
|
95
|
-
return
|
128
|
+
return guessParserConfig(sample, inputConfig, guessPlugins);
|
96
129
|
}
|
97
130
|
|
98
|
-
private ConfigDiff
|
131
|
+
private ConfigDiff guessParserConfig(Buffer sample,
|
99
132
|
ConfigSource config, List<PluginType> guessPlugins)
|
100
133
|
{
|
101
134
|
// repeat guessing upto 10 times
|
102
135
|
ConfigDiff lastGuessed = Exec.newConfigDiff();
|
103
136
|
for (int i=0; i < 10; i++) {
|
104
137
|
// include last-guessed config to run guess input
|
105
|
-
ConfigSource originalConfig = config.
|
138
|
+
ConfigSource originalConfig = config.deepCopy().merge(lastGuessed);
|
106
139
|
ConfigSource guessInputConfig = originalConfig.deepCopy();
|
107
140
|
guessInputConfig.getNestedOrSetEmpty("parser")
|
108
141
|
.set("type", "system_guess") // override in.parser.type so that FileInputPlugin creates GuessParserPlugin
|
@@ -142,19 +175,12 @@ public class GuessExecutor
|
|
142
175
|
// merge to the last-guessed config
|
143
176
|
if (lastGuessed.equals(guessed)) {
|
144
177
|
// not changed
|
145
|
-
return
|
178
|
+
return lastGuessed;
|
146
179
|
}
|
147
180
|
lastGuessed = guessed;
|
148
181
|
}
|
149
182
|
|
150
|
-
return
|
151
|
-
}
|
152
|
-
|
153
|
-
private static ConfigDiff wrapInIn(ConfigDiff lastGuessed)
|
154
|
-
{
|
155
|
-
ConfigDiff wrapped = Exec.newConfigDiff();
|
156
|
-
wrapped.getNestedOrSetEmpty("in").merge(lastGuessed);
|
157
|
-
return wrapped;
|
183
|
+
return lastGuessed;
|
158
184
|
}
|
159
185
|
|
160
186
|
private static class BufferFileInputPlugin
|
@@ -18,7 +18,7 @@ public class LoggerProvider
|
|
18
18
|
Properties prop = new Properties();
|
19
19
|
|
20
20
|
final String level;
|
21
|
-
String logLevel = systemConfig.get(String.class, "
|
21
|
+
String logLevel = systemConfig.get(String.class, "log_level", "info"); // here can't use loadConfig because ModelManager uses LoggerProvider
|
22
22
|
switch (logLevel) {
|
23
23
|
case "fatal": level = "FATAL"; break;
|
24
24
|
case "error": level = "ERROR"; break;
|
@@ -13,13 +13,15 @@ import org.embulk.spi.Buffer;
|
|
13
13
|
import org.embulk.spi.InputPlugin;
|
14
14
|
import org.embulk.spi.ParserPlugin;
|
15
15
|
import org.embulk.spi.FileInput;
|
16
|
+
import org.embulk.spi.FileInputRunner;
|
16
17
|
import org.embulk.spi.PageOutput;
|
18
|
+
import org.embulk.exec.ForSystemConfig;
|
17
19
|
import static org.embulk.spi.util.Inputs.each;
|
18
20
|
|
19
21
|
/*
|
20
|
-
* Used by
|
22
|
+
* Used by FileInputRunner.guess
|
21
23
|
*/
|
22
|
-
class SamplingParserPlugin
|
24
|
+
public class SamplingParserPlugin
|
23
25
|
implements ParserPlugin
|
24
26
|
{
|
25
27
|
private final int maxSampleSize;
|
@@ -44,13 +46,12 @@ class SamplingParserPlugin
|
|
44
46
|
throw new SampledNoticeError(buffer);
|
45
47
|
}
|
46
48
|
|
47
|
-
static Buffer runFileInputSampling(ConfigSource
|
49
|
+
public static Buffer runFileInputSampling(final FileInputRunner input, ConfigSource inputConfig)
|
48
50
|
{
|
49
51
|
// override in.parser.type so that FileInputRunner creates GuessParserPlugin
|
50
|
-
ConfigSource samplingInputConfig =
|
52
|
+
ConfigSource samplingInputConfig = inputConfig.deepCopy();
|
51
53
|
samplingInputConfig.getNestedOrSetEmpty("parser").set("type", "system_sampling");
|
52
54
|
|
53
|
-
final InputPlugin input = Exec.newPlugin(InputPlugin.class, samplingInputConfig.get(PluginType.class, "type"));
|
54
55
|
try {
|
55
56
|
input.transaction(samplingInputConfig, new InputPlugin.Control() {
|
56
57
|
public List<CommitReport> run(TaskSource taskSource, Schema schema, int taskCount)
|
@@ -27,8 +27,11 @@ public class ExecSession
|
|
27
27
|
private final ModelManager modelManager;
|
28
28
|
private final PluginManager pluginManager;
|
29
29
|
private final BufferAllocator bufferAllocator;
|
30
|
+
|
31
|
+
private final ConfigSource execConfig;
|
30
32
|
private final Timestamp transactionTime;
|
31
33
|
private final DateTimeZone transactionTimeZone;
|
34
|
+
|
32
35
|
private final boolean preview;
|
33
36
|
|
34
37
|
public interface SessionTask
|
@@ -44,16 +47,6 @@ public class ExecSession
|
|
44
47
|
}
|
45
48
|
|
46
49
|
public ExecSession(Injector injector, ConfigSource execConfig)
|
47
|
-
{
|
48
|
-
this(injector, execConfig.loadConfig(SessionTask.class));
|
49
|
-
}
|
50
|
-
|
51
|
-
public ExecSession(Injector injector, TaskSource taskSource)
|
52
|
-
{
|
53
|
-
this(injector, taskSource.loadTask(SessionTask.class));
|
54
|
-
}
|
55
|
-
|
56
|
-
public ExecSession(Injector injector, SessionTask task)
|
57
50
|
{
|
58
51
|
this.injector = injector;
|
59
52
|
this.loggerFactory = injector.getInstance(ILoggerFactory.class);
|
@@ -61,8 +54,11 @@ public class ExecSession
|
|
61
54
|
this.pluginManager = injector.getInstance(PluginManager.class);
|
62
55
|
this.bufferAllocator = injector.getInstance(BufferAllocator.class);
|
63
56
|
|
57
|
+
this.execConfig = execConfig.deepCopy();
|
58
|
+
SessionTask task = execConfig.loadConfig(SessionTask.class);
|
64
59
|
this.transactionTime = task.getTransactionTime().or(Timestamp.ofEpochMilli(System.currentTimeMillis())); // TODO get nanoseconds for default
|
65
60
|
this.transactionTimeZone = task.getTransactionTimeZone();
|
61
|
+
|
66
62
|
this.preview = false;
|
67
63
|
}
|
68
64
|
|
@@ -73,8 +69,11 @@ public class ExecSession
|
|
73
69
|
this.modelManager = copy.modelManager;
|
74
70
|
this.pluginManager = copy.pluginManager;
|
75
71
|
this.bufferAllocator = copy.bufferAllocator;
|
72
|
+
|
73
|
+
this.execConfig = copy.execConfig;
|
76
74
|
this.transactionTime = copy.transactionTime;
|
77
75
|
this.transactionTimeZone = copy.transactionTimeZone;
|
76
|
+
|
78
77
|
this.preview = preview;
|
79
78
|
}
|
80
79
|
|
@@ -120,6 +119,11 @@ public class ExecSession
|
|
120
119
|
return bufferAllocator;
|
121
120
|
}
|
122
121
|
|
122
|
+
public ConfigSource getExecConfig()
|
123
|
+
{
|
124
|
+
return execConfig;
|
125
|
+
}
|
126
|
+
|
123
127
|
public <T> T newPlugin(Class<T> iface, PluginType type)
|
124
128
|
{
|
125
129
|
return pluginManager.newPlugin(iface, type);
|
@@ -11,6 +11,9 @@ import org.embulk.config.Config;
|
|
11
11
|
import org.embulk.config.ConfigDefault;
|
12
12
|
import org.embulk.plugin.PluginType;
|
13
13
|
import org.embulk.spi.util.Decoders;
|
14
|
+
import org.embulk.exec.GuessExecutor;
|
15
|
+
import org.embulk.exec.SamplingParserPlugin;
|
16
|
+
import org.embulk.exec.NoSampleException;
|
14
17
|
|
15
18
|
public class FileInputRunner
|
16
19
|
implements InputPlugin
|
@@ -60,6 +63,7 @@ public class FileInputRunner
|
|
60
63
|
return fileInputPlugin.transaction(config, new RunnerControl(task, control));
|
61
64
|
}
|
62
65
|
|
66
|
+
@Override
|
63
67
|
public ConfigDiff resume(TaskSource taskSource,
|
64
68
|
Schema schema, int taskCount,
|
65
69
|
InputPlugin.Control control)
|
@@ -68,6 +72,18 @@ public class FileInputRunner
|
|
68
72
|
return fileInputPlugin.resume(task.getFileInputTaskSource(), taskCount, new RunnerControl(task, control));
|
69
73
|
}
|
70
74
|
|
75
|
+
@Override
|
76
|
+
public ConfigDiff guess(ConfigSource config)
|
77
|
+
{
|
78
|
+
Buffer sample = SamplingParserPlugin.runFileInputSampling(this, config);
|
79
|
+
if (sample.limit() == 0) {
|
80
|
+
throw new NoSampleException("Can't get sample data because the first input file is empty");
|
81
|
+
}
|
82
|
+
|
83
|
+
GuessExecutor guessExecutor = Exec.session().getInjector().getInstance(GuessExecutor.class);
|
84
|
+
return guessExecutor.guessParserConfig(sample, config, Exec.session().getExecConfig());
|
85
|
+
}
|
86
|
+
|
71
87
|
private class RunnerControl
|
72
88
|
implements FileInputPlugin.Control
|
73
89
|
{
|
@@ -80,7 +80,8 @@ public class FileInputInputStream
|
|
80
80
|
@Override
|
81
81
|
public long skip(long len)
|
82
82
|
{
|
83
|
-
|
83
|
+
int skipped = read(null, 0, (int) Math.min(len, Integer.MAX_VALUE));
|
84
|
+
return skipped > 0 ? skipped : 0;
|
84
85
|
}
|
85
86
|
|
86
87
|
private boolean nextBuffer()
|
@@ -64,4 +64,26 @@ public class TestFileInputInputStream
|
|
64
64
|
assertEquals(expected.length, pos);
|
65
65
|
assertArrayEquals(expected, actual);
|
66
66
|
}
|
67
|
+
|
68
|
+
@Test
|
69
|
+
public void testSkipReturnsZeroForNoData() {
|
70
|
+
FileInputInputStream in = new FileInputInputStream(new MockFileInput());
|
71
|
+
assertEquals("Verify skip() returns 0 when there is no data.", 0L, in.skip(1));
|
72
|
+
}
|
73
|
+
|
74
|
+
private static class MockFileInput implements FileInput {
|
75
|
+
@Override
|
76
|
+
public boolean nextFile() {
|
77
|
+
return false;
|
78
|
+
}
|
79
|
+
|
80
|
+
@Override
|
81
|
+
public Buffer poll() {
|
82
|
+
return null;
|
83
|
+
}
|
84
|
+
|
85
|
+
@Override
|
86
|
+
public void close() {
|
87
|
+
}
|
88
|
+
}
|
67
89
|
}
|