embulk 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/.gitignore +13 -0
- data/Gemfile +3 -0
- data/Gemfile.lock +33 -0
- data/README.md +117 -0
- data/Rakefile +58 -0
- data/bin/embulk +63 -0
- data/build.gradle +149 -0
- data/embulk-cli/build.gradle +6 -0
- data/embulk-cli/pom.xml +94 -0
- data/embulk-cli/src/main/java/org/embulk/cli/Main.java +15 -0
- data/embulk-core/build.gradle +6 -0
- data/embulk-core/pom.xml +143 -0
- data/embulk-core/src/main/java/org/embulk/EmbulkService.java +39 -0
- data/embulk-core/src/main/java/org/embulk/command/Runner.java +199 -0
- data/embulk-core/src/main/java/org/embulk/command/TablePrinter.java +119 -0
- data/embulk-core/src/main/java/org/embulk/config/CommitReport.java +26 -0
- data/embulk-core/src/main/java/org/embulk/config/Config.java +15 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigDefault.java +15 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigException.java +20 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigLoader.java +83 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigSource.java +28 -0
- data/embulk-core/src/main/java/org/embulk/config/DataSource.java +35 -0
- data/embulk-core/src/main/java/org/embulk/config/DataSourceImpl.java +208 -0
- data/embulk-core/src/main/java/org/embulk/config/DataSourceSerDe.java +80 -0
- data/embulk-core/src/main/java/org/embulk/config/GenericTypeReference.java +20 -0
- data/embulk-core/src/main/java/org/embulk/config/ModelManager.java +125 -0
- data/embulk-core/src/main/java/org/embulk/config/NextConfig.java +26 -0
- data/embulk-core/src/main/java/org/embulk/config/Task.java +10 -0
- data/embulk-core/src/main/java/org/embulk/config/TaskInvocationHandler.java +180 -0
- data/embulk-core/src/main/java/org/embulk/config/TaskSerDe.java +343 -0
- data/embulk-core/src/main/java/org/embulk/config/TaskSource.java +28 -0
- data/embulk-core/src/main/java/org/embulk/config/TaskValidationException.java +37 -0
- data/embulk-core/src/main/java/org/embulk/config/TaskValidator.java +24 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExecModule.java +45 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExecuteInterruptedException.java +10 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExecuteResult.java +19 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExtensionServiceLoaderModule.java +43 -0
- data/embulk-core/src/main/java/org/embulk/exec/ForSystemConfig.java +16 -0
- data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +307 -0
- data/embulk-core/src/main/java/org/embulk/exec/LocalExecutor.java +274 -0
- data/embulk-core/src/main/java/org/embulk/exec/LoggerProvider.java +30 -0
- data/embulk-core/src/main/java/org/embulk/exec/NoSampleException.java +10 -0
- data/embulk-core/src/main/java/org/embulk/exec/PooledBufferAllocator.java +58 -0
- data/embulk-core/src/main/java/org/embulk/exec/PreviewExecutor.java +138 -0
- data/embulk-core/src/main/java/org/embulk/exec/PreviewResult.java +27 -0
- data/embulk-core/src/main/java/org/embulk/exec/PreviewedNoticeError.java +17 -0
- data/embulk-core/src/main/java/org/embulk/exec/SamplingParserPlugin.java +116 -0
- data/embulk-core/src/main/java/org/embulk/exec/SystemConfigModule.java +24 -0
- data/embulk-core/src/main/java/org/embulk/jruby/JRubyPluginSource.java +69 -0
- data/embulk-core/src/main/java/org/embulk/jruby/JRubyScriptingModule.java +100 -0
- data/embulk-core/src/main/java/org/embulk/plugin/BuiltinPluginSourceModule.java +17 -0
- data/embulk-core/src/main/java/org/embulk/plugin/InjectedPluginSource.java +92 -0
- data/embulk-core/src/main/java/org/embulk/plugin/PluginManager.java +34 -0
- data/embulk-core/src/main/java/org/embulk/plugin/PluginSource.java +6 -0
- data/embulk-core/src/main/java/org/embulk/plugin/PluginSourceNotMatchException.java +19 -0
- data/embulk-core/src/main/java/org/embulk/plugin/PluginType.java +47 -0
- data/embulk-core/src/main/java/org/embulk/plugin/SetThreadContextClassLoader.java +19 -0
- data/embulk-core/src/main/java/org/embulk/spi/Buffer.java +113 -0
- data/embulk-core/src/main/java/org/embulk/spi/BufferAllocator.java +8 -0
- data/embulk-core/src/main/java/org/embulk/spi/Column.java +92 -0
- data/embulk-core/src/main/java/org/embulk/spi/ColumnConfig.java +79 -0
- data/embulk-core/src/main/java/org/embulk/spi/DecoderPlugin.java +16 -0
- data/embulk-core/src/main/java/org/embulk/spi/EncoderPlugin.java +16 -0
- data/embulk-core/src/main/java/org/embulk/spi/Exec.java +76 -0
- data/embulk-core/src/main/java/org/embulk/spi/ExecAction.java +6 -0
- data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +105 -0
- data/embulk-core/src/main/java/org/embulk/spi/Extension.java +42 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileInput.java +11 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileInputPlugin.java +19 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +113 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileOutput.java +13 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileOutputPlugin.java +20 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileOutputRunner.java +167 -0
- data/embulk-core/src/main/java/org/embulk/spi/FormatterPlugin.java +18 -0
- data/embulk-core/src/main/java/org/embulk/spi/GuessPlugin.java +9 -0
- data/embulk-core/src/main/java/org/embulk/spi/InputPlugin.java +20 -0
- data/embulk-core/src/main/java/org/embulk/spi/OutputPlugin.java +21 -0
- data/embulk-core/src/main/java/org/embulk/spi/Page.java +45 -0
- data/embulk-core/src/main/java/org/embulk/spi/PageBuilder.java +327 -0
- data/embulk-core/src/main/java/org/embulk/spi/PageFormat.java +47 -0
- data/embulk-core/src/main/java/org/embulk/spi/PageOutput.java +11 -0
- data/embulk-core/src/main/java/org/embulk/spi/PageReader.java +227 -0
- data/embulk-core/src/main/java/org/embulk/spi/ParserPlugin.java +17 -0
- data/embulk-core/src/main/java/org/embulk/spi/Schema.java +101 -0
- data/embulk-core/src/main/java/org/embulk/spi/SchemaConfig.java +52 -0
- data/embulk-core/src/main/java/org/embulk/spi/SchemaVisitor.java +14 -0
- data/embulk-core/src/main/java/org/embulk/spi/Transactional.java +10 -0
- data/embulk-core/src/main/java/org/embulk/spi/TransactionalFileInput.java +17 -0
- data/embulk-core/src/main/java/org/embulk/spi/TransactionalFileOutput.java +19 -0
- data/embulk-core/src/main/java/org/embulk/spi/TransactionalPageOutput.java +17 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/DateTimeZoneSerDe.java +57 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/JRubyTimeParserHelper.java +8 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/JRubyTimeParserHelperFactory.java +6 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/Timestamp.java +159 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormat.java +98 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormatter.java +55 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParseException.java +6 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParser.java +60 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampSerDe.java +50 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/AbstractType.java +55 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/BooleanType.java +12 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/DoubleType.java +12 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/LongType.java +12 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/StringType.java +12 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/TimestampType.java +39 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/Type.java +15 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/TypeDeserializer.java +47 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/Types.java +14 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/CharsetSerDe.java +55 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Decoders.java +81 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Encoders.java +81 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/FileInputInputStream.java +110 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/FileOutputOutputStream.java +94 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/InputStreamFileInput.java +111 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Inputs.java +74 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/LineDecoder.java +118 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/LineEncoder.java +109 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/ListFileInput.java +52 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Newline.java +38 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/PagePrinter.java +102 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Pages.java +139 -0
- data/embulk-core/src/test/java/org/embulk/EmbulkTestRuntime.java +110 -0
- data/embulk-core/src/test/java/org/embulk/GuiceBinder.java +72 -0
- data/embulk-core/src/test/java/org/embulk/RandomManager.java +53 -0
- data/embulk-core/src/test/java/org/embulk/TestPluginSourceModule.java +23 -0
- data/embulk-core/src/test/java/org/embulk/TestUtilityModule.java +17 -0
- data/embulk-core/src/test/java/org/embulk/config/TestConfigSource.java +114 -0
- data/embulk-core/src/test/java/org/embulk/config/TestTaskSource.java +70 -0
- data/embulk-core/src/test/java/org/embulk/plugin/MockPluginSource.java +57 -0
- data/embulk-core/src/test/java/org/embulk/plugin/TestPluginType.java +18 -0
- data/embulk-core/src/test/java/org/embulk/spi/MockFileOutput.java +63 -0
- data/embulk-core/src/test/java/org/embulk/spi/MockFormatterPlugin.java +101 -0
- data/embulk-core/src/test/java/org/embulk/spi/MockParserPlugin.java +73 -0
- data/embulk-core/src/test/java/org/embulk/spi/PageTestUtils.java +78 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestFileInputInputStream.java +67 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestFileInputRunner.java +180 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestFileOutputRunner.java +192 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestInputStreamFileInput.java +188 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestPageBuilderReader.java +301 -0
- data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestamp.java +116 -0
- data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestampFormatterParser.java +52 -0
- data/embulk-core/src/test/java/org/embulk/spi/type/TestTypeSerDe.java +45 -0
- data/embulk-core/src/test/java/org/embulk/spi/util/TestLineDecoder.java +132 -0
- data/embulk-core/src/test/java/org/embulk/spi/util/TestLineEncoder.java +123 -0
- data/embulk-standards/build.gradle +6 -0
- data/embulk-standards/pom.xml +68 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java +158 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +233 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvTokenizer.java +355 -0
- data/embulk-standards/src/main/java/org/embulk/standards/GzipFileDecoderPlugin.java +55 -0
- data/embulk-standards/src/main/java/org/embulk/standards/GzipFileEncoderPlugin.java +39 -0
- data/embulk-standards/src/main/java/org/embulk/standards/LocalFileInputPlugin.java +138 -0
- data/embulk-standards/src/main/java/org/embulk/standards/LocalFileOutputPlugin.java +128 -0
- data/embulk-standards/src/main/java/org/embulk/standards/NullOutputPlugin.java +46 -0
- data/embulk-standards/src/main/java/org/embulk/standards/S3FileInputPlugin.java +238 -0
- data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginExtension.java +16 -0
- data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginModule.java +44 -0
- data/embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java +71 -0
- data/embulk-standards/src/main/resources/META-INF/services/org.embulk.spi.Extension +1 -0
- data/embulk-standards/src/test/java/org/embulk/standards/TestCsvParserPlugin.java +69 -0
- data/embulk-standards/src/test/java/org/embulk/standards/TestCsvTokenizer.java +291 -0
- data/embulk-standards/src/test/java/org/embulk/standards/TestS3FileInputPlugin.java +43 -0
- data/embulk.gemspec +27 -0
- data/examples/config.yml +34 -0
- data/examples/csv/sample.csv.gz +0 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +6 -0
- data/gradlew +164 -0
- data/gradlew.bat +90 -0
- data/lib/embulk.rb +16 -0
- data/lib/embulk/buffer.rb +17 -0
- data/lib/embulk/column.rb +47 -0
- data/lib/embulk/command/embulk.rb +39 -0
- data/lib/embulk/command/embulk_example.rb +32 -0
- data/lib/embulk/command/embulk_generate_bin.rb +62 -0
- data/lib/embulk/command/embulk_run.rb +243 -0
- data/lib/embulk/data/bundle/.bundle/config +3 -0
- data/lib/embulk/data/bundle/Gemfile +31 -0
- data/lib/embulk/data/bundle/Gemfile.lock +8 -0
- data/lib/embulk/data/bundle/embulk/input_example.rb +40 -0
- data/lib/embulk/data/bundle/embulk/output_example.rb +51 -0
- data/lib/embulk/data_source.rb +66 -0
- data/lib/embulk/error.rb +5 -0
- data/lib/embulk/guess_charset.rb +26 -0
- data/lib/embulk/guess_csv.rb +195 -0
- data/lib/embulk/guess_gzip.rb +18 -0
- data/lib/embulk/guess_newline.rb +20 -0
- data/lib/embulk/guess_plugin.rb +113 -0
- data/lib/embulk/input_plugin.rb +53 -0
- data/lib/embulk/java/bootstrap.rb +12 -0
- data/lib/embulk/java/imports.rb +26 -0
- data/lib/embulk/java/time_helper.rb +77 -0
- data/lib/embulk/output_plugin.rb +104 -0
- data/lib/embulk/page.rb +28 -0
- data/lib/embulk/page_builder.rb +22 -0
- data/lib/embulk/plugin.rb +152 -0
- data/lib/embulk/plugin_registry.rb +70 -0
- data/lib/embulk/schema.rb +85 -0
- data/lib/embulk/time_format_guess.rb +331 -0
- data/lib/embulk/version.rb +3 -0
- data/pom.xml +533 -0
- data/settings.gradle +5 -0
- metadata +370 -0
@@ -0,0 +1,19 @@
|
|
1
|
+
package org.embulk.exec;
|
2
|
+
|
3
|
+
import java.util.List;
|
4
|
+
import org.embulk.config.NextConfig;
|
5
|
+
|
6
|
+
public class ExecuteResult
|
7
|
+
{
|
8
|
+
private final NextConfig nextConfig;
|
9
|
+
|
10
|
+
public ExecuteResult(NextConfig nextConfig)
|
11
|
+
{
|
12
|
+
this.nextConfig = nextConfig;
|
13
|
+
}
|
14
|
+
|
15
|
+
public NextConfig getNextConfig()
|
16
|
+
{
|
17
|
+
return nextConfig;
|
18
|
+
}
|
19
|
+
}
|
@@ -0,0 +1,43 @@
|
|
1
|
+
package org.embulk.exec;
|
2
|
+
|
3
|
+
import java.util.ServiceLoader;
|
4
|
+
import com.google.inject.Module;
|
5
|
+
import com.google.inject.Binder;
|
6
|
+
import org.embulk.config.ConfigSource;
|
7
|
+
import org.embulk.spi.Extension;
|
8
|
+
|
9
|
+
/**
|
10
|
+
* ExtensionServiceLoaderModule loads Extensions using java.util.ServiceLoader
|
11
|
+
* mechanism.
|
12
|
+
* Jar packages providing an extension need to include
|
13
|
+
* META-INF/services/org.embulk.exec.Extension file. Contents of the file is
|
14
|
+
* one-line text of the extension class name (e.g. com.example.MyPluginSourceExtension).
|
15
|
+
*/
|
16
|
+
public class ExtensionServiceLoaderModule
|
17
|
+
implements Module
|
18
|
+
{
|
19
|
+
private final ClassLoader classLoader;
|
20
|
+
private final ConfigSource systemConfig;
|
21
|
+
|
22
|
+
public ExtensionServiceLoaderModule(ConfigSource systemConfig)
|
23
|
+
{
|
24
|
+
this(ExtensionServiceLoaderModule.class.getClassLoader(), systemConfig);
|
25
|
+
}
|
26
|
+
|
27
|
+
public ExtensionServiceLoaderModule(ClassLoader classLoader, ConfigSource systemConfig)
|
28
|
+
{
|
29
|
+
this.classLoader = classLoader;
|
30
|
+
this.systemConfig = systemConfig;
|
31
|
+
}
|
32
|
+
|
33
|
+
@Override
|
34
|
+
public void configure(Binder binder)
|
35
|
+
{
|
36
|
+
ServiceLoader<Extension> serviceLoader = ServiceLoader.load(Extension.class, classLoader);
|
37
|
+
for (Extension extension : serviceLoader) {
|
38
|
+
for (Module module : extension.getModules(systemConfig)) {
|
39
|
+
module.configure(binder);
|
40
|
+
}
|
41
|
+
}
|
42
|
+
}
|
43
|
+
}
|
@@ -0,0 +1,16 @@
|
|
1
|
+
package org.embulk.exec;
|
2
|
+
|
3
|
+
import javax.inject.Qualifier;
|
4
|
+
import java.lang.annotation.Retention;
|
5
|
+
import java.lang.annotation.Target;
|
6
|
+
import static java.lang.annotation.ElementType.FIELD;
|
7
|
+
import static java.lang.annotation.ElementType.METHOD;
|
8
|
+
import static java.lang.annotation.ElementType.PARAMETER;
|
9
|
+
import static java.lang.annotation.RetentionPolicy.RUNTIME;
|
10
|
+
|
11
|
+
@Retention(RUNTIME)
|
12
|
+
@Target({FIELD, PARAMETER, METHOD})
|
13
|
+
@Qualifier
|
14
|
+
public @interface ForSystemConfig
|
15
|
+
{
|
16
|
+
}
|
@@ -0,0 +1,307 @@
|
|
1
|
+
package org.embulk.exec;
|
2
|
+
|
3
|
+
import java.util.List;
|
4
|
+
import java.util.ArrayList;
|
5
|
+
import com.google.common.collect.ImmutableList;
|
6
|
+
import com.google.inject.Inject;
|
7
|
+
import com.google.inject.Injector;
|
8
|
+
import com.google.common.base.Throwables;
|
9
|
+
import org.embulk.config.Config;
|
10
|
+
import org.embulk.config.ConfigDefault;
|
11
|
+
import org.embulk.config.NextConfig;
|
12
|
+
import org.embulk.config.DataSource;
|
13
|
+
import org.embulk.config.Task;
|
14
|
+
import org.embulk.config.TaskSource;
|
15
|
+
import org.embulk.config.ConfigSource;
|
16
|
+
import org.embulk.config.CommitReport;
|
17
|
+
import org.embulk.plugin.PluginType;
|
18
|
+
import org.embulk.spi.Schema;
|
19
|
+
import org.embulk.spi.Column;
|
20
|
+
import org.embulk.spi.Page;
|
21
|
+
import org.embulk.spi.Buffer;
|
22
|
+
import org.embulk.spi.InputPlugin;
|
23
|
+
import org.embulk.spi.FileInputPlugin;
|
24
|
+
import org.embulk.spi.ParserPlugin;
|
25
|
+
import org.embulk.spi.GuessPlugin;
|
26
|
+
import org.embulk.spi.Exec;
|
27
|
+
import org.embulk.spi.ExecAction;
|
28
|
+
import org.embulk.spi.ExecSession;
|
29
|
+
import org.embulk.spi.FileInput;
|
30
|
+
import org.embulk.spi.PageOutput;
|
31
|
+
import org.embulk.spi.TransactionalFileInput;
|
32
|
+
import org.embulk.spi.FileInputRunner;
|
33
|
+
|
34
|
+
public class GuessExecutor
|
35
|
+
{
|
36
|
+
private final Injector injector;
|
37
|
+
private final ConfigSource systemConfig;
|
38
|
+
private final List<PluginType> defaultGuessPlugins;
|
39
|
+
|
40
|
+
private interface GuessExecutorTask
|
41
|
+
extends Task
|
42
|
+
{
|
43
|
+
@Config("guess_plugins")
|
44
|
+
@ConfigDefault("[]")
|
45
|
+
public List<PluginType> getGuessPlugins();
|
46
|
+
|
47
|
+
@Config("exclude_guess_plugins")
|
48
|
+
@ConfigDefault("[]")
|
49
|
+
public List<PluginType> getExcludeGuessPlugins();
|
50
|
+
}
|
51
|
+
|
52
|
+
@Inject
|
53
|
+
public GuessExecutor(Injector injector,
|
54
|
+
@ForSystemConfig ConfigSource systemConfig)
|
55
|
+
{
|
56
|
+
this.injector = injector;
|
57
|
+
this.systemConfig = systemConfig;
|
58
|
+
|
59
|
+
// TODO get default guess plugins from injector using Multibinder
|
60
|
+
this.defaultGuessPlugins = ImmutableList.of(
|
61
|
+
new PluginType("gzip"),
|
62
|
+
new PluginType("charset"),
|
63
|
+
new PluginType("newline"),
|
64
|
+
new PluginType("csv"));
|
65
|
+
}
|
66
|
+
|
67
|
+
public NextConfig guess(ExecSession exec, final ConfigSource config)
|
68
|
+
{
|
69
|
+
try {
|
70
|
+
return Exec.doWith(exec, new ExecAction<NextConfig>() {
|
71
|
+
public NextConfig run()
|
72
|
+
{
|
73
|
+
return doGuess(config);
|
74
|
+
}
|
75
|
+
});
|
76
|
+
} catch (Exception ex) {
|
77
|
+
throw Throwables.propagate(ex);
|
78
|
+
}
|
79
|
+
}
|
80
|
+
|
81
|
+
private NextConfig doGuess(ConfigSource config)
|
82
|
+
{
|
83
|
+
Buffer sample = SamplingParserPlugin.runFileInputSampling(config);
|
84
|
+
if (sample.limit() == 0) {
|
85
|
+
throw new NoSampleException("Can't get sample data because the first input file is empty");
|
86
|
+
}
|
87
|
+
|
88
|
+
List<PluginType> guessPlugins = new ArrayList<PluginType>(defaultGuessPlugins);
|
89
|
+
GuessExecutorTask task = config.getNestedOrSetEmpty("exec").loadConfig(GuessExecutorTask.class);
|
90
|
+
guessPlugins.addAll(task.getGuessPlugins());
|
91
|
+
guessPlugins.removeAll(task.getExcludeGuessPlugins());
|
92
|
+
|
93
|
+
return runGuessInput(sample, config, guessPlugins);
|
94
|
+
}
|
95
|
+
|
96
|
+
private NextConfig runGuessInput(Buffer sample,
|
97
|
+
ConfigSource config, List<PluginType> guessPlugins)
|
98
|
+
{
|
99
|
+
// repeat guessing upto 10 times
|
100
|
+
NextConfig lastGuessed = Exec.newNextConfig();
|
101
|
+
for (int i=0; i < 10; i++) {
|
102
|
+
// include last-guessed config to run guess input
|
103
|
+
ConfigSource originalConfig = config.getNested("in").deepCopy().merge(lastGuessed);
|
104
|
+
ConfigSource guessInputConfig = originalConfig.deepCopy();
|
105
|
+
guessInputConfig.getNestedOrSetEmpty("parser")
|
106
|
+
.set("type", "system_guess") // override in.parser.type so that FileInputPlugin creates GuessParserPlugin
|
107
|
+
.set("guess_plugins", guessPlugins)
|
108
|
+
.set("orig_config", originalConfig);
|
109
|
+
|
110
|
+
// run FileInputPlugin
|
111
|
+
final FileInputRunner input = new FileInputRunner(new BufferFileInputPlugin(sample));
|
112
|
+
NextConfig guessed;
|
113
|
+
try {
|
114
|
+
input.transaction(guessInputConfig, new InputPlugin.Control() {
|
115
|
+
public List<CommitReport> run(TaskSource inputTaskSource, Schema schema, int processorCount)
|
116
|
+
{
|
117
|
+
// TODO repeat runwith processorIndex++ if NoSampleException happens
|
118
|
+
input.run(inputTaskSource, null, 0, new PageOutput() {
|
119
|
+
@Override
|
120
|
+
public void add(Page page)
|
121
|
+
{
|
122
|
+
throw new RuntimeException("Input plugin must be a FileInputPlugin to guess parser configuration"); // TODO exception class
|
123
|
+
}
|
124
|
+
|
125
|
+
@Override
|
126
|
+
public void finish() { }
|
127
|
+
|
128
|
+
@Override
|
129
|
+
public void close() { }
|
130
|
+
});
|
131
|
+
throw new AssertionError("Guess executor must throw GuessedNoticeError");
|
132
|
+
}
|
133
|
+
});
|
134
|
+
throw new AssertionError("Guess executor must throw GuessedNoticeError");
|
135
|
+
|
136
|
+
} catch (GuessedNoticeError error) {
|
137
|
+
guessed = lastGuessed.deepCopy().merge(error.getGuessedConfig());
|
138
|
+
}
|
139
|
+
|
140
|
+
// merge to the last-guessed config
|
141
|
+
if (lastGuessed.equals(guessed)) {
|
142
|
+
// not changed
|
143
|
+
return wrapInIn(lastGuessed);
|
144
|
+
}
|
145
|
+
lastGuessed = guessed;
|
146
|
+
}
|
147
|
+
|
148
|
+
return wrapInIn(lastGuessed);
|
149
|
+
}
|
150
|
+
|
151
|
+
private static NextConfig wrapInIn(NextConfig lastGuessed)
|
152
|
+
{
|
153
|
+
NextConfig wrapped = Exec.newNextConfig();
|
154
|
+
wrapped.getNestedOrSetEmpty("in").merge(lastGuessed);
|
155
|
+
return wrapped;
|
156
|
+
}
|
157
|
+
|
158
|
+
private static class BufferFileInputPlugin
|
159
|
+
implements FileInputPlugin
|
160
|
+
{
|
161
|
+
private final Buffer buffer;
|
162
|
+
|
163
|
+
public BufferFileInputPlugin(Buffer buffer)
|
164
|
+
{
|
165
|
+
this.buffer = buffer;
|
166
|
+
}
|
167
|
+
|
168
|
+
public NextConfig transaction(ConfigSource config, FileInputPlugin.Control control)
|
169
|
+
{
|
170
|
+
control.run(Exec.newTaskSource(), 1);
|
171
|
+
return Exec.newNextConfig();
|
172
|
+
}
|
173
|
+
|
174
|
+
public TransactionalFileInput open(TaskSource taskSource, int processorIndex)
|
175
|
+
{
|
176
|
+
return new BufferTransactionalFileInput(buffer);
|
177
|
+
}
|
178
|
+
}
|
179
|
+
|
180
|
+
private static class BufferTransactionalFileInput
|
181
|
+
implements TransactionalFileInput
|
182
|
+
{
|
183
|
+
private Buffer buffer;
|
184
|
+
|
185
|
+
public BufferTransactionalFileInput(Buffer buffer)
|
186
|
+
{
|
187
|
+
this.buffer = buffer;
|
188
|
+
}
|
189
|
+
|
190
|
+
@Override
|
191
|
+
public Buffer poll()
|
192
|
+
{
|
193
|
+
Buffer b = buffer;
|
194
|
+
buffer = null;
|
195
|
+
return b;
|
196
|
+
}
|
197
|
+
|
198
|
+
@Override
|
199
|
+
public boolean nextFile()
|
200
|
+
{
|
201
|
+
return buffer != null;
|
202
|
+
}
|
203
|
+
|
204
|
+
@Override
|
205
|
+
public void close() { }
|
206
|
+
|
207
|
+
@Override
|
208
|
+
public void abort() { }
|
209
|
+
|
210
|
+
@Override
|
211
|
+
public CommitReport commit()
|
212
|
+
{
|
213
|
+
return null;
|
214
|
+
}
|
215
|
+
}
|
216
|
+
|
217
|
+
public static class GuessParserPlugin
|
218
|
+
implements ParserPlugin
|
219
|
+
{
|
220
|
+
private interface PluginTask
|
221
|
+
extends Task
|
222
|
+
{
|
223
|
+
@Config("guess_plugins")
|
224
|
+
public List<PluginType> getGuessPluginTypes();
|
225
|
+
|
226
|
+
@Config("orig_config")
|
227
|
+
public ConfigSource getOriginalConfig();
|
228
|
+
}
|
229
|
+
|
230
|
+
@Override
|
231
|
+
public void transaction(ConfigSource config, ParserPlugin.Control control)
|
232
|
+
{
|
233
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
234
|
+
control.run(task.dump(), null);
|
235
|
+
}
|
236
|
+
|
237
|
+
@Override
|
238
|
+
public void run(TaskSource taskSource, Schema schema,
|
239
|
+
FileInput input, PageOutput pageOutput)
|
240
|
+
{
|
241
|
+
PluginTask task = taskSource.loadTask(PluginTask.class);
|
242
|
+
final ConfigSource originalConfig = task.getOriginalConfig();
|
243
|
+
|
244
|
+
// get sample buffer
|
245
|
+
Buffer sample = getFirstBuffer(input);
|
246
|
+
|
247
|
+
// load guess plugins
|
248
|
+
ImmutableList.Builder<GuessPlugin> builder = ImmutableList.builder();
|
249
|
+
for (PluginType guessType : task.getGuessPluginTypes()) {
|
250
|
+
GuessPlugin guess = Exec.newPlugin(GuessPlugin.class, guessType);
|
251
|
+
builder.add(guess);
|
252
|
+
}
|
253
|
+
List<GuessPlugin> guesses = builder.build();
|
254
|
+
|
255
|
+
// run guess plugins
|
256
|
+
ConfigSource mergedConfig = originalConfig.deepCopy();
|
257
|
+
NextConfig mergedGuessed = Exec.newNextConfig();
|
258
|
+
for (int i=0; i < guesses.size(); i++) {
|
259
|
+
NextConfig guessed = guesses.get(i).guess(originalConfig, sample);
|
260
|
+
mergedGuessed.merge(guessed);
|
261
|
+
mergedConfig.merge(mergedGuessed);
|
262
|
+
if (!mergedConfig.equals(originalConfig)) {
|
263
|
+
// config updated
|
264
|
+
throw new GuessedNoticeError(mergedGuessed);
|
265
|
+
}
|
266
|
+
}
|
267
|
+
throw new GuessedNoticeError(mergedGuessed);
|
268
|
+
}
|
269
|
+
|
270
|
+
private static Buffer getFirstBuffer(FileInput input)
|
271
|
+
{
|
272
|
+
RuntimeException decodeException = null;
|
273
|
+
try {
|
274
|
+
while (input.nextFile()) {
|
275
|
+
Buffer sample = input.poll();
|
276
|
+
if (sample != null) {
|
277
|
+
return sample;
|
278
|
+
}
|
279
|
+
}
|
280
|
+
} catch (RuntimeException ex) {
|
281
|
+
// ignores exceptions because FileDecoderPlugin can throw exceptions
|
282
|
+
// such as "Unexpected end of ZLIB input stream"
|
283
|
+
decodeException = ex;
|
284
|
+
}
|
285
|
+
if (decodeException != null) {
|
286
|
+
throw decodeException;
|
287
|
+
}
|
288
|
+
throw new NoSampleException("No input buffer to guess");
|
289
|
+
}
|
290
|
+
}
|
291
|
+
|
292
|
+
public static class GuessedNoticeError
|
293
|
+
extends Error
|
294
|
+
{
|
295
|
+
private final NextConfig guessedConfig;
|
296
|
+
|
297
|
+
public GuessedNoticeError(NextConfig guessedConfig)
|
298
|
+
{
|
299
|
+
this.guessedConfig = guessedConfig;
|
300
|
+
}
|
301
|
+
|
302
|
+
public NextConfig getGuessedConfig()
|
303
|
+
{
|
304
|
+
return guessedConfig;
|
305
|
+
}
|
306
|
+
}
|
307
|
+
}
|
@@ -0,0 +1,274 @@
|
|
1
|
+
package org.embulk.exec;
|
2
|
+
|
3
|
+
import java.util.List;
|
4
|
+
import java.util.ArrayList;
|
5
|
+
import java.util.concurrent.Callable;
|
6
|
+
import java.util.concurrent.Future;
|
7
|
+
import java.util.concurrent.Executors;
|
8
|
+
import java.util.concurrent.ExecutorService;
|
9
|
+
import java.util.concurrent.ExecutionException;
|
10
|
+
import java.util.concurrent.atomic.AtomicInteger;
|
11
|
+
|
12
|
+
import com.google.common.collect.ImmutableList;
|
13
|
+
import com.google.inject.Inject;
|
14
|
+
import com.google.inject.Injector;
|
15
|
+
import com.google.common.base.Throwables;
|
16
|
+
import com.google.common.util.concurrent.ThreadFactoryBuilder;
|
17
|
+
import org.embulk.config.Task;
|
18
|
+
import org.embulk.config.Config;
|
19
|
+
import org.embulk.config.ConfigSource;
|
20
|
+
import org.embulk.config.TaskSource;
|
21
|
+
import org.embulk.config.NextConfig;
|
22
|
+
import org.embulk.config.CommitReport;
|
23
|
+
import org.embulk.plugin.PluginType;
|
24
|
+
import org.embulk.spi.Schema;
|
25
|
+
import org.embulk.spi.Exec;
|
26
|
+
import org.embulk.spi.ExecSession;
|
27
|
+
import org.embulk.spi.ExecAction;
|
28
|
+
import org.embulk.spi.InputPlugin;
|
29
|
+
import org.embulk.spi.OutputPlugin;
|
30
|
+
import org.embulk.spi.TransactionalPageOutput;
|
31
|
+
import org.slf4j.Logger;
|
32
|
+
|
33
|
+
public class LocalExecutor
|
34
|
+
{
|
35
|
+
private final Injector injector;
|
36
|
+
private final ConfigSource systemConfig;
|
37
|
+
private final int maxThreads;
|
38
|
+
private final ExecutorService executor;
|
39
|
+
|
40
|
+
private Logger log;
|
41
|
+
private final AtomicInteger runningTaskCount;
|
42
|
+
private final AtomicInteger completedTaskCount;
|
43
|
+
|
44
|
+
public interface ExecutorTask
|
45
|
+
extends Task
|
46
|
+
{
|
47
|
+
@Config("in")
|
48
|
+
public ConfigSource getInputConfig();
|
49
|
+
|
50
|
+
@Config("out")
|
51
|
+
public ConfigSource getOutputConfig();
|
52
|
+
|
53
|
+
public TaskSource getInputTask();
|
54
|
+
public void setInputTask(TaskSource taskSource);
|
55
|
+
|
56
|
+
public TaskSource getOutputTask();
|
57
|
+
public void setOutputTask(TaskSource taskSource);
|
58
|
+
}
|
59
|
+
|
60
|
+
@Inject
|
61
|
+
public LocalExecutor(Injector injector,
|
62
|
+
@ForSystemConfig ConfigSource systemConfig)
|
63
|
+
{
|
64
|
+
this.injector = injector;
|
65
|
+
this.systemConfig = systemConfig;
|
66
|
+
|
67
|
+
int defaultMaxThreads = Runtime.getRuntime().availableProcessors() * 2;
|
68
|
+
this.maxThreads = systemConfig.get(Integer.class, "max_threads", defaultMaxThreads);
|
69
|
+
this.executor = Executors.newFixedThreadPool(maxThreads,
|
70
|
+
new ThreadFactoryBuilder()
|
71
|
+
.setNameFormat("embulk-executor-%d")
|
72
|
+
.setDaemon(true)
|
73
|
+
.build());
|
74
|
+
|
75
|
+
this.runningTaskCount = new AtomicInteger(0);
|
76
|
+
this.completedTaskCount = new AtomicInteger(0);
|
77
|
+
}
|
78
|
+
|
79
|
+
private static class ExecuteResultBuilder
|
80
|
+
{
|
81
|
+
private NextConfig inputNextConfig;
|
82
|
+
private NextConfig outputNextConfig;
|
83
|
+
|
84
|
+
public void setInputNextConfig(NextConfig inputNextConfig)
|
85
|
+
{
|
86
|
+
this.inputNextConfig = inputNextConfig;
|
87
|
+
}
|
88
|
+
|
89
|
+
public void setOutputNextConfig(NextConfig outputNextConfig)
|
90
|
+
{
|
91
|
+
this.outputNextConfig = outputNextConfig;
|
92
|
+
}
|
93
|
+
|
94
|
+
public NextConfig getInputNextConfig()
|
95
|
+
{
|
96
|
+
return inputNextConfig;
|
97
|
+
}
|
98
|
+
|
99
|
+
public NextConfig getOutputNextConfig()
|
100
|
+
{
|
101
|
+
return outputNextConfig;
|
102
|
+
}
|
103
|
+
|
104
|
+
public ExecuteResult build()
|
105
|
+
{
|
106
|
+
if (inputNextConfig == null) {
|
107
|
+
inputNextConfig = Exec.newNextConfig();
|
108
|
+
}
|
109
|
+
if (outputNextConfig == null) {
|
110
|
+
outputNextConfig = Exec.newNextConfig();
|
111
|
+
}
|
112
|
+
NextConfig nextConfig = inputNextConfig.deepCopy().merge(outputNextConfig);
|
113
|
+
return new ExecuteResult(nextConfig);
|
114
|
+
}
|
115
|
+
}
|
116
|
+
|
117
|
+
private static class ProcessResult
|
118
|
+
{
|
119
|
+
private final CommitReport inputCommitReport;
|
120
|
+
private final CommitReport outputCommitReport;
|
121
|
+
|
122
|
+
public ProcessResult(CommitReport inputCommitReport, CommitReport outputCommitReport)
|
123
|
+
{
|
124
|
+
this.inputCommitReport = inputCommitReport;
|
125
|
+
this.outputCommitReport = outputCommitReport;
|
126
|
+
}
|
127
|
+
|
128
|
+
public CommitReport getInputCommitReport()
|
129
|
+
{
|
130
|
+
return inputCommitReport;
|
131
|
+
}
|
132
|
+
|
133
|
+
public CommitReport getOutputCommitReport()
|
134
|
+
{
|
135
|
+
return outputCommitReport;
|
136
|
+
}
|
137
|
+
}
|
138
|
+
|
139
|
+
protected InputPlugin newInputPlugin(ExecutorTask task)
|
140
|
+
{
|
141
|
+
return Exec.newPlugin(InputPlugin.class, task.getInputConfig().get(PluginType.class, "type"));
|
142
|
+
}
|
143
|
+
|
144
|
+
protected OutputPlugin newOutputPlugin(ExecutorTask task)
|
145
|
+
{
|
146
|
+
return Exec.newPlugin(OutputPlugin.class, task.getOutputConfig().get(PluginType.class, "type"));
|
147
|
+
}
|
148
|
+
|
149
|
+
public ExecuteResult run(ExecSession exec, final ConfigSource config)
|
150
|
+
{
|
151
|
+
log = exec.getLogger(LocalExecutor.class);
|
152
|
+
try {
|
153
|
+
return Exec.doWith(exec, new ExecAction<ExecuteResult>() {
|
154
|
+
public ExecuteResult run()
|
155
|
+
{
|
156
|
+
return doRun(config);
|
157
|
+
}
|
158
|
+
});
|
159
|
+
} catch (Exception ex) {
|
160
|
+
throw Throwables.propagate(ex);
|
161
|
+
}
|
162
|
+
}
|
163
|
+
|
164
|
+
private ExecuteResult doRun(ConfigSource config)
|
165
|
+
{
|
166
|
+
final ExecutorTask task = config.loadConfig(ExecutorTask.class);
|
167
|
+
|
168
|
+
final InputPlugin in = newInputPlugin(task);
|
169
|
+
final OutputPlugin out = newOutputPlugin(task);
|
170
|
+
|
171
|
+
final ExecuteResultBuilder execResult = new ExecuteResultBuilder();
|
172
|
+
|
173
|
+
NextConfig inputNextConfig = in.transaction(task.getInputConfig(), new InputPlugin.Control() {
|
174
|
+
public List<CommitReport> run(final TaskSource inputTask, final Schema schema, final int processorCount)
|
175
|
+
{
|
176
|
+
final ImmutableList.Builder<CommitReport> inputCommitReports = ImmutableList.builder();
|
177
|
+
NextConfig outputNextConfig = out.transaction(task.getOutputConfig(), schema, processorCount, new OutputPlugin.Control() {
|
178
|
+
public List<CommitReport> run(final TaskSource outputTask)
|
179
|
+
{
|
180
|
+
final ImmutableList.Builder<CommitReport> outputCommitReports = ImmutableList.builder();
|
181
|
+
task.setInputTask(inputTask);
|
182
|
+
task.setOutputTask(outputTask);
|
183
|
+
|
184
|
+
//log.debug("input: %s", task.getInputTask());
|
185
|
+
//log.debug("output: %s", task.getOutputTask());
|
186
|
+
|
187
|
+
List<ProcessResult> results = process(task.dump(), schema, processorCount);
|
188
|
+
for (ProcessResult result : results) {
|
189
|
+
inputCommitReports.add(result.getInputCommitReport());
|
190
|
+
outputCommitReports.add(result.getOutputCommitReport());
|
191
|
+
}
|
192
|
+
|
193
|
+
return outputCommitReports.build();
|
194
|
+
}
|
195
|
+
});
|
196
|
+
execResult.setOutputNextConfig(outputNextConfig);
|
197
|
+
return inputCommitReports.build();
|
198
|
+
}
|
199
|
+
});
|
200
|
+
execResult.setInputNextConfig(inputNextConfig);
|
201
|
+
|
202
|
+
return execResult.build();
|
203
|
+
}
|
204
|
+
|
205
|
+
private List<ProcessResult> process(TaskSource taskSource, Schema schema, int processorCount)
|
206
|
+
{
|
207
|
+
List<Future<ProcessResult>> futures = new ArrayList<>();
|
208
|
+
List<ProcessResult> joined = new ArrayList<>();
|
209
|
+
try {
|
210
|
+
log.info("Running {} tasks using {} local threads", processorCount, maxThreads);
|
211
|
+
showProgress(processorCount);
|
212
|
+
for (int i=0; i < processorCount; i++) {
|
213
|
+
futures.add(startProcessor(taskSource, schema, i));
|
214
|
+
}
|
215
|
+
|
216
|
+
for (int i=0; i < processorCount; i++) {
|
217
|
+
try {
|
218
|
+
joined.add(futures.get(i).get());
|
219
|
+
showProgress(processorCount);
|
220
|
+
|
221
|
+
} catch (ExecutionException ex) {
|
222
|
+
throw Throwables.propagate(ex.getCause());
|
223
|
+
} catch (InterruptedException ex) {
|
224
|
+
throw new ExecuteInterruptedException(ex);
|
225
|
+
}
|
226
|
+
}
|
227
|
+
return joined;
|
228
|
+
} finally {
|
229
|
+
for (int i=joined.size(); i < futures.size(); i++) {
|
230
|
+
futures.get(i).cancel(true);
|
231
|
+
// TODO join?
|
232
|
+
}
|
233
|
+
}
|
234
|
+
}
|
235
|
+
|
236
|
+
private void showProgress(int total)
|
237
|
+
{
|
238
|
+
int running = runningTaskCount.get();
|
239
|
+
int done = completedTaskCount.get();
|
240
|
+
log.info(String.format("{done:%3d / %d, running: %d}", done, total, running));
|
241
|
+
}
|
242
|
+
|
243
|
+
private Future<ProcessResult> startProcessor(final TaskSource taskSource, final Schema schema, final int index)
|
244
|
+
{
|
245
|
+
return executor.submit(new Callable<ProcessResult>() {
|
246
|
+
public ProcessResult call()
|
247
|
+
{
|
248
|
+
try {
|
249
|
+
runningTaskCount.getAndIncrement();
|
250
|
+
final ExecutorTask task = taskSource.loadTask(ExecutorTask.class);
|
251
|
+
final InputPlugin in = newInputPlugin(task);
|
252
|
+
final OutputPlugin out = newOutputPlugin(task);
|
253
|
+
|
254
|
+
TransactionalPageOutput tran = out.open(task.getOutputTask(), schema, index);
|
255
|
+
boolean committed = false;
|
256
|
+
try {
|
257
|
+
CommitReport inReport = in.run(task.getInputTask(), schema, index, tran);
|
258
|
+
CommitReport outReport = tran.commit(); // TODO check output.finish() is called. wrap or abstract
|
259
|
+
committed = true;
|
260
|
+
return new ProcessResult(inReport, outReport);
|
261
|
+
} finally {
|
262
|
+
if (!committed) {
|
263
|
+
tran.abort();
|
264
|
+
}
|
265
|
+
tran.close();
|
266
|
+
}
|
267
|
+
} finally {
|
268
|
+
runningTaskCount.getAndDecrement();
|
269
|
+
completedTaskCount.getAndIncrement();
|
270
|
+
}
|
271
|
+
}
|
272
|
+
});
|
273
|
+
}
|
274
|
+
}
|