embulk 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/.gitignore +13 -0
- data/Gemfile +3 -0
- data/Gemfile.lock +33 -0
- data/README.md +117 -0
- data/Rakefile +58 -0
- data/bin/embulk +63 -0
- data/build.gradle +149 -0
- data/embulk-cli/build.gradle +6 -0
- data/embulk-cli/pom.xml +94 -0
- data/embulk-cli/src/main/java/org/embulk/cli/Main.java +15 -0
- data/embulk-core/build.gradle +6 -0
- data/embulk-core/pom.xml +143 -0
- data/embulk-core/src/main/java/org/embulk/EmbulkService.java +39 -0
- data/embulk-core/src/main/java/org/embulk/command/Runner.java +199 -0
- data/embulk-core/src/main/java/org/embulk/command/TablePrinter.java +119 -0
- data/embulk-core/src/main/java/org/embulk/config/CommitReport.java +26 -0
- data/embulk-core/src/main/java/org/embulk/config/Config.java +15 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigDefault.java +15 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigException.java +20 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigLoader.java +83 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigSource.java +28 -0
- data/embulk-core/src/main/java/org/embulk/config/DataSource.java +35 -0
- data/embulk-core/src/main/java/org/embulk/config/DataSourceImpl.java +208 -0
- data/embulk-core/src/main/java/org/embulk/config/DataSourceSerDe.java +80 -0
- data/embulk-core/src/main/java/org/embulk/config/GenericTypeReference.java +20 -0
- data/embulk-core/src/main/java/org/embulk/config/ModelManager.java +125 -0
- data/embulk-core/src/main/java/org/embulk/config/NextConfig.java +26 -0
- data/embulk-core/src/main/java/org/embulk/config/Task.java +10 -0
- data/embulk-core/src/main/java/org/embulk/config/TaskInvocationHandler.java +180 -0
- data/embulk-core/src/main/java/org/embulk/config/TaskSerDe.java +343 -0
- data/embulk-core/src/main/java/org/embulk/config/TaskSource.java +28 -0
- data/embulk-core/src/main/java/org/embulk/config/TaskValidationException.java +37 -0
- data/embulk-core/src/main/java/org/embulk/config/TaskValidator.java +24 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExecModule.java +45 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExecuteInterruptedException.java +10 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExecuteResult.java +19 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExtensionServiceLoaderModule.java +43 -0
- data/embulk-core/src/main/java/org/embulk/exec/ForSystemConfig.java +16 -0
- data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +307 -0
- data/embulk-core/src/main/java/org/embulk/exec/LocalExecutor.java +274 -0
- data/embulk-core/src/main/java/org/embulk/exec/LoggerProvider.java +30 -0
- data/embulk-core/src/main/java/org/embulk/exec/NoSampleException.java +10 -0
- data/embulk-core/src/main/java/org/embulk/exec/PooledBufferAllocator.java +58 -0
- data/embulk-core/src/main/java/org/embulk/exec/PreviewExecutor.java +138 -0
- data/embulk-core/src/main/java/org/embulk/exec/PreviewResult.java +27 -0
- data/embulk-core/src/main/java/org/embulk/exec/PreviewedNoticeError.java +17 -0
- data/embulk-core/src/main/java/org/embulk/exec/SamplingParserPlugin.java +116 -0
- data/embulk-core/src/main/java/org/embulk/exec/SystemConfigModule.java +24 -0
- data/embulk-core/src/main/java/org/embulk/jruby/JRubyPluginSource.java +69 -0
- data/embulk-core/src/main/java/org/embulk/jruby/JRubyScriptingModule.java +100 -0
- data/embulk-core/src/main/java/org/embulk/plugin/BuiltinPluginSourceModule.java +17 -0
- data/embulk-core/src/main/java/org/embulk/plugin/InjectedPluginSource.java +92 -0
- data/embulk-core/src/main/java/org/embulk/plugin/PluginManager.java +34 -0
- data/embulk-core/src/main/java/org/embulk/plugin/PluginSource.java +6 -0
- data/embulk-core/src/main/java/org/embulk/plugin/PluginSourceNotMatchException.java +19 -0
- data/embulk-core/src/main/java/org/embulk/plugin/PluginType.java +47 -0
- data/embulk-core/src/main/java/org/embulk/plugin/SetThreadContextClassLoader.java +19 -0
- data/embulk-core/src/main/java/org/embulk/spi/Buffer.java +113 -0
- data/embulk-core/src/main/java/org/embulk/spi/BufferAllocator.java +8 -0
- data/embulk-core/src/main/java/org/embulk/spi/Column.java +92 -0
- data/embulk-core/src/main/java/org/embulk/spi/ColumnConfig.java +79 -0
- data/embulk-core/src/main/java/org/embulk/spi/DecoderPlugin.java +16 -0
- data/embulk-core/src/main/java/org/embulk/spi/EncoderPlugin.java +16 -0
- data/embulk-core/src/main/java/org/embulk/spi/Exec.java +76 -0
- data/embulk-core/src/main/java/org/embulk/spi/ExecAction.java +6 -0
- data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +105 -0
- data/embulk-core/src/main/java/org/embulk/spi/Extension.java +42 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileInput.java +11 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileInputPlugin.java +19 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +113 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileOutput.java +13 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileOutputPlugin.java +20 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileOutputRunner.java +167 -0
- data/embulk-core/src/main/java/org/embulk/spi/FormatterPlugin.java +18 -0
- data/embulk-core/src/main/java/org/embulk/spi/GuessPlugin.java +9 -0
- data/embulk-core/src/main/java/org/embulk/spi/InputPlugin.java +20 -0
- data/embulk-core/src/main/java/org/embulk/spi/OutputPlugin.java +21 -0
- data/embulk-core/src/main/java/org/embulk/spi/Page.java +45 -0
- data/embulk-core/src/main/java/org/embulk/spi/PageBuilder.java +327 -0
- data/embulk-core/src/main/java/org/embulk/spi/PageFormat.java +47 -0
- data/embulk-core/src/main/java/org/embulk/spi/PageOutput.java +11 -0
- data/embulk-core/src/main/java/org/embulk/spi/PageReader.java +227 -0
- data/embulk-core/src/main/java/org/embulk/spi/ParserPlugin.java +17 -0
- data/embulk-core/src/main/java/org/embulk/spi/Schema.java +101 -0
- data/embulk-core/src/main/java/org/embulk/spi/SchemaConfig.java +52 -0
- data/embulk-core/src/main/java/org/embulk/spi/SchemaVisitor.java +14 -0
- data/embulk-core/src/main/java/org/embulk/spi/Transactional.java +10 -0
- data/embulk-core/src/main/java/org/embulk/spi/TransactionalFileInput.java +17 -0
- data/embulk-core/src/main/java/org/embulk/spi/TransactionalFileOutput.java +19 -0
- data/embulk-core/src/main/java/org/embulk/spi/TransactionalPageOutput.java +17 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/DateTimeZoneSerDe.java +57 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/JRubyTimeParserHelper.java +8 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/JRubyTimeParserHelperFactory.java +6 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/Timestamp.java +159 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormat.java +98 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormatter.java +55 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParseException.java +6 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParser.java +60 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampSerDe.java +50 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/AbstractType.java +55 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/BooleanType.java +12 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/DoubleType.java +12 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/LongType.java +12 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/StringType.java +12 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/TimestampType.java +39 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/Type.java +15 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/TypeDeserializer.java +47 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/Types.java +14 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/CharsetSerDe.java +55 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Decoders.java +81 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Encoders.java +81 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/FileInputInputStream.java +110 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/FileOutputOutputStream.java +94 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/InputStreamFileInput.java +111 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Inputs.java +74 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/LineDecoder.java +118 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/LineEncoder.java +109 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/ListFileInput.java +52 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Newline.java +38 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/PagePrinter.java +102 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Pages.java +139 -0
- data/embulk-core/src/test/java/org/embulk/EmbulkTestRuntime.java +110 -0
- data/embulk-core/src/test/java/org/embulk/GuiceBinder.java +72 -0
- data/embulk-core/src/test/java/org/embulk/RandomManager.java +53 -0
- data/embulk-core/src/test/java/org/embulk/TestPluginSourceModule.java +23 -0
- data/embulk-core/src/test/java/org/embulk/TestUtilityModule.java +17 -0
- data/embulk-core/src/test/java/org/embulk/config/TestConfigSource.java +114 -0
- data/embulk-core/src/test/java/org/embulk/config/TestTaskSource.java +70 -0
- data/embulk-core/src/test/java/org/embulk/plugin/MockPluginSource.java +57 -0
- data/embulk-core/src/test/java/org/embulk/plugin/TestPluginType.java +18 -0
- data/embulk-core/src/test/java/org/embulk/spi/MockFileOutput.java +63 -0
- data/embulk-core/src/test/java/org/embulk/spi/MockFormatterPlugin.java +101 -0
- data/embulk-core/src/test/java/org/embulk/spi/MockParserPlugin.java +73 -0
- data/embulk-core/src/test/java/org/embulk/spi/PageTestUtils.java +78 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestFileInputInputStream.java +67 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestFileInputRunner.java +180 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestFileOutputRunner.java +192 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestInputStreamFileInput.java +188 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestPageBuilderReader.java +301 -0
- data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestamp.java +116 -0
- data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestampFormatterParser.java +52 -0
- data/embulk-core/src/test/java/org/embulk/spi/type/TestTypeSerDe.java +45 -0
- data/embulk-core/src/test/java/org/embulk/spi/util/TestLineDecoder.java +132 -0
- data/embulk-core/src/test/java/org/embulk/spi/util/TestLineEncoder.java +123 -0
- data/embulk-standards/build.gradle +6 -0
- data/embulk-standards/pom.xml +68 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java +158 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +233 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvTokenizer.java +355 -0
- data/embulk-standards/src/main/java/org/embulk/standards/GzipFileDecoderPlugin.java +55 -0
- data/embulk-standards/src/main/java/org/embulk/standards/GzipFileEncoderPlugin.java +39 -0
- data/embulk-standards/src/main/java/org/embulk/standards/LocalFileInputPlugin.java +138 -0
- data/embulk-standards/src/main/java/org/embulk/standards/LocalFileOutputPlugin.java +128 -0
- data/embulk-standards/src/main/java/org/embulk/standards/NullOutputPlugin.java +46 -0
- data/embulk-standards/src/main/java/org/embulk/standards/S3FileInputPlugin.java +238 -0
- data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginExtension.java +16 -0
- data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginModule.java +44 -0
- data/embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java +71 -0
- data/embulk-standards/src/main/resources/META-INF/services/org.embulk.spi.Extension +1 -0
- data/embulk-standards/src/test/java/org/embulk/standards/TestCsvParserPlugin.java +69 -0
- data/embulk-standards/src/test/java/org/embulk/standards/TestCsvTokenizer.java +291 -0
- data/embulk-standards/src/test/java/org/embulk/standards/TestS3FileInputPlugin.java +43 -0
- data/embulk.gemspec +27 -0
- data/examples/config.yml +34 -0
- data/examples/csv/sample.csv.gz +0 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +6 -0
- data/gradlew +164 -0
- data/gradlew.bat +90 -0
- data/lib/embulk.rb +16 -0
- data/lib/embulk/buffer.rb +17 -0
- data/lib/embulk/column.rb +47 -0
- data/lib/embulk/command/embulk.rb +39 -0
- data/lib/embulk/command/embulk_example.rb +32 -0
- data/lib/embulk/command/embulk_generate_bin.rb +62 -0
- data/lib/embulk/command/embulk_run.rb +243 -0
- data/lib/embulk/data/bundle/.bundle/config +3 -0
- data/lib/embulk/data/bundle/Gemfile +31 -0
- data/lib/embulk/data/bundle/Gemfile.lock +8 -0
- data/lib/embulk/data/bundle/embulk/input_example.rb +40 -0
- data/lib/embulk/data/bundle/embulk/output_example.rb +51 -0
- data/lib/embulk/data_source.rb +66 -0
- data/lib/embulk/error.rb +5 -0
- data/lib/embulk/guess_charset.rb +26 -0
- data/lib/embulk/guess_csv.rb +195 -0
- data/lib/embulk/guess_gzip.rb +18 -0
- data/lib/embulk/guess_newline.rb +20 -0
- data/lib/embulk/guess_plugin.rb +113 -0
- data/lib/embulk/input_plugin.rb +53 -0
- data/lib/embulk/java/bootstrap.rb +12 -0
- data/lib/embulk/java/imports.rb +26 -0
- data/lib/embulk/java/time_helper.rb +77 -0
- data/lib/embulk/output_plugin.rb +104 -0
- data/lib/embulk/page.rb +28 -0
- data/lib/embulk/page_builder.rb +22 -0
- data/lib/embulk/plugin.rb +152 -0
- data/lib/embulk/plugin_registry.rb +70 -0
- data/lib/embulk/schema.rb +85 -0
- data/lib/embulk/time_format_guess.rb +331 -0
- data/lib/embulk/version.rb +3 -0
- data/pom.xml +533 -0
- data/settings.gradle +5 -0
- metadata +370 -0
@@ -0,0 +1,30 @@
|
|
1
|
+
package org.embulk.exec;
|
2
|
+
|
3
|
+
import java.util.Properties;
|
4
|
+
import org.slf4j.ILoggerFactory;
|
5
|
+
import org.slf4j.LoggerFactory;
|
6
|
+
import org.apache.log4j.PropertyConfigurator;
|
7
|
+
import com.google.inject.Provider;
|
8
|
+
|
9
|
+
public class LoggerProvider
|
10
|
+
implements Provider<ILoggerFactory>
|
11
|
+
{
|
12
|
+
public LoggerProvider()
|
13
|
+
{
|
14
|
+
// TODO system config
|
15
|
+
Properties prop = new Properties();
|
16
|
+
|
17
|
+
prop.setProperty("log4j.rootLogger", "INFO,root");
|
18
|
+
prop.setProperty("log4j.appender.root", "org.apache.log4j.ConsoleAppender");
|
19
|
+
prop.setProperty("log4j.appender.root.layout", "org.apache.log4j.PatternLayout");
|
20
|
+
prop.setProperty("log4j.appender.root.layout.ConversionPattern", "%d [%p]: %t:%c: %m%n");
|
21
|
+
|
22
|
+
// TODO
|
23
|
+
PropertyConfigurator.configure(prop);
|
24
|
+
}
|
25
|
+
|
26
|
+
public ILoggerFactory get()
|
27
|
+
{
|
28
|
+
return LoggerFactory.getILoggerFactory();
|
29
|
+
}
|
30
|
+
}
|
@@ -0,0 +1,58 @@
|
|
1
|
+
package org.embulk.exec;
|
2
|
+
|
3
|
+
import io.netty.buffer.PooledByteBufAllocator;
|
4
|
+
import io.netty.buffer.ByteBuf;
|
5
|
+
import io.netty.util.ResourceLeakDetector;
|
6
|
+
import org.embulk.spi.Buffer;
|
7
|
+
import org.embulk.spi.BufferAllocator;
|
8
|
+
|
9
|
+
public class PooledBufferAllocator
|
10
|
+
implements BufferAllocator
|
11
|
+
{
|
12
|
+
private PooledByteBufAllocator nettyBuffer;
|
13
|
+
|
14
|
+
public PooledBufferAllocator()
|
15
|
+
{
|
16
|
+
// TODO configure parameters
|
17
|
+
this.nettyBuffer = new PooledByteBufAllocator(false);
|
18
|
+
}
|
19
|
+
|
20
|
+
public Buffer allocate()
|
21
|
+
{
|
22
|
+
return new NettyByteBufBuffer(nettyBuffer.buffer());
|
23
|
+
}
|
24
|
+
|
25
|
+
public Buffer allocate(int minimumCapacity)
|
26
|
+
{
|
27
|
+
int size = 32*1024;
|
28
|
+
while (size < minimumCapacity) {
|
29
|
+
size *= 2;
|
30
|
+
}
|
31
|
+
return new NettyByteBufBuffer(nettyBuffer.buffer(size));
|
32
|
+
}
|
33
|
+
|
34
|
+
private static class NettyByteBufBuffer
|
35
|
+
extends Buffer
|
36
|
+
{
|
37
|
+
private ByteBuf buf;
|
38
|
+
private Exception doubleFreeCheck;
|
39
|
+
|
40
|
+
public NettyByteBufBuffer(ByteBuf buf)
|
41
|
+
{
|
42
|
+
super(buf.array(), buf.arrayOffset(), buf.capacity());
|
43
|
+
this.buf = buf;
|
44
|
+
}
|
45
|
+
|
46
|
+
public void release()
|
47
|
+
{
|
48
|
+
if (doubleFreeCheck != null) {
|
49
|
+
doubleFreeCheck.printStackTrace();
|
50
|
+
}
|
51
|
+
if (buf != null) {
|
52
|
+
buf.release();
|
53
|
+
buf = null;
|
54
|
+
doubleFreeCheck = new NullPointerException();
|
55
|
+
}
|
56
|
+
}
|
57
|
+
}
|
58
|
+
}
|
@@ -0,0 +1,138 @@
|
|
1
|
+
package org.embulk.exec;
|
2
|
+
|
3
|
+
import java.util.List;
|
4
|
+
import java.util.ArrayList;
|
5
|
+
import javax.validation.constraints.NotNull;
|
6
|
+
import com.google.inject.Inject;
|
7
|
+
import com.google.inject.Injector;
|
8
|
+
import com.google.common.base.Throwables;
|
9
|
+
import org.embulk.config.Config;
|
10
|
+
import org.embulk.config.ConfigDefault;
|
11
|
+
import org.embulk.config.Task;
|
12
|
+
import org.embulk.config.TaskSource;
|
13
|
+
import org.embulk.config.ConfigSource;
|
14
|
+
import org.embulk.config.CommitReport;
|
15
|
+
import org.embulk.plugin.PluginType;
|
16
|
+
import org.embulk.spi.Schema;
|
17
|
+
import org.embulk.spi.Page;
|
18
|
+
import org.embulk.spi.PageOutput;
|
19
|
+
import org.embulk.spi.PageReader;
|
20
|
+
import org.embulk.spi.InputPlugin;
|
21
|
+
import org.embulk.spi.Exec;
|
22
|
+
import org.embulk.spi.ExecSession;
|
23
|
+
import org.embulk.spi.ExecAction;
|
24
|
+
|
25
|
+
public class PreviewExecutor
|
26
|
+
{
|
27
|
+
private final Injector injector;
|
28
|
+
private final ConfigSource systemConfig;
|
29
|
+
|
30
|
+
public interface PreviewTask
|
31
|
+
extends Task
|
32
|
+
{
|
33
|
+
@Config("in")
|
34
|
+
@NotNull
|
35
|
+
public ConfigSource getInputConfig();
|
36
|
+
|
37
|
+
@Config("preview_sample_rows")
|
38
|
+
@ConfigDefault("15")
|
39
|
+
public int getSampleRows();
|
40
|
+
|
41
|
+
public TaskSource getInputTask();
|
42
|
+
public void setInputTask(TaskSource taskSource);
|
43
|
+
}
|
44
|
+
|
45
|
+
@Inject
|
46
|
+
public PreviewExecutor(Injector injector,
|
47
|
+
@ForSystemConfig ConfigSource systemConfig)
|
48
|
+
{
|
49
|
+
this.injector = injector;
|
50
|
+
this.systemConfig = systemConfig;
|
51
|
+
}
|
52
|
+
|
53
|
+
public PreviewResult preview(ExecSession exec, final ConfigSource config)
|
54
|
+
{
|
55
|
+
try {
|
56
|
+
return Exec.doWith(exec, new ExecAction<PreviewResult>() {
|
57
|
+
public PreviewResult run()
|
58
|
+
{
|
59
|
+
return doPreview(config);
|
60
|
+
}
|
61
|
+
});
|
62
|
+
} catch (Exception ex) {
|
63
|
+
throw Throwables.propagate(ex);
|
64
|
+
}
|
65
|
+
}
|
66
|
+
|
67
|
+
protected InputPlugin newInputPlugin(PreviewTask task)
|
68
|
+
{
|
69
|
+
return Exec.newPlugin(InputPlugin.class, task.getInputConfig().get(PluginType.class, "type"));
|
70
|
+
}
|
71
|
+
|
72
|
+
private PreviewResult doPreview(ConfigSource config)
|
73
|
+
{
|
74
|
+
final PreviewTask task = config.loadConfig(PreviewTask.class);
|
75
|
+
InputPlugin input = newInputPlugin(task);
|
76
|
+
|
77
|
+
try {
|
78
|
+
input.transaction(task.getInputConfig(), new InputPlugin.Control() {
|
79
|
+
public List<CommitReport> run(TaskSource taskSource, Schema schema, int processorCount)
|
80
|
+
{
|
81
|
+
InputPlugin input = newInputPlugin(task);
|
82
|
+
try (SamplingPageOutput out = new SamplingPageOutput(task.getSampleRows(), schema)) {
|
83
|
+
input.run(taskSource, schema, 0, out);
|
84
|
+
}
|
85
|
+
throw new NoSampleException("No input records to preview");
|
86
|
+
}
|
87
|
+
});
|
88
|
+
throw new AssertionError("PreviewExecutor executor must throw PreviewedNoticeError");
|
89
|
+
} catch (PreviewedNoticeError previewed) {
|
90
|
+
return previewed.getPreviewResult();
|
91
|
+
}
|
92
|
+
}
|
93
|
+
|
94
|
+
private static class SamplingPageOutput
|
95
|
+
implements PageOutput
|
96
|
+
{
|
97
|
+
private final int sampleRows;
|
98
|
+
private final Schema schema;
|
99
|
+
private List<Page> pages;
|
100
|
+
private int recordCount;
|
101
|
+
|
102
|
+
public SamplingPageOutput(int sampleRows, Schema schema)
|
103
|
+
{
|
104
|
+
this.sampleRows = sampleRows;
|
105
|
+
this.schema = schema;
|
106
|
+
this.pages = new ArrayList<Page>();
|
107
|
+
}
|
108
|
+
|
109
|
+
@Override
|
110
|
+
public void add(Page page)
|
111
|
+
{
|
112
|
+
pages.add(page);
|
113
|
+
recordCount += PageReader.getRecordCount(page);
|
114
|
+
if (recordCount >= sampleRows) {
|
115
|
+
finish();
|
116
|
+
}
|
117
|
+
}
|
118
|
+
|
119
|
+
@Override
|
120
|
+
public void finish()
|
121
|
+
{
|
122
|
+
if (recordCount == 0) {
|
123
|
+
throw new NoSampleException("No input records to preview");
|
124
|
+
}
|
125
|
+
PreviewResult res = new PreviewResult(schema, pages);
|
126
|
+
pages = null;
|
127
|
+
throw new PreviewedNoticeError(res);
|
128
|
+
}
|
129
|
+
|
130
|
+
@Override
|
131
|
+
public void close()
|
132
|
+
{
|
133
|
+
for (Page page : pages) {
|
134
|
+
page.release();
|
135
|
+
}
|
136
|
+
}
|
137
|
+
}
|
138
|
+
}
|
@@ -0,0 +1,27 @@
|
|
1
|
+
package org.embulk.exec;
|
2
|
+
|
3
|
+
import java.util.List;
|
4
|
+
import org.embulk.spi.Schema;
|
5
|
+
import org.embulk.spi.Page;
|
6
|
+
|
7
|
+
public class PreviewResult
|
8
|
+
{
|
9
|
+
private final Schema schema;
|
10
|
+
private final List<Page> pages;
|
11
|
+
|
12
|
+
public PreviewResult(Schema schema, List<Page> pages)
|
13
|
+
{
|
14
|
+
this.schema = schema;
|
15
|
+
this.pages = pages;
|
16
|
+
}
|
17
|
+
|
18
|
+
public Schema getSchema()
|
19
|
+
{
|
20
|
+
return schema;
|
21
|
+
}
|
22
|
+
|
23
|
+
public List<Page> getPages()
|
24
|
+
{
|
25
|
+
return pages;
|
26
|
+
}
|
27
|
+
}
|
@@ -0,0 +1,17 @@
|
|
1
|
+
package org.embulk.exec;
|
2
|
+
|
3
|
+
public class PreviewedNoticeError
|
4
|
+
extends Error
|
5
|
+
{
|
6
|
+
private final PreviewResult previewResult;
|
7
|
+
|
8
|
+
public PreviewedNoticeError(PreviewResult previewResult)
|
9
|
+
{
|
10
|
+
this.previewResult = previewResult;
|
11
|
+
}
|
12
|
+
|
13
|
+
public PreviewResult getPreviewResult()
|
14
|
+
{
|
15
|
+
return previewResult;
|
16
|
+
}
|
17
|
+
}
|
@@ -0,0 +1,116 @@
|
|
1
|
+
package org.embulk.exec;
|
2
|
+
|
3
|
+
import java.util.List;
|
4
|
+
import com.google.inject.Inject;
|
5
|
+
import org.embulk.config.TaskSource;
|
6
|
+
import org.embulk.config.ConfigSource;
|
7
|
+
import org.embulk.config.CommitReport;
|
8
|
+
import org.embulk.plugin.PluginType;
|
9
|
+
import org.embulk.spi.Schema;
|
10
|
+
import org.embulk.spi.Exec;
|
11
|
+
import org.embulk.spi.Page;
|
12
|
+
import org.embulk.spi.Buffer;
|
13
|
+
import org.embulk.spi.InputPlugin;
|
14
|
+
import org.embulk.spi.ParserPlugin;
|
15
|
+
import org.embulk.spi.FileInput;
|
16
|
+
import org.embulk.spi.PageOutput;
|
17
|
+
import static org.embulk.spi.util.Inputs.each;
|
18
|
+
|
19
|
+
/*
|
20
|
+
* Used by GuessExecutor
|
21
|
+
*/
|
22
|
+
class SamplingParserPlugin
|
23
|
+
implements ParserPlugin
|
24
|
+
{
|
25
|
+
private final int maxSampleSize;
|
26
|
+
|
27
|
+
@Inject
|
28
|
+
public SamplingParserPlugin(@ForSystemConfig ConfigSource systemConfig)
|
29
|
+
{
|
30
|
+
this.maxSampleSize = 32*1024; // TODO get sample syze from system config
|
31
|
+
}
|
32
|
+
|
33
|
+
@Override
|
34
|
+
public void transaction(ConfigSource config, ParserPlugin.Control control)
|
35
|
+
{
|
36
|
+
control.run(Exec.newTaskSource(), null);
|
37
|
+
}
|
38
|
+
|
39
|
+
@Override
|
40
|
+
public void run(TaskSource taskSource, Schema schema,
|
41
|
+
FileInput input, PageOutput output)
|
42
|
+
{
|
43
|
+
Buffer buffer = getSample(input, maxSampleSize);
|
44
|
+
throw new SampledNoticeError(buffer);
|
45
|
+
}
|
46
|
+
|
47
|
+
static Buffer runFileInputSampling(ConfigSource config)
|
48
|
+
{
|
49
|
+
// override in.parser.type so that FileInputRunner creates GuessParserPlugin
|
50
|
+
ConfigSource samplingInputConfig = config.getNested("in").deepCopy();
|
51
|
+
samplingInputConfig.getNestedOrSetEmpty("parser").set("type", "system_sampling");
|
52
|
+
|
53
|
+
final InputPlugin input = Exec.newPlugin(InputPlugin.class, samplingInputConfig.get(PluginType.class, "type"));
|
54
|
+
try {
|
55
|
+
input.transaction(samplingInputConfig, new InputPlugin.Control() {
|
56
|
+
public List<CommitReport> run(TaskSource taskSource, Schema schema, int processorCount)
|
57
|
+
{
|
58
|
+
input.run(taskSource, schema, 0, new PageOutput() {
|
59
|
+
@Override
|
60
|
+
public void add(Page page)
|
61
|
+
{
|
62
|
+
throw new RuntimeException("Input plugin must be a FileInputPlugin to guess parser configuration"); // TODO exception class
|
63
|
+
}
|
64
|
+
|
65
|
+
public void finish() { }
|
66
|
+
|
67
|
+
public void close() { }
|
68
|
+
});
|
69
|
+
throw new NoSampleException("No input files to guess parser configuration");
|
70
|
+
}
|
71
|
+
});
|
72
|
+
throw new AssertionError("SamplingParserPlugin must throw SampledNoticeError");
|
73
|
+
} catch (SampledNoticeError error) {
|
74
|
+
return error.getSample();
|
75
|
+
}
|
76
|
+
}
|
77
|
+
|
78
|
+
private static Buffer getSample(FileInput fileInput, int maxSampleSize)
|
79
|
+
{
|
80
|
+
Buffer sample = Buffer.allocate(maxSampleSize);
|
81
|
+
int sampleSize = 0;
|
82
|
+
|
83
|
+
while (fileInput.nextFile()) {
|
84
|
+
for (Buffer buffer : each(fileInput)) {
|
85
|
+
if (sampleSize >= maxSampleSize) {
|
86
|
+
// skip remaining all buffers so that FileInputPlugin.runInput doesn't
|
87
|
+
// throw exceptions at channel.join()
|
88
|
+
} else {
|
89
|
+
int size = Math.min(buffer.limit(), sample.capacity() - sampleSize);
|
90
|
+
sample.setBytes(sampleSize, buffer, 0, size);
|
91
|
+
sampleSize += size;
|
92
|
+
}
|
93
|
+
buffer.release();
|
94
|
+
}
|
95
|
+
}
|
96
|
+
|
97
|
+
sample.limit(sampleSize);
|
98
|
+
return sample;
|
99
|
+
}
|
100
|
+
|
101
|
+
public static class SampledNoticeError
|
102
|
+
extends Error
|
103
|
+
{
|
104
|
+
private final Buffer sample;
|
105
|
+
|
106
|
+
public SampledNoticeError(Buffer sample)
|
107
|
+
{
|
108
|
+
this.sample = sample;
|
109
|
+
}
|
110
|
+
|
111
|
+
public Buffer getSample()
|
112
|
+
{
|
113
|
+
return sample;
|
114
|
+
}
|
115
|
+
}
|
116
|
+
}
|
@@ -0,0 +1,24 @@
|
|
1
|
+
package org.embulk.exec;
|
2
|
+
|
3
|
+
import com.google.inject.Module;
|
4
|
+
import com.google.inject.Binder;
|
5
|
+
import org.embulk.config.ConfigSource;
|
6
|
+
|
7
|
+
public class SystemConfigModule
|
8
|
+
implements Module
|
9
|
+
{
|
10
|
+
private final ConfigSource systemConfig;
|
11
|
+
|
12
|
+
public SystemConfigModule(ConfigSource systemConfig)
|
13
|
+
{
|
14
|
+
this.systemConfig = systemConfig;
|
15
|
+
}
|
16
|
+
|
17
|
+
@Override
|
18
|
+
public void configure(Binder binder)
|
19
|
+
{
|
20
|
+
binder.bind(ConfigSource.class)
|
21
|
+
.annotatedWith(ForSystemConfig.class)
|
22
|
+
.toInstance(systemConfig);
|
23
|
+
}
|
24
|
+
}
|
@@ -0,0 +1,69 @@
|
|
1
|
+
package org.embulk.jruby;
|
2
|
+
|
3
|
+
import com.google.inject.Inject;
|
4
|
+
import org.jruby.embed.ScriptingContainer;
|
5
|
+
import org.jruby.embed.InvokeFailedException;
|
6
|
+
import org.embulk.plugin.PluginType;
|
7
|
+
import org.embulk.plugin.PluginSource;
|
8
|
+
import org.embulk.plugin.PluginSourceNotMatchException;
|
9
|
+
import org.embulk.spi.InputPlugin;
|
10
|
+
import org.embulk.spi.OutputPlugin;
|
11
|
+
import org.embulk.spi.ParserPlugin;
|
12
|
+
import org.embulk.spi.FormatterPlugin;
|
13
|
+
import org.embulk.spi.DecoderPlugin;
|
14
|
+
import org.embulk.spi.EncoderPlugin;
|
15
|
+
//import org.embulk.spi.LineFilterPlugin;
|
16
|
+
import org.embulk.spi.GuessPlugin;
|
17
|
+
|
18
|
+
public class JRubyPluginSource
|
19
|
+
implements PluginSource
|
20
|
+
{
|
21
|
+
private final ScriptingContainer jruby;
|
22
|
+
private final Object rubyPluginManager;
|
23
|
+
|
24
|
+
@Inject
|
25
|
+
public JRubyPluginSource(ScriptingContainer jruby)
|
26
|
+
{
|
27
|
+
this.jruby = jruby;
|
28
|
+
|
29
|
+
// get Embulk::Plugin
|
30
|
+
//this.rubyPluginManager = ((RubyModule) jruby.get("Embulk")).const_get(
|
31
|
+
// RubySymbol.newSymbol(
|
32
|
+
// jruby.getProvider().getRuntime(), "Plugin"));
|
33
|
+
this.rubyPluginManager = jruby.runScriptlet("Embulk::Plugin");
|
34
|
+
}
|
35
|
+
|
36
|
+
public <T> T newPlugin(Class<T> iface, PluginType type) throws PluginSourceNotMatchException
|
37
|
+
{
|
38
|
+
String name = type.getName();
|
39
|
+
|
40
|
+
String category;
|
41
|
+
if (InputPlugin.class.isAssignableFrom(iface)) {
|
42
|
+
category = "input";
|
43
|
+
} else if (OutputPlugin.class.isAssignableFrom(iface)) {
|
44
|
+
category = "output";
|
45
|
+
} else if (ParserPlugin.class.isAssignableFrom(iface)) {
|
46
|
+
category = "parser";
|
47
|
+
} else if (FormatterPlugin.class.isAssignableFrom(iface)) {
|
48
|
+
category = "formatter";
|
49
|
+
} else if (DecoderPlugin.class.isAssignableFrom(iface)) {
|
50
|
+
category = "decoder";
|
51
|
+
} else if (EncoderPlugin.class.isAssignableFrom(iface)) {
|
52
|
+
category = "encoder";
|
53
|
+
//} else if (LineFilterPlugin.class.isAssignableFrom(iface)) {
|
54
|
+
// category = "line_filter";
|
55
|
+
} else if (GuessPlugin.class.isAssignableFrom(iface)) {
|
56
|
+
category = "guess";
|
57
|
+
} else {
|
58
|
+
// unsupported plugin category
|
59
|
+
throw new PluginSourceNotMatchException("Plugin interface "+iface+" is not supported in JRuby");
|
60
|
+
}
|
61
|
+
|
62
|
+
String methodName = "new_java_" + category;
|
63
|
+
try {
|
64
|
+
return jruby.callMethod(rubyPluginManager, methodName, name, iface);
|
65
|
+
} catch (InvokeFailedException ex) {
|
66
|
+
throw new PluginSourceNotMatchException(ex.getCause());
|
67
|
+
}
|
68
|
+
}
|
69
|
+
}
|