embulk 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/.gitignore +13 -0
- data/Gemfile +3 -0
- data/Gemfile.lock +33 -0
- data/README.md +117 -0
- data/Rakefile +58 -0
- data/bin/embulk +63 -0
- data/build.gradle +149 -0
- data/embulk-cli/build.gradle +6 -0
- data/embulk-cli/pom.xml +94 -0
- data/embulk-cli/src/main/java/org/embulk/cli/Main.java +15 -0
- data/embulk-core/build.gradle +6 -0
- data/embulk-core/pom.xml +143 -0
- data/embulk-core/src/main/java/org/embulk/EmbulkService.java +39 -0
- data/embulk-core/src/main/java/org/embulk/command/Runner.java +199 -0
- data/embulk-core/src/main/java/org/embulk/command/TablePrinter.java +119 -0
- data/embulk-core/src/main/java/org/embulk/config/CommitReport.java +26 -0
- data/embulk-core/src/main/java/org/embulk/config/Config.java +15 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigDefault.java +15 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigException.java +20 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigLoader.java +83 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigSource.java +28 -0
- data/embulk-core/src/main/java/org/embulk/config/DataSource.java +35 -0
- data/embulk-core/src/main/java/org/embulk/config/DataSourceImpl.java +208 -0
- data/embulk-core/src/main/java/org/embulk/config/DataSourceSerDe.java +80 -0
- data/embulk-core/src/main/java/org/embulk/config/GenericTypeReference.java +20 -0
- data/embulk-core/src/main/java/org/embulk/config/ModelManager.java +125 -0
- data/embulk-core/src/main/java/org/embulk/config/NextConfig.java +26 -0
- data/embulk-core/src/main/java/org/embulk/config/Task.java +10 -0
- data/embulk-core/src/main/java/org/embulk/config/TaskInvocationHandler.java +180 -0
- data/embulk-core/src/main/java/org/embulk/config/TaskSerDe.java +343 -0
- data/embulk-core/src/main/java/org/embulk/config/TaskSource.java +28 -0
- data/embulk-core/src/main/java/org/embulk/config/TaskValidationException.java +37 -0
- data/embulk-core/src/main/java/org/embulk/config/TaskValidator.java +24 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExecModule.java +45 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExecuteInterruptedException.java +10 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExecuteResult.java +19 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExtensionServiceLoaderModule.java +43 -0
- data/embulk-core/src/main/java/org/embulk/exec/ForSystemConfig.java +16 -0
- data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +307 -0
- data/embulk-core/src/main/java/org/embulk/exec/LocalExecutor.java +274 -0
- data/embulk-core/src/main/java/org/embulk/exec/LoggerProvider.java +30 -0
- data/embulk-core/src/main/java/org/embulk/exec/NoSampleException.java +10 -0
- data/embulk-core/src/main/java/org/embulk/exec/PooledBufferAllocator.java +58 -0
- data/embulk-core/src/main/java/org/embulk/exec/PreviewExecutor.java +138 -0
- data/embulk-core/src/main/java/org/embulk/exec/PreviewResult.java +27 -0
- data/embulk-core/src/main/java/org/embulk/exec/PreviewedNoticeError.java +17 -0
- data/embulk-core/src/main/java/org/embulk/exec/SamplingParserPlugin.java +116 -0
- data/embulk-core/src/main/java/org/embulk/exec/SystemConfigModule.java +24 -0
- data/embulk-core/src/main/java/org/embulk/jruby/JRubyPluginSource.java +69 -0
- data/embulk-core/src/main/java/org/embulk/jruby/JRubyScriptingModule.java +100 -0
- data/embulk-core/src/main/java/org/embulk/plugin/BuiltinPluginSourceModule.java +17 -0
- data/embulk-core/src/main/java/org/embulk/plugin/InjectedPluginSource.java +92 -0
- data/embulk-core/src/main/java/org/embulk/plugin/PluginManager.java +34 -0
- data/embulk-core/src/main/java/org/embulk/plugin/PluginSource.java +6 -0
- data/embulk-core/src/main/java/org/embulk/plugin/PluginSourceNotMatchException.java +19 -0
- data/embulk-core/src/main/java/org/embulk/plugin/PluginType.java +47 -0
- data/embulk-core/src/main/java/org/embulk/plugin/SetThreadContextClassLoader.java +19 -0
- data/embulk-core/src/main/java/org/embulk/spi/Buffer.java +113 -0
- data/embulk-core/src/main/java/org/embulk/spi/BufferAllocator.java +8 -0
- data/embulk-core/src/main/java/org/embulk/spi/Column.java +92 -0
- data/embulk-core/src/main/java/org/embulk/spi/ColumnConfig.java +79 -0
- data/embulk-core/src/main/java/org/embulk/spi/DecoderPlugin.java +16 -0
- data/embulk-core/src/main/java/org/embulk/spi/EncoderPlugin.java +16 -0
- data/embulk-core/src/main/java/org/embulk/spi/Exec.java +76 -0
- data/embulk-core/src/main/java/org/embulk/spi/ExecAction.java +6 -0
- data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +105 -0
- data/embulk-core/src/main/java/org/embulk/spi/Extension.java +42 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileInput.java +11 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileInputPlugin.java +19 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +113 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileOutput.java +13 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileOutputPlugin.java +20 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileOutputRunner.java +167 -0
- data/embulk-core/src/main/java/org/embulk/spi/FormatterPlugin.java +18 -0
- data/embulk-core/src/main/java/org/embulk/spi/GuessPlugin.java +9 -0
- data/embulk-core/src/main/java/org/embulk/spi/InputPlugin.java +20 -0
- data/embulk-core/src/main/java/org/embulk/spi/OutputPlugin.java +21 -0
- data/embulk-core/src/main/java/org/embulk/spi/Page.java +45 -0
- data/embulk-core/src/main/java/org/embulk/spi/PageBuilder.java +327 -0
- data/embulk-core/src/main/java/org/embulk/spi/PageFormat.java +47 -0
- data/embulk-core/src/main/java/org/embulk/spi/PageOutput.java +11 -0
- data/embulk-core/src/main/java/org/embulk/spi/PageReader.java +227 -0
- data/embulk-core/src/main/java/org/embulk/spi/ParserPlugin.java +17 -0
- data/embulk-core/src/main/java/org/embulk/spi/Schema.java +101 -0
- data/embulk-core/src/main/java/org/embulk/spi/SchemaConfig.java +52 -0
- data/embulk-core/src/main/java/org/embulk/spi/SchemaVisitor.java +14 -0
- data/embulk-core/src/main/java/org/embulk/spi/Transactional.java +10 -0
- data/embulk-core/src/main/java/org/embulk/spi/TransactionalFileInput.java +17 -0
- data/embulk-core/src/main/java/org/embulk/spi/TransactionalFileOutput.java +19 -0
- data/embulk-core/src/main/java/org/embulk/spi/TransactionalPageOutput.java +17 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/DateTimeZoneSerDe.java +57 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/JRubyTimeParserHelper.java +8 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/JRubyTimeParserHelperFactory.java +6 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/Timestamp.java +159 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormat.java +98 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormatter.java +55 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParseException.java +6 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParser.java +60 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampSerDe.java +50 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/AbstractType.java +55 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/BooleanType.java +12 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/DoubleType.java +12 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/LongType.java +12 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/StringType.java +12 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/TimestampType.java +39 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/Type.java +15 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/TypeDeserializer.java +47 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/Types.java +14 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/CharsetSerDe.java +55 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Decoders.java +81 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Encoders.java +81 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/FileInputInputStream.java +110 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/FileOutputOutputStream.java +94 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/InputStreamFileInput.java +111 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Inputs.java +74 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/LineDecoder.java +118 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/LineEncoder.java +109 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/ListFileInput.java +52 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Newline.java +38 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/PagePrinter.java +102 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Pages.java +139 -0
- data/embulk-core/src/test/java/org/embulk/EmbulkTestRuntime.java +110 -0
- data/embulk-core/src/test/java/org/embulk/GuiceBinder.java +72 -0
- data/embulk-core/src/test/java/org/embulk/RandomManager.java +53 -0
- data/embulk-core/src/test/java/org/embulk/TestPluginSourceModule.java +23 -0
- data/embulk-core/src/test/java/org/embulk/TestUtilityModule.java +17 -0
- data/embulk-core/src/test/java/org/embulk/config/TestConfigSource.java +114 -0
- data/embulk-core/src/test/java/org/embulk/config/TestTaskSource.java +70 -0
- data/embulk-core/src/test/java/org/embulk/plugin/MockPluginSource.java +57 -0
- data/embulk-core/src/test/java/org/embulk/plugin/TestPluginType.java +18 -0
- data/embulk-core/src/test/java/org/embulk/spi/MockFileOutput.java +63 -0
- data/embulk-core/src/test/java/org/embulk/spi/MockFormatterPlugin.java +101 -0
- data/embulk-core/src/test/java/org/embulk/spi/MockParserPlugin.java +73 -0
- data/embulk-core/src/test/java/org/embulk/spi/PageTestUtils.java +78 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestFileInputInputStream.java +67 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestFileInputRunner.java +180 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestFileOutputRunner.java +192 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestInputStreamFileInput.java +188 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestPageBuilderReader.java +301 -0
- data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestamp.java +116 -0
- data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestampFormatterParser.java +52 -0
- data/embulk-core/src/test/java/org/embulk/spi/type/TestTypeSerDe.java +45 -0
- data/embulk-core/src/test/java/org/embulk/spi/util/TestLineDecoder.java +132 -0
- data/embulk-core/src/test/java/org/embulk/spi/util/TestLineEncoder.java +123 -0
- data/embulk-standards/build.gradle +6 -0
- data/embulk-standards/pom.xml +68 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java +158 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +233 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvTokenizer.java +355 -0
- data/embulk-standards/src/main/java/org/embulk/standards/GzipFileDecoderPlugin.java +55 -0
- data/embulk-standards/src/main/java/org/embulk/standards/GzipFileEncoderPlugin.java +39 -0
- data/embulk-standards/src/main/java/org/embulk/standards/LocalFileInputPlugin.java +138 -0
- data/embulk-standards/src/main/java/org/embulk/standards/LocalFileOutputPlugin.java +128 -0
- data/embulk-standards/src/main/java/org/embulk/standards/NullOutputPlugin.java +46 -0
- data/embulk-standards/src/main/java/org/embulk/standards/S3FileInputPlugin.java +238 -0
- data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginExtension.java +16 -0
- data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginModule.java +44 -0
- data/embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java +71 -0
- data/embulk-standards/src/main/resources/META-INF/services/org.embulk.spi.Extension +1 -0
- data/embulk-standards/src/test/java/org/embulk/standards/TestCsvParserPlugin.java +69 -0
- data/embulk-standards/src/test/java/org/embulk/standards/TestCsvTokenizer.java +291 -0
- data/embulk-standards/src/test/java/org/embulk/standards/TestS3FileInputPlugin.java +43 -0
- data/embulk.gemspec +27 -0
- data/examples/config.yml +34 -0
- data/examples/csv/sample.csv.gz +0 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +6 -0
- data/gradlew +164 -0
- data/gradlew.bat +90 -0
- data/lib/embulk.rb +16 -0
- data/lib/embulk/buffer.rb +17 -0
- data/lib/embulk/column.rb +47 -0
- data/lib/embulk/command/embulk.rb +39 -0
- data/lib/embulk/command/embulk_example.rb +32 -0
- data/lib/embulk/command/embulk_generate_bin.rb +62 -0
- data/lib/embulk/command/embulk_run.rb +243 -0
- data/lib/embulk/data/bundle/.bundle/config +3 -0
- data/lib/embulk/data/bundle/Gemfile +31 -0
- data/lib/embulk/data/bundle/Gemfile.lock +8 -0
- data/lib/embulk/data/bundle/embulk/input_example.rb +40 -0
- data/lib/embulk/data/bundle/embulk/output_example.rb +51 -0
- data/lib/embulk/data_source.rb +66 -0
- data/lib/embulk/error.rb +5 -0
- data/lib/embulk/guess_charset.rb +26 -0
- data/lib/embulk/guess_csv.rb +195 -0
- data/lib/embulk/guess_gzip.rb +18 -0
- data/lib/embulk/guess_newline.rb +20 -0
- data/lib/embulk/guess_plugin.rb +113 -0
- data/lib/embulk/input_plugin.rb +53 -0
- data/lib/embulk/java/bootstrap.rb +12 -0
- data/lib/embulk/java/imports.rb +26 -0
- data/lib/embulk/java/time_helper.rb +77 -0
- data/lib/embulk/output_plugin.rb +104 -0
- data/lib/embulk/page.rb +28 -0
- data/lib/embulk/page_builder.rb +22 -0
- data/lib/embulk/plugin.rb +152 -0
- data/lib/embulk/plugin_registry.rb +70 -0
- data/lib/embulk/schema.rb +85 -0
- data/lib/embulk/time_format_guess.rb +331 -0
- data/lib/embulk/version.rb +3 -0
- data/pom.xml +533 -0
- data/settings.gradle +5 -0
- metadata +370 -0
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
package org.embulk.exec;
|
|
2
|
+
|
|
3
|
+
import java.util.Properties;
|
|
4
|
+
import org.slf4j.ILoggerFactory;
|
|
5
|
+
import org.slf4j.LoggerFactory;
|
|
6
|
+
import org.apache.log4j.PropertyConfigurator;
|
|
7
|
+
import com.google.inject.Provider;
|
|
8
|
+
|
|
9
|
+
public class LoggerProvider
|
|
10
|
+
implements Provider<ILoggerFactory>
|
|
11
|
+
{
|
|
12
|
+
public LoggerProvider()
|
|
13
|
+
{
|
|
14
|
+
// TODO system config
|
|
15
|
+
Properties prop = new Properties();
|
|
16
|
+
|
|
17
|
+
prop.setProperty("log4j.rootLogger", "INFO,root");
|
|
18
|
+
prop.setProperty("log4j.appender.root", "org.apache.log4j.ConsoleAppender");
|
|
19
|
+
prop.setProperty("log4j.appender.root.layout", "org.apache.log4j.PatternLayout");
|
|
20
|
+
prop.setProperty("log4j.appender.root.layout.ConversionPattern", "%d [%p]: %t:%c: %m%n");
|
|
21
|
+
|
|
22
|
+
// TODO
|
|
23
|
+
PropertyConfigurator.configure(prop);
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
public ILoggerFactory get()
|
|
27
|
+
{
|
|
28
|
+
return LoggerFactory.getILoggerFactory();
|
|
29
|
+
}
|
|
30
|
+
}
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
package org.embulk.exec;
|
|
2
|
+
|
|
3
|
+
import io.netty.buffer.PooledByteBufAllocator;
|
|
4
|
+
import io.netty.buffer.ByteBuf;
|
|
5
|
+
import io.netty.util.ResourceLeakDetector;
|
|
6
|
+
import org.embulk.spi.Buffer;
|
|
7
|
+
import org.embulk.spi.BufferAllocator;
|
|
8
|
+
|
|
9
|
+
public class PooledBufferAllocator
|
|
10
|
+
implements BufferAllocator
|
|
11
|
+
{
|
|
12
|
+
private PooledByteBufAllocator nettyBuffer;
|
|
13
|
+
|
|
14
|
+
public PooledBufferAllocator()
|
|
15
|
+
{
|
|
16
|
+
// TODO configure parameters
|
|
17
|
+
this.nettyBuffer = new PooledByteBufAllocator(false);
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
public Buffer allocate()
|
|
21
|
+
{
|
|
22
|
+
return new NettyByteBufBuffer(nettyBuffer.buffer());
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
public Buffer allocate(int minimumCapacity)
|
|
26
|
+
{
|
|
27
|
+
int size = 32*1024;
|
|
28
|
+
while (size < minimumCapacity) {
|
|
29
|
+
size *= 2;
|
|
30
|
+
}
|
|
31
|
+
return new NettyByteBufBuffer(nettyBuffer.buffer(size));
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
private static class NettyByteBufBuffer
|
|
35
|
+
extends Buffer
|
|
36
|
+
{
|
|
37
|
+
private ByteBuf buf;
|
|
38
|
+
private Exception doubleFreeCheck;
|
|
39
|
+
|
|
40
|
+
public NettyByteBufBuffer(ByteBuf buf)
|
|
41
|
+
{
|
|
42
|
+
super(buf.array(), buf.arrayOffset(), buf.capacity());
|
|
43
|
+
this.buf = buf;
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
public void release()
|
|
47
|
+
{
|
|
48
|
+
if (doubleFreeCheck != null) {
|
|
49
|
+
doubleFreeCheck.printStackTrace();
|
|
50
|
+
}
|
|
51
|
+
if (buf != null) {
|
|
52
|
+
buf.release();
|
|
53
|
+
buf = null;
|
|
54
|
+
doubleFreeCheck = new NullPointerException();
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
}
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
package org.embulk.exec;
|
|
2
|
+
|
|
3
|
+
import java.util.List;
|
|
4
|
+
import java.util.ArrayList;
|
|
5
|
+
import javax.validation.constraints.NotNull;
|
|
6
|
+
import com.google.inject.Inject;
|
|
7
|
+
import com.google.inject.Injector;
|
|
8
|
+
import com.google.common.base.Throwables;
|
|
9
|
+
import org.embulk.config.Config;
|
|
10
|
+
import org.embulk.config.ConfigDefault;
|
|
11
|
+
import org.embulk.config.Task;
|
|
12
|
+
import org.embulk.config.TaskSource;
|
|
13
|
+
import org.embulk.config.ConfigSource;
|
|
14
|
+
import org.embulk.config.CommitReport;
|
|
15
|
+
import org.embulk.plugin.PluginType;
|
|
16
|
+
import org.embulk.spi.Schema;
|
|
17
|
+
import org.embulk.spi.Page;
|
|
18
|
+
import org.embulk.spi.PageOutput;
|
|
19
|
+
import org.embulk.spi.PageReader;
|
|
20
|
+
import org.embulk.spi.InputPlugin;
|
|
21
|
+
import org.embulk.spi.Exec;
|
|
22
|
+
import org.embulk.spi.ExecSession;
|
|
23
|
+
import org.embulk.spi.ExecAction;
|
|
24
|
+
|
|
25
|
+
public class PreviewExecutor
|
|
26
|
+
{
|
|
27
|
+
private final Injector injector;
|
|
28
|
+
private final ConfigSource systemConfig;
|
|
29
|
+
|
|
30
|
+
public interface PreviewTask
|
|
31
|
+
extends Task
|
|
32
|
+
{
|
|
33
|
+
@Config("in")
|
|
34
|
+
@NotNull
|
|
35
|
+
public ConfigSource getInputConfig();
|
|
36
|
+
|
|
37
|
+
@Config("preview_sample_rows")
|
|
38
|
+
@ConfigDefault("15")
|
|
39
|
+
public int getSampleRows();
|
|
40
|
+
|
|
41
|
+
public TaskSource getInputTask();
|
|
42
|
+
public void setInputTask(TaskSource taskSource);
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
@Inject
|
|
46
|
+
public PreviewExecutor(Injector injector,
|
|
47
|
+
@ForSystemConfig ConfigSource systemConfig)
|
|
48
|
+
{
|
|
49
|
+
this.injector = injector;
|
|
50
|
+
this.systemConfig = systemConfig;
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
public PreviewResult preview(ExecSession exec, final ConfigSource config)
|
|
54
|
+
{
|
|
55
|
+
try {
|
|
56
|
+
return Exec.doWith(exec, new ExecAction<PreviewResult>() {
|
|
57
|
+
public PreviewResult run()
|
|
58
|
+
{
|
|
59
|
+
return doPreview(config);
|
|
60
|
+
}
|
|
61
|
+
});
|
|
62
|
+
} catch (Exception ex) {
|
|
63
|
+
throw Throwables.propagate(ex);
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
protected InputPlugin newInputPlugin(PreviewTask task)
|
|
68
|
+
{
|
|
69
|
+
return Exec.newPlugin(InputPlugin.class, task.getInputConfig().get(PluginType.class, "type"));
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
private PreviewResult doPreview(ConfigSource config)
|
|
73
|
+
{
|
|
74
|
+
final PreviewTask task = config.loadConfig(PreviewTask.class);
|
|
75
|
+
InputPlugin input = newInputPlugin(task);
|
|
76
|
+
|
|
77
|
+
try {
|
|
78
|
+
input.transaction(task.getInputConfig(), new InputPlugin.Control() {
|
|
79
|
+
public List<CommitReport> run(TaskSource taskSource, Schema schema, int processorCount)
|
|
80
|
+
{
|
|
81
|
+
InputPlugin input = newInputPlugin(task);
|
|
82
|
+
try (SamplingPageOutput out = new SamplingPageOutput(task.getSampleRows(), schema)) {
|
|
83
|
+
input.run(taskSource, schema, 0, out);
|
|
84
|
+
}
|
|
85
|
+
throw new NoSampleException("No input records to preview");
|
|
86
|
+
}
|
|
87
|
+
});
|
|
88
|
+
throw new AssertionError("PreviewExecutor executor must throw PreviewedNoticeError");
|
|
89
|
+
} catch (PreviewedNoticeError previewed) {
|
|
90
|
+
return previewed.getPreviewResult();
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
private static class SamplingPageOutput
|
|
95
|
+
implements PageOutput
|
|
96
|
+
{
|
|
97
|
+
private final int sampleRows;
|
|
98
|
+
private final Schema schema;
|
|
99
|
+
private List<Page> pages;
|
|
100
|
+
private int recordCount;
|
|
101
|
+
|
|
102
|
+
public SamplingPageOutput(int sampleRows, Schema schema)
|
|
103
|
+
{
|
|
104
|
+
this.sampleRows = sampleRows;
|
|
105
|
+
this.schema = schema;
|
|
106
|
+
this.pages = new ArrayList<Page>();
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
@Override
|
|
110
|
+
public void add(Page page)
|
|
111
|
+
{
|
|
112
|
+
pages.add(page);
|
|
113
|
+
recordCount += PageReader.getRecordCount(page);
|
|
114
|
+
if (recordCount >= sampleRows) {
|
|
115
|
+
finish();
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
@Override
|
|
120
|
+
public void finish()
|
|
121
|
+
{
|
|
122
|
+
if (recordCount == 0) {
|
|
123
|
+
throw new NoSampleException("No input records to preview");
|
|
124
|
+
}
|
|
125
|
+
PreviewResult res = new PreviewResult(schema, pages);
|
|
126
|
+
pages = null;
|
|
127
|
+
throw new PreviewedNoticeError(res);
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
@Override
|
|
131
|
+
public void close()
|
|
132
|
+
{
|
|
133
|
+
for (Page page : pages) {
|
|
134
|
+
page.release();
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
package org.embulk.exec;
|
|
2
|
+
|
|
3
|
+
import java.util.List;
|
|
4
|
+
import org.embulk.spi.Schema;
|
|
5
|
+
import org.embulk.spi.Page;
|
|
6
|
+
|
|
7
|
+
public class PreviewResult
|
|
8
|
+
{
|
|
9
|
+
private final Schema schema;
|
|
10
|
+
private final List<Page> pages;
|
|
11
|
+
|
|
12
|
+
public PreviewResult(Schema schema, List<Page> pages)
|
|
13
|
+
{
|
|
14
|
+
this.schema = schema;
|
|
15
|
+
this.pages = pages;
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
public Schema getSchema()
|
|
19
|
+
{
|
|
20
|
+
return schema;
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
public List<Page> getPages()
|
|
24
|
+
{
|
|
25
|
+
return pages;
|
|
26
|
+
}
|
|
27
|
+
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
package org.embulk.exec;
|
|
2
|
+
|
|
3
|
+
public class PreviewedNoticeError
|
|
4
|
+
extends Error
|
|
5
|
+
{
|
|
6
|
+
private final PreviewResult previewResult;
|
|
7
|
+
|
|
8
|
+
public PreviewedNoticeError(PreviewResult previewResult)
|
|
9
|
+
{
|
|
10
|
+
this.previewResult = previewResult;
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
public PreviewResult getPreviewResult()
|
|
14
|
+
{
|
|
15
|
+
return previewResult;
|
|
16
|
+
}
|
|
17
|
+
}
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
package org.embulk.exec;
|
|
2
|
+
|
|
3
|
+
import java.util.List;
|
|
4
|
+
import com.google.inject.Inject;
|
|
5
|
+
import org.embulk.config.TaskSource;
|
|
6
|
+
import org.embulk.config.ConfigSource;
|
|
7
|
+
import org.embulk.config.CommitReport;
|
|
8
|
+
import org.embulk.plugin.PluginType;
|
|
9
|
+
import org.embulk.spi.Schema;
|
|
10
|
+
import org.embulk.spi.Exec;
|
|
11
|
+
import org.embulk.spi.Page;
|
|
12
|
+
import org.embulk.spi.Buffer;
|
|
13
|
+
import org.embulk.spi.InputPlugin;
|
|
14
|
+
import org.embulk.spi.ParserPlugin;
|
|
15
|
+
import org.embulk.spi.FileInput;
|
|
16
|
+
import org.embulk.spi.PageOutput;
|
|
17
|
+
import static org.embulk.spi.util.Inputs.each;
|
|
18
|
+
|
|
19
|
+
/*
|
|
20
|
+
* Used by GuessExecutor
|
|
21
|
+
*/
|
|
22
|
+
class SamplingParserPlugin
|
|
23
|
+
implements ParserPlugin
|
|
24
|
+
{
|
|
25
|
+
private final int maxSampleSize;
|
|
26
|
+
|
|
27
|
+
@Inject
|
|
28
|
+
public SamplingParserPlugin(@ForSystemConfig ConfigSource systemConfig)
|
|
29
|
+
{
|
|
30
|
+
this.maxSampleSize = 32*1024; // TODO get sample syze from system config
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
@Override
|
|
34
|
+
public void transaction(ConfigSource config, ParserPlugin.Control control)
|
|
35
|
+
{
|
|
36
|
+
control.run(Exec.newTaskSource(), null);
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
@Override
|
|
40
|
+
public void run(TaskSource taskSource, Schema schema,
|
|
41
|
+
FileInput input, PageOutput output)
|
|
42
|
+
{
|
|
43
|
+
Buffer buffer = getSample(input, maxSampleSize);
|
|
44
|
+
throw new SampledNoticeError(buffer);
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
static Buffer runFileInputSampling(ConfigSource config)
|
|
48
|
+
{
|
|
49
|
+
// override in.parser.type so that FileInputRunner creates GuessParserPlugin
|
|
50
|
+
ConfigSource samplingInputConfig = config.getNested("in").deepCopy();
|
|
51
|
+
samplingInputConfig.getNestedOrSetEmpty("parser").set("type", "system_sampling");
|
|
52
|
+
|
|
53
|
+
final InputPlugin input = Exec.newPlugin(InputPlugin.class, samplingInputConfig.get(PluginType.class, "type"));
|
|
54
|
+
try {
|
|
55
|
+
input.transaction(samplingInputConfig, new InputPlugin.Control() {
|
|
56
|
+
public List<CommitReport> run(TaskSource taskSource, Schema schema, int processorCount)
|
|
57
|
+
{
|
|
58
|
+
input.run(taskSource, schema, 0, new PageOutput() {
|
|
59
|
+
@Override
|
|
60
|
+
public void add(Page page)
|
|
61
|
+
{
|
|
62
|
+
throw new RuntimeException("Input plugin must be a FileInputPlugin to guess parser configuration"); // TODO exception class
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
public void finish() { }
|
|
66
|
+
|
|
67
|
+
public void close() { }
|
|
68
|
+
});
|
|
69
|
+
throw new NoSampleException("No input files to guess parser configuration");
|
|
70
|
+
}
|
|
71
|
+
});
|
|
72
|
+
throw new AssertionError("SamplingParserPlugin must throw SampledNoticeError");
|
|
73
|
+
} catch (SampledNoticeError error) {
|
|
74
|
+
return error.getSample();
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
private static Buffer getSample(FileInput fileInput, int maxSampleSize)
|
|
79
|
+
{
|
|
80
|
+
Buffer sample = Buffer.allocate(maxSampleSize);
|
|
81
|
+
int sampleSize = 0;
|
|
82
|
+
|
|
83
|
+
while (fileInput.nextFile()) {
|
|
84
|
+
for (Buffer buffer : each(fileInput)) {
|
|
85
|
+
if (sampleSize >= maxSampleSize) {
|
|
86
|
+
// skip remaining all buffers so that FileInputPlugin.runInput doesn't
|
|
87
|
+
// throw exceptions at channel.join()
|
|
88
|
+
} else {
|
|
89
|
+
int size = Math.min(buffer.limit(), sample.capacity() - sampleSize);
|
|
90
|
+
sample.setBytes(sampleSize, buffer, 0, size);
|
|
91
|
+
sampleSize += size;
|
|
92
|
+
}
|
|
93
|
+
buffer.release();
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
sample.limit(sampleSize);
|
|
98
|
+
return sample;
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
public static class SampledNoticeError
|
|
102
|
+
extends Error
|
|
103
|
+
{
|
|
104
|
+
private final Buffer sample;
|
|
105
|
+
|
|
106
|
+
public SampledNoticeError(Buffer sample)
|
|
107
|
+
{
|
|
108
|
+
this.sample = sample;
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
public Buffer getSample()
|
|
112
|
+
{
|
|
113
|
+
return sample;
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
package org.embulk.exec;
|
|
2
|
+
|
|
3
|
+
import com.google.inject.Module;
|
|
4
|
+
import com.google.inject.Binder;
|
|
5
|
+
import org.embulk.config.ConfigSource;
|
|
6
|
+
|
|
7
|
+
public class SystemConfigModule
|
|
8
|
+
implements Module
|
|
9
|
+
{
|
|
10
|
+
private final ConfigSource systemConfig;
|
|
11
|
+
|
|
12
|
+
public SystemConfigModule(ConfigSource systemConfig)
|
|
13
|
+
{
|
|
14
|
+
this.systemConfig = systemConfig;
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
@Override
|
|
18
|
+
public void configure(Binder binder)
|
|
19
|
+
{
|
|
20
|
+
binder.bind(ConfigSource.class)
|
|
21
|
+
.annotatedWith(ForSystemConfig.class)
|
|
22
|
+
.toInstance(systemConfig);
|
|
23
|
+
}
|
|
24
|
+
}
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
package org.embulk.jruby;
|
|
2
|
+
|
|
3
|
+
import com.google.inject.Inject;
|
|
4
|
+
import org.jruby.embed.ScriptingContainer;
|
|
5
|
+
import org.jruby.embed.InvokeFailedException;
|
|
6
|
+
import org.embulk.plugin.PluginType;
|
|
7
|
+
import org.embulk.plugin.PluginSource;
|
|
8
|
+
import org.embulk.plugin.PluginSourceNotMatchException;
|
|
9
|
+
import org.embulk.spi.InputPlugin;
|
|
10
|
+
import org.embulk.spi.OutputPlugin;
|
|
11
|
+
import org.embulk.spi.ParserPlugin;
|
|
12
|
+
import org.embulk.spi.FormatterPlugin;
|
|
13
|
+
import org.embulk.spi.DecoderPlugin;
|
|
14
|
+
import org.embulk.spi.EncoderPlugin;
|
|
15
|
+
//import org.embulk.spi.LineFilterPlugin;
|
|
16
|
+
import org.embulk.spi.GuessPlugin;
|
|
17
|
+
|
|
18
|
+
public class JRubyPluginSource
|
|
19
|
+
implements PluginSource
|
|
20
|
+
{
|
|
21
|
+
private final ScriptingContainer jruby;
|
|
22
|
+
private final Object rubyPluginManager;
|
|
23
|
+
|
|
24
|
+
@Inject
|
|
25
|
+
public JRubyPluginSource(ScriptingContainer jruby)
|
|
26
|
+
{
|
|
27
|
+
this.jruby = jruby;
|
|
28
|
+
|
|
29
|
+
// get Embulk::Plugin
|
|
30
|
+
//this.rubyPluginManager = ((RubyModule) jruby.get("Embulk")).const_get(
|
|
31
|
+
// RubySymbol.newSymbol(
|
|
32
|
+
// jruby.getProvider().getRuntime(), "Plugin"));
|
|
33
|
+
this.rubyPluginManager = jruby.runScriptlet("Embulk::Plugin");
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
public <T> T newPlugin(Class<T> iface, PluginType type) throws PluginSourceNotMatchException
|
|
37
|
+
{
|
|
38
|
+
String name = type.getName();
|
|
39
|
+
|
|
40
|
+
String category;
|
|
41
|
+
if (InputPlugin.class.isAssignableFrom(iface)) {
|
|
42
|
+
category = "input";
|
|
43
|
+
} else if (OutputPlugin.class.isAssignableFrom(iface)) {
|
|
44
|
+
category = "output";
|
|
45
|
+
} else if (ParserPlugin.class.isAssignableFrom(iface)) {
|
|
46
|
+
category = "parser";
|
|
47
|
+
} else if (FormatterPlugin.class.isAssignableFrom(iface)) {
|
|
48
|
+
category = "formatter";
|
|
49
|
+
} else if (DecoderPlugin.class.isAssignableFrom(iface)) {
|
|
50
|
+
category = "decoder";
|
|
51
|
+
} else if (EncoderPlugin.class.isAssignableFrom(iface)) {
|
|
52
|
+
category = "encoder";
|
|
53
|
+
//} else if (LineFilterPlugin.class.isAssignableFrom(iface)) {
|
|
54
|
+
// category = "line_filter";
|
|
55
|
+
} else if (GuessPlugin.class.isAssignableFrom(iface)) {
|
|
56
|
+
category = "guess";
|
|
57
|
+
} else {
|
|
58
|
+
// unsupported plugin category
|
|
59
|
+
throw new PluginSourceNotMatchException("Plugin interface "+iface+" is not supported in JRuby");
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
String methodName = "new_java_" + category;
|
|
63
|
+
try {
|
|
64
|
+
return jruby.callMethod(rubyPluginManager, methodName, name, iface);
|
|
65
|
+
} catch (InvokeFailedException ex) {
|
|
66
|
+
throw new PluginSourceNotMatchException(ex.getCause());
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
}
|