embulk 0.7.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/.ruby-version +1 -0
- data/.travis.yml +18 -0
- data/COPYING +14 -0
- data/Gemfile +2 -0
- data/Gemfile.lock +31 -0
- data/README.md +206 -0
- data/Rakefile +26 -0
- data/appveyor.yml +20 -0
- data/bin/embulk +106 -0
- data/build.gradle +338 -0
- data/embulk-cli/build.gradle +6 -0
- data/embulk-cli/src/main/java/org/embulk/cli/Main.java +22 -0
- data/embulk-cli/src/main/sh/selfrun.sh +158 -0
- data/embulk-cli/src/test/java/org/embulk/cli/DummyMain.java +23 -0
- data/embulk-cli/src/test/java/org/embulk/cli/SelfrunTest.java +281 -0
- data/embulk-core/build.gradle +59 -0
- data/embulk-core/src/main/java/org/embulk/EmbulkEmbed.java +315 -0
- data/embulk-core/src/main/java/org/embulk/EmbulkService.java +76 -0
- data/embulk-core/src/main/java/org/embulk/command/PreviewPrinter.java +84 -0
- data/embulk-core/src/main/java/org/embulk/command/TablePreviewPrinter.java +107 -0
- data/embulk-core/src/main/java/org/embulk/command/VerticalPreviewPrinter.java +47 -0
- data/embulk-core/src/main/java/org/embulk/config/CommitReport.java +33 -0
- data/embulk-core/src/main/java/org/embulk/config/Config.java +15 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigDefault.java +15 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigDiff.java +29 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigException.java +20 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigInject.java +14 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigLoader.java +141 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigSource.java +31 -0
- data/embulk-core/src/main/java/org/embulk/config/DataSource.java +39 -0
- data/embulk-core/src/main/java/org/embulk/config/DataSourceImpl.java +231 -0
- data/embulk-core/src/main/java/org/embulk/config/DataSourceSerDe.java +84 -0
- data/embulk-core/src/main/java/org/embulk/config/GenericTypeReference.java +20 -0
- data/embulk-core/src/main/java/org/embulk/config/ModelManager.java +123 -0
- data/embulk-core/src/main/java/org/embulk/config/Task.java +10 -0
- data/embulk-core/src/main/java/org/embulk/config/TaskInvocationHandler.java +180 -0
- data/embulk-core/src/main/java/org/embulk/config/TaskReport.java +29 -0
- data/embulk-core/src/main/java/org/embulk/config/TaskSerDe.java +345 -0
- data/embulk-core/src/main/java/org/embulk/config/TaskSource.java +31 -0
- data/embulk-core/src/main/java/org/embulk/config/TaskValidationException.java +38 -0
- data/embulk-core/src/main/java/org/embulk/config/TaskValidator.java +24 -0
- data/embulk-core/src/main/java/org/embulk/exec/BulkLoader.java +652 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExecModule.java +52 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExecutionInterruptedException.java +10 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExecutionResult.java +26 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExtensionServiceLoaderModule.java +43 -0
- data/embulk-core/src/main/java/org/embulk/exec/ForGuess.java +16 -0
- data/embulk-core/src/main/java/org/embulk/exec/ForSystemConfig.java +16 -0
- data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +373 -0
- data/embulk-core/src/main/java/org/embulk/exec/LocalExecutorPlugin.java +129 -0
- data/embulk-core/src/main/java/org/embulk/exec/LocalThreadExecutor.java +34 -0
- data/embulk-core/src/main/java/org/embulk/exec/LoggerProvider.java +60 -0
- data/embulk-core/src/main/java/org/embulk/exec/NoSampleException.java +10 -0
- data/embulk-core/src/main/java/org/embulk/exec/PartialExecutionException.java +18 -0
- data/embulk-core/src/main/java/org/embulk/exec/PooledBufferAllocator.java +77 -0
- data/embulk-core/src/main/java/org/embulk/exec/PreviewExecutor.java +183 -0
- data/embulk-core/src/main/java/org/embulk/exec/PreviewResult.java +27 -0
- data/embulk-core/src/main/java/org/embulk/exec/PreviewedNoticeError.java +17 -0
- data/embulk-core/src/main/java/org/embulk/exec/ResumeState.java +100 -0
- data/embulk-core/src/main/java/org/embulk/exec/SamplingParserPlugin.java +136 -0
- data/embulk-core/src/main/java/org/embulk/exec/SetCurrentThreadName.java +19 -0
- data/embulk-core/src/main/java/org/embulk/exec/SystemConfigModule.java +24 -0
- data/embulk-core/src/main/java/org/embulk/exec/TempFileAllocator.java +35 -0
- data/embulk-core/src/main/java/org/embulk/guice/Bootstrap.java +157 -0
- data/embulk-core/src/main/java/org/embulk/guice/CloseableInjector.java +22 -0
- data/embulk-core/src/main/java/org/embulk/guice/InjectorProxy.java +145 -0
- data/embulk-core/src/main/java/org/embulk/guice/LifeCycleInjector.java +26 -0
- data/embulk-core/src/main/java/org/embulk/guice/LifeCycleInjectorProxy.java +61 -0
- data/embulk-core/src/main/java/org/embulk/guice/LifeCycleManager.java +187 -0
- data/embulk-core/src/main/java/org/embulk/guice/LifeCycleMethods.java +89 -0
- data/embulk-core/src/main/java/org/embulk/guice/LifeCycleMethodsMap.java +38 -0
- data/embulk-core/src/main/java/org/embulk/guice/LifeCycleModule.java +97 -0
- data/embulk-core/src/main/java/org/embulk/jruby/JRubyPluginSource.java +72 -0
- data/embulk-core/src/main/java/org/embulk/jruby/JRubyScriptingModule.java +119 -0
- data/embulk-core/src/main/java/org/embulk/plugin/BuiltinPluginSourceModule.java +17 -0
- data/embulk-core/src/main/java/org/embulk/plugin/InjectedPluginSource.java +96 -0
- data/embulk-core/src/main/java/org/embulk/plugin/PluginClassLoader.java +168 -0
- data/embulk-core/src/main/java/org/embulk/plugin/PluginClassLoaderFactory.java +9 -0
- data/embulk-core/src/main/java/org/embulk/plugin/PluginClassLoaderModule.java +71 -0
- data/embulk-core/src/main/java/org/embulk/plugin/PluginManager.java +78 -0
- data/embulk-core/src/main/java/org/embulk/plugin/PluginSource.java +6 -0
- data/embulk-core/src/main/java/org/embulk/plugin/PluginSourceNotMatchException.java +19 -0
- data/embulk-core/src/main/java/org/embulk/plugin/PluginType.java +47 -0
- data/embulk-core/src/main/java/org/embulk/plugin/compat/InputPluginWrapper.java +102 -0
- data/embulk-core/src/main/java/org/embulk/plugin/compat/PluginWrappers.java +30 -0
- data/embulk-core/src/main/java/org/embulk/plugin/compat/TransactionalFileInputWrapper.java +96 -0
- data/embulk-core/src/main/java/org/embulk/plugin/compat/TransactionalFileOutputWrapper.java +102 -0
- data/embulk-core/src/main/java/org/embulk/plugin/compat/TransactionalPageOutputWrapper.java +95 -0
- data/embulk-core/src/main/java/org/embulk/spi/Buffer.java +148 -0
- data/embulk-core/src/main/java/org/embulk/spi/BufferAllocator.java +8 -0
- data/embulk-core/src/main/java/org/embulk/spi/Column.java +92 -0
- data/embulk-core/src/main/java/org/embulk/spi/ColumnConfig.java +112 -0
- data/embulk-core/src/main/java/org/embulk/spi/ColumnVisitor.java +14 -0
- data/embulk-core/src/main/java/org/embulk/spi/DecoderPlugin.java +16 -0
- data/embulk-core/src/main/java/org/embulk/spi/EncoderPlugin.java +16 -0
- data/embulk-core/src/main/java/org/embulk/spi/Exec.java +113 -0
- data/embulk-core/src/main/java/org/embulk/spi/ExecAction.java +6 -0
- data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +217 -0
- data/embulk-core/src/main/java/org/embulk/spi/ExecutorPlugin.java +19 -0
- data/embulk-core/src/main/java/org/embulk/spi/Extension.java +44 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileInput.java +11 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileInputPlugin.java +30 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +162 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileOutput.java +13 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileOutputPlugin.java +28 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileOutputRunner.java +202 -0
- data/embulk-core/src/main/java/org/embulk/spi/FilterPlugin.java +18 -0
- data/embulk-core/src/main/java/org/embulk/spi/FormatterPlugin.java +18 -0
- data/embulk-core/src/main/java/org/embulk/spi/GuessPlugin.java +9 -0
- data/embulk-core/src/main/java/org/embulk/spi/InputPlugin.java +33 -0
- data/embulk-core/src/main/java/org/embulk/spi/OutputPlugin.java +29 -0
- data/embulk-core/src/main/java/org/embulk/spi/Page.java +51 -0
- data/embulk-core/src/main/java/org/embulk/spi/PageBuilder.java +338 -0
- data/embulk-core/src/main/java/org/embulk/spi/PageFormat.java +47 -0
- data/embulk-core/src/main/java/org/embulk/spi/PageOutput.java +11 -0
- data/embulk-core/src/main/java/org/embulk/spi/PageReader.java +226 -0
- data/embulk-core/src/main/java/org/embulk/spi/ParserPlugin.java +17 -0
- data/embulk-core/src/main/java/org/embulk/spi/ProcessState.java +10 -0
- data/embulk-core/src/main/java/org/embulk/spi/ProcessTask.java +117 -0
- data/embulk-core/src/main/java/org/embulk/spi/Schema.java +134 -0
- data/embulk-core/src/main/java/org/embulk/spi/SchemaConfig.java +93 -0
- data/embulk-core/src/main/java/org/embulk/spi/SchemaConfigException.java +22 -0
- data/embulk-core/src/main/java/org/embulk/spi/TaskState.java +81 -0
- data/embulk-core/src/main/java/org/embulk/spi/TempFileException.java +19 -0
- data/embulk-core/src/main/java/org/embulk/spi/TempFileSpace.java +87 -0
- data/embulk-core/src/main/java/org/embulk/spi/Transactional.java +10 -0
- data/embulk-core/src/main/java/org/embulk/spi/TransactionalFileInput.java +17 -0
- data/embulk-core/src/main/java/org/embulk/spi/TransactionalFileOutput.java +19 -0
- data/embulk-core/src/main/java/org/embulk/spi/TransactionalPageOutput.java +17 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/DateTimeZoneSerDe.java +55 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/JRubyTimeParserHelper.java +8 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/JRubyTimeParserHelperFactory.java +6 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/Timestamp.java +159 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormat.java +100 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormatter.java +97 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParseException.java +10 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParser.java +104 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampSerDe.java +49 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/AbstractType.java +58 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/BooleanType.java +12 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/DoubleType.java +12 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/LongType.java +12 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/StringType.java +12 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/TimestampType.java +41 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/Type.java +15 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/TypeDeserializer.java +44 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/Types.java +14 -0
- data/embulk-core/src/main/java/org/embulk/spi/unit/ByteSize.java +156 -0
- data/embulk-core/src/main/java/org/embulk/spi/unit/LocalFile.java +106 -0
- data/embulk-core/src/main/java/org/embulk/spi/unit/LocalFileSerDe.java +113 -0
- data/embulk-core/src/main/java/org/embulk/spi/unit/ToString.java +54 -0
- data/embulk-core/src/main/java/org/embulk/spi/unit/ToStringMap.java +34 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/CharsetSerDe.java +55 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Decoders.java +81 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/DynamicColumnNotFoundException.java +10 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/DynamicColumnSetter.java +18 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/DynamicColumnSetterFactory.java +94 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/DynamicPageBuilder.java +161 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Encoders.java +81 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Executors.java +95 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/FileInputInputStream.java +111 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/FileOutputOutputStream.java +119 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Filters.java +100 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/InputStreamFileInput.java +190 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/InputStreamTransactionalFileInput.java +25 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Inputs.java +65 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/LineDecoder.java +118 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/LineEncoder.java +123 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/ListFileInput.java +52 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Newline.java +38 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/OutputStreamFileOutput.java +88 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/PagePrinter.java +102 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Pages.java +139 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/ResumableInputStream.java +128 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/RetryExecutor.java +130 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Timestamps.java +53 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/AbstractDynamicColumnSetter.java +79 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/BooleanColumnSetter.java +64 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/DefaultValueSetter.java +18 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/DoubleColumnSetter.java +61 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/LongColumnSetter.java +69 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/NullDefaultValueSetter.java +34 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/SkipColumnSetter.java +52 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/StringColumnSetter.java +56 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/TimestampColumnSetter.java +64 -0
- data/embulk-core/src/main/resources/embulk/logback-color.xml +72 -0
- data/embulk-core/src/main/resources/embulk/logback-console.xml +14 -0
- data/embulk-core/src/main/resources/embulk/logback-file.xml +25 -0
- data/embulk-core/src/main/resources/embulk/parent_first_packages.properties +70 -0
- data/embulk-core/src/main/resources/embulk/parent_first_resources.properties +28 -0
- data/embulk-core/src/test/java/org/embulk/EmbulkTestRuntime.java +114 -0
- data/embulk-core/src/test/java/org/embulk/GuiceBinder.java +72 -0
- data/embulk-core/src/test/java/org/embulk/RandomManager.java +53 -0
- data/embulk-core/src/test/java/org/embulk/TestPluginSourceModule.java +23 -0
- data/embulk-core/src/test/java/org/embulk/TestUtilityModule.java +17 -0
- data/embulk-core/src/test/java/org/embulk/config/TestConfigLoader.java +66 -0
- data/embulk-core/src/test/java/org/embulk/config/TestConfigSource.java +114 -0
- data/embulk-core/src/test/java/org/embulk/config/TestTaskSource.java +70 -0
- data/embulk-core/src/test/java/org/embulk/plugin/MockPluginSource.java +58 -0
- data/embulk-core/src/test/java/org/embulk/plugin/TestPluginType.java +18 -0
- data/embulk-core/src/test/java/org/embulk/spi/MockFileOutput.java +63 -0
- data/embulk-core/src/test/java/org/embulk/spi/MockFormatterPlugin.java +101 -0
- data/embulk-core/src/test/java/org/embulk/spi/MockParserPlugin.java +73 -0
- data/embulk-core/src/test/java/org/embulk/spi/PageTestUtils.java +57 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestBuffer.java +24 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestFileInputInputStream.java +89 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestFileInputRunner.java +196 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestFileOutputRunner.java +207 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestInputStreamFileInput.java +188 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestPageBuilderReader.java +319 -0
- data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestamp.java +116 -0
- data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestampFormatterParser.java +63 -0
- data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestampFormatterParserDeprecated.java +67 -0
- data/embulk-core/src/test/java/org/embulk/spi/type/TestTypeSerDe.java +45 -0
- data/embulk-core/src/test/java/org/embulk/spi/unit/TestByteSize.java +79 -0
- data/embulk-core/src/test/java/org/embulk/spi/util/TestLineDecoder.java +132 -0
- data/embulk-core/src/test/java/org/embulk/spi/util/TestLineEncoder.java +123 -0
- data/embulk-docs/Makefile +178 -0
- data/embulk-docs/build.gradle +32 -0
- data/embulk-docs/make.bat +243 -0
- data/embulk-docs/push-gh-pages.sh +49 -0
- data/embulk-docs/src/_static/embulk-architecture.png +0 -0
- data/embulk-docs/src/_static/embulk-logo.png +0 -0
- data/embulk-docs/src/_static/embulk-logo.svg +133 -0
- data/embulk-docs/src/built-in.rst +440 -0
- data/embulk-docs/src/conf.py +260 -0
- data/embulk-docs/src/customization.rst +184 -0
- data/embulk-docs/src/index.rst +84 -0
- data/embulk-docs/src/recipe.rst +8 -0
- data/embulk-docs/src/recipe/scheduled-csv-load-to-elasticsearch-kibana4.rst +153 -0
- data/embulk-docs/src/release.rst +57 -0
- data/embulk-docs/src/release/release-0.1.0.rst +8 -0
- data/embulk-docs/src/release/release-0.2.0.rst +16 -0
- data/embulk-docs/src/release/release-0.2.1.rst +19 -0
- data/embulk-docs/src/release/release-0.3.0.rst +34 -0
- data/embulk-docs/src/release/release-0.3.1.rst +11 -0
- data/embulk-docs/src/release/release-0.3.2.rst +15 -0
- data/embulk-docs/src/release/release-0.4.0.rst +74 -0
- data/embulk-docs/src/release/release-0.4.1.rst +18 -0
- data/embulk-docs/src/release/release-0.4.10.rst +17 -0
- data/embulk-docs/src/release/release-0.4.2.rst +18 -0
- data/embulk-docs/src/release/release-0.4.3.rst +34 -0
- data/embulk-docs/src/release/release-0.4.4.rst +39 -0
- data/embulk-docs/src/release/release-0.4.5.rst +24 -0
- data/embulk-docs/src/release/release-0.4.6.rst +30 -0
- data/embulk-docs/src/release/release-0.4.7.rst +16 -0
- data/embulk-docs/src/release/release-0.4.8.rst +15 -0
- data/embulk-docs/src/release/release-0.4.9.rst +23 -0
- data/embulk-docs/src/release/release-0.5.0.rst +89 -0
- data/embulk-docs/src/release/release-0.5.1.rst +13 -0
- data/embulk-docs/src/release/release-0.5.2.rst +30 -0
- data/embulk-docs/src/release/release-0.5.3.rst +22 -0
- data/embulk-docs/src/release/release-0.5.4.rst +24 -0
- data/embulk-docs/src/release/release-0.5.5.rst +18 -0
- data/embulk-docs/src/release/release-0.6.0.rst +34 -0
- data/embulk-docs/src/release/release-0.6.1.rst +11 -0
- data/embulk-docs/src/release/release-0.6.10.rst +15 -0
- data/embulk-docs/src/release/release-0.6.11.rst +19 -0
- data/embulk-docs/src/release/release-0.6.12.rst +31 -0
- data/embulk-docs/src/release/release-0.6.13.rst +23 -0
- data/embulk-docs/src/release/release-0.6.14.rst +47 -0
- data/embulk-docs/src/release/release-0.6.15.rst +26 -0
- data/embulk-docs/src/release/release-0.6.16.rst +26 -0
- data/embulk-docs/src/release/release-0.6.17.rst +39 -0
- data/embulk-docs/src/release/release-0.6.18.rst +14 -0
- data/embulk-docs/src/release/release-0.6.19.rst +18 -0
- data/embulk-docs/src/release/release-0.6.2.rst +17 -0
- data/embulk-docs/src/release/release-0.6.20.rst +19 -0
- data/embulk-docs/src/release/release-0.6.21.rst +20 -0
- data/embulk-docs/src/release/release-0.6.22.rst +26 -0
- data/embulk-docs/src/release/release-0.6.23.rst +17 -0
- data/embulk-docs/src/release/release-0.6.24.rst +13 -0
- data/embulk-docs/src/release/release-0.6.25.rst +12 -0
- data/embulk-docs/src/release/release-0.6.3.rst +23 -0
- data/embulk-docs/src/release/release-0.6.4.rst +13 -0
- data/embulk-docs/src/release/release-0.6.5.rst +17 -0
- data/embulk-docs/src/release/release-0.6.6.rst +17 -0
- data/embulk-docs/src/release/release-0.6.7.rst +17 -0
- data/embulk-docs/src/release/release-0.6.8.rst +24 -0
- data/embulk-docs/src/release/release-0.6.9.rst +24 -0
- data/embulk-docs/src/release/release-0.7.0.rst +96 -0
- data/embulk-standards/build.gradle +5 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java +284 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +379 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvTokenizer.java +411 -0
- data/embulk-standards/src/main/java/org/embulk/standards/GzipFileDecoderPlugin.java +55 -0
- data/embulk-standards/src/main/java/org/embulk/standards/GzipFileEncoderPlugin.java +71 -0
- data/embulk-standards/src/main/java/org/embulk/standards/LocalFileInputPlugin.java +203 -0
- data/embulk-standards/src/main/java/org/embulk/standards/LocalFileOutputPlugin.java +148 -0
- data/embulk-standards/src/main/java/org/embulk/standards/NullOutputPlugin.java +59 -0
- data/embulk-standards/src/main/java/org/embulk/standards/RenameFilterPlugin.java +56 -0
- data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginExtension.java +16 -0
- data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginModule.java +53 -0
- data/embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java +85 -0
- data/embulk-standards/src/main/resources/META-INF/services/org.embulk.spi.Extension +1 -0
- data/embulk-standards/src/test/java/org/embulk/standards/TestCsvFormatterPlugin.java +312 -0
- data/embulk-standards/src/test/java/org/embulk/standards/TestCsvParserPlugin.java +75 -0
- data/embulk-standards/src/test/java/org/embulk/standards/TestCsvTokenizer.java +360 -0
- data/embulk-standards/src/test/java/org/embulk/standards/TestRenameFilterPlugin.java +88 -0
- data/embulk.gemspec +39 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +6 -0
- data/gradlew +164 -0
- data/gradlew.bat +90 -0
- data/lib/embulk.rb +72 -0
- data/lib/embulk/buffer.rb +22 -0
- data/lib/embulk/column.rb +70 -0
- data/lib/embulk/command/embulk_bundle.rb +56 -0
- data/lib/embulk/command/embulk_example.rb +32 -0
- data/lib/embulk/command/embulk_generate_bin.rb +62 -0
- data/lib/embulk/command/embulk_main.rb +2 -0
- data/lib/embulk/command/embulk_migrate_plugin.rb +170 -0
- data/lib/embulk/command/embulk_new_plugin.rb +124 -0
- data/lib/embulk/command/embulk_run.rb +470 -0
- data/lib/embulk/command/embulk_selfupdate.rb +84 -0
- data/lib/embulk/data/bundle/.bundle/config +3 -0
- data/lib/embulk/data/bundle/.ruby-version +1 -0
- data/lib/embulk/data/bundle/Gemfile +26 -0
- data/lib/embulk/data/bundle/embulk/filter/example.rb +42 -0
- data/lib/embulk/data/bundle/embulk/input/example.rb +54 -0
- data/lib/embulk/data/bundle/embulk/output/example.rb +58 -0
- data/lib/embulk/data/new/LICENSE.txt +21 -0
- data/lib/embulk/data/new/README.md.erb +111 -0
- data/lib/embulk/data/new/gitignore.erb +13 -0
- data/lib/embulk/data/new/java/build.gradle.erb +73 -0
- data/lib/embulk/data/new/java/decoder.java.erb +84 -0
- data/lib/embulk/data/new/java/encoder.java.erb +86 -0
- data/lib/embulk/data/new/java/file_input.java.erb +143 -0
- data/lib/embulk/data/new/java/file_output.java.erb +93 -0
- data/lib/embulk/data/new/java/filter.java.erb +56 -0
- data/lib/embulk/data/new/java/formatter.java.erb +54 -0
- data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.properties +6 -0
- data/lib/embulk/data/new/java/gradlew +164 -0
- data/lib/embulk/data/new/java/gradlew.bat +90 -0
- data/lib/embulk/data/new/java/input.java.erb +87 -0
- data/lib/embulk/data/new/java/output.java.erb +77 -0
- data/lib/embulk/data/new/java/parser.java.erb +60 -0
- data/lib/embulk/data/new/java/plugin_loader.rb.erb +3 -0
- data/lib/embulk/data/new/java/test.java.erb +5 -0
- data/lib/embulk/data/new/ruby/.ruby-version +1 -0
- data/lib/embulk/data/new/ruby/Gemfile +2 -0
- data/lib/embulk/data/new/ruby/Rakefile +3 -0
- data/lib/embulk/data/new/ruby/decoder_guess.rb.erb +25 -0
- data/lib/embulk/data/new/ruby/filter.rb.erb +41 -0
- data/lib/embulk/data/new/ruby/formatter.rb.erb +49 -0
- data/lib/embulk/data/new/ruby/gemspec.erb +20 -0
- data/lib/embulk/data/new/ruby/input.rb.erb +59 -0
- data/lib/embulk/data/new/ruby/output.rb.erb +61 -0
- data/lib/embulk/data/new/ruby/parser.rb.erb +44 -0
- data/lib/embulk/data/new/ruby/parser_guess.rb.erb +65 -0
- data/lib/embulk/data/package_data.rb +50 -0
- data/lib/embulk/data_source.rb +220 -0
- data/lib/embulk/decoder_plugin.rb +27 -0
- data/lib/embulk/encoder_plugin.rb +27 -0
- data/lib/embulk/error.rb +8 -0
- data/lib/embulk/executor_plugin.rb +23 -0
- data/lib/embulk/file_input.rb +87 -0
- data/lib/embulk/file_input_plugin.rb +27 -0
- data/lib/embulk/file_output.rb +56 -0
- data/lib/embulk/file_output_plugin.rb +27 -0
- data/lib/embulk/filter_plugin.rb +105 -0
- data/lib/embulk/formatter_plugin.rb +105 -0
- data/lib/embulk/guess/charset.rb +44 -0
- data/lib/embulk/guess/csv.rb +327 -0
- data/lib/embulk/guess/gzip.rb +18 -0
- data/lib/embulk/guess/newline.rb +22 -0
- data/lib/embulk/guess/schema_guess.rb +118 -0
- data/lib/embulk/guess/time_format_guess.rb +394 -0
- data/lib/embulk/guess_plugin.rb +129 -0
- data/lib/embulk/input_plugin.rb +121 -0
- data/lib/embulk/java/bootstrap.rb +24 -0
- data/lib/embulk/java/imports.rb +69 -0
- data/lib/embulk/java/time_helper.rb +79 -0
- data/lib/embulk/java_plugin.rb +90 -0
- data/lib/embulk/logger.rb +154 -0
- data/lib/embulk/output_plugin.rb +150 -0
- data/lib/embulk/page.rb +30 -0
- data/lib/embulk/page_builder.rb +76 -0
- data/lib/embulk/parser_plugin.rb +78 -0
- data/lib/embulk/plugin.rb +239 -0
- data/lib/embulk/plugin_registry.rb +96 -0
- data/lib/embulk/runner.rb +184 -0
- data/lib/embulk/schema.rb +103 -0
- data/lib/embulk/version.rb +3 -0
- data/settings.gradle +6 -0
- data/test/guess/test_schema_guess.rb +11 -0
- data/test/guess/test_time_format_guess.rb +133 -0
- data/test/helper.rb +21 -0
- data/test/run-test.rb +14 -0
- metadata +566 -0
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
package org.embulk.exec;
|
|
2
|
+
|
|
3
|
+
import org.slf4j.ILoggerFactory;
|
|
4
|
+
import com.google.common.base.Preconditions;
|
|
5
|
+
import com.google.inject.Module;
|
|
6
|
+
import com.google.inject.Binder;
|
|
7
|
+
import com.google.inject.Scopes;
|
|
8
|
+
import com.fasterxml.jackson.module.guice.ObjectMapperModule;
|
|
9
|
+
import com.fasterxml.jackson.datatype.guava.GuavaModule;
|
|
10
|
+
import com.fasterxml.jackson.datatype.joda.JodaModule;
|
|
11
|
+
import org.embulk.config.ModelManager;
|
|
12
|
+
import org.embulk.spi.time.DateTimeZoneSerDe;
|
|
13
|
+
import org.embulk.spi.time.TimestampSerDe;
|
|
14
|
+
import org.embulk.spi.ParserPlugin;
|
|
15
|
+
import org.embulk.spi.ExecutorPlugin;
|
|
16
|
+
import org.embulk.spi.BufferAllocator;
|
|
17
|
+
import org.embulk.spi.util.CharsetSerDe;
|
|
18
|
+
import org.embulk.spi.unit.LocalFileSerDe;
|
|
19
|
+
import static org.embulk.plugin.InjectedPluginSource.registerPluginTo;
|
|
20
|
+
|
|
21
|
+
public class ExecModule
|
|
22
|
+
implements Module
|
|
23
|
+
{
|
|
24
|
+
@Override
|
|
25
|
+
public void configure(Binder binder)
|
|
26
|
+
{
|
|
27
|
+
Preconditions.checkNotNull(binder, "binder is null.");
|
|
28
|
+
|
|
29
|
+
binder.bind(ILoggerFactory.class).toProvider(LoggerProvider.class).in(Scopes.SINGLETON);
|
|
30
|
+
binder.bind(ModelManager.class).in(Scopes.SINGLETON);
|
|
31
|
+
binder.bind(BufferAllocator.class).to(PooledBufferAllocator.class).in(Scopes.SINGLETON);
|
|
32
|
+
binder.bind(TempFileAllocator.class).in(Scopes.SINGLETON);
|
|
33
|
+
|
|
34
|
+
// GuessExecutor
|
|
35
|
+
registerPluginTo(binder, ParserPlugin.class, "system_guess", GuessExecutor.GuessParserPlugin.class);
|
|
36
|
+
registerPluginTo(binder, ParserPlugin.class, "system_sampling", SamplingParserPlugin.class);
|
|
37
|
+
|
|
38
|
+
// LocalExecutorPlugin
|
|
39
|
+
binder.bind(LocalThreadExecutor.class).in(Scopes.SINGLETON);
|
|
40
|
+
registerPluginTo(binder, ExecutorPlugin.class, "local", LocalExecutorPlugin.class);
|
|
41
|
+
|
|
42
|
+
// serde
|
|
43
|
+
ObjectMapperModule mapper = new ObjectMapperModule();
|
|
44
|
+
DateTimeZoneSerDe.configure(mapper);
|
|
45
|
+
TimestampSerDe.configure(mapper);
|
|
46
|
+
CharsetSerDe.configure(mapper);
|
|
47
|
+
LocalFileSerDe.configure(mapper);
|
|
48
|
+
mapper.registerModule(new GuavaModule()); // jackson-datatype-guava
|
|
49
|
+
mapper.registerModule(new JodaModule()); // jackson-datatype-joda
|
|
50
|
+
mapper.configure(binder);
|
|
51
|
+
}
|
|
52
|
+
}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
package org.embulk.exec;
|
|
2
|
+
|
|
3
|
+
import java.util.List;
|
|
4
|
+
import org.embulk.config.ConfigDiff;
|
|
5
|
+
|
|
6
|
+
public class ExecutionResult
|
|
7
|
+
{
|
|
8
|
+
private final ConfigDiff configDiff;
|
|
9
|
+
private final List<Throwable> ignoredExceptions;
|
|
10
|
+
|
|
11
|
+
public ExecutionResult(ConfigDiff configDiff, List<Throwable> ignoredExceptions)
|
|
12
|
+
{
|
|
13
|
+
this.configDiff = configDiff;
|
|
14
|
+
this.ignoredExceptions = ignoredExceptions;
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
public ConfigDiff getConfigDiff()
|
|
18
|
+
{
|
|
19
|
+
return configDiff;
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
public List<Throwable> getIgnoredExceptions()
|
|
23
|
+
{
|
|
24
|
+
return ignoredExceptions;
|
|
25
|
+
}
|
|
26
|
+
}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
package org.embulk.exec;
|
|
2
|
+
|
|
3
|
+
import java.util.ServiceLoader;
|
|
4
|
+
import com.google.inject.Module;
|
|
5
|
+
import com.google.inject.Binder;
|
|
6
|
+
import org.embulk.config.ConfigSource;
|
|
7
|
+
import org.embulk.spi.Extension;
|
|
8
|
+
|
|
9
|
+
/**
|
|
10
|
+
* ExtensionServiceLoaderModule loads Extensions using java.util.ServiceLoader
|
|
11
|
+
* mechanism.
|
|
12
|
+
* Jar packages providing an extension need to include
|
|
13
|
+
* META-INF/services/org.embulk.exec.Extension file. Contents of the file is
|
|
14
|
+
* one-line text of the extension class name (e.g. com.example.MyPluginSourceExtension).
|
|
15
|
+
*/
|
|
16
|
+
public class ExtensionServiceLoaderModule
|
|
17
|
+
implements Module
|
|
18
|
+
{
|
|
19
|
+
private final ClassLoader classLoader;
|
|
20
|
+
private final ConfigSource systemConfig;
|
|
21
|
+
|
|
22
|
+
public ExtensionServiceLoaderModule(ConfigSource systemConfig)
|
|
23
|
+
{
|
|
24
|
+
this(ExtensionServiceLoaderModule.class.getClassLoader(), systemConfig);
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
public ExtensionServiceLoaderModule(ClassLoader classLoader, ConfigSource systemConfig)
|
|
28
|
+
{
|
|
29
|
+
this.classLoader = classLoader;
|
|
30
|
+
this.systemConfig = systemConfig;
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
@Override
|
|
34
|
+
public void configure(Binder binder)
|
|
35
|
+
{
|
|
36
|
+
ServiceLoader<Extension> serviceLoader = ServiceLoader.load(Extension.class, classLoader);
|
|
37
|
+
for (Extension extension : serviceLoader) {
|
|
38
|
+
for (Module module : extension.getModules(systemConfig)) {
|
|
39
|
+
module.configure(binder);
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
package org.embulk.exec;
|
|
2
|
+
|
|
3
|
+
import javax.inject.Qualifier;
|
|
4
|
+
import java.lang.annotation.Retention;
|
|
5
|
+
import java.lang.annotation.Target;
|
|
6
|
+
import static java.lang.annotation.ElementType.FIELD;
|
|
7
|
+
import static java.lang.annotation.ElementType.METHOD;
|
|
8
|
+
import static java.lang.annotation.ElementType.PARAMETER;
|
|
9
|
+
import static java.lang.annotation.RetentionPolicy.RUNTIME;
|
|
10
|
+
|
|
11
|
+
@Retention(RUNTIME)
|
|
12
|
+
@Target({FIELD, PARAMETER, METHOD})
|
|
13
|
+
@Qualifier
|
|
14
|
+
public @interface ForGuess
|
|
15
|
+
{
|
|
16
|
+
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
package org.embulk.exec;
|
|
2
|
+
|
|
3
|
+
import javax.inject.Qualifier;
|
|
4
|
+
import java.lang.annotation.Retention;
|
|
5
|
+
import java.lang.annotation.Target;
|
|
6
|
+
import static java.lang.annotation.ElementType.FIELD;
|
|
7
|
+
import static java.lang.annotation.ElementType.METHOD;
|
|
8
|
+
import static java.lang.annotation.ElementType.PARAMETER;
|
|
9
|
+
import static java.lang.annotation.RetentionPolicy.RUNTIME;
|
|
10
|
+
|
|
11
|
+
@Retention(RUNTIME)
|
|
12
|
+
@Target({FIELD, PARAMETER, METHOD})
|
|
13
|
+
@Qualifier
|
|
14
|
+
public @interface ForSystemConfig
|
|
15
|
+
{
|
|
16
|
+
}
|
|
@@ -0,0 +1,373 @@
|
|
|
1
|
+
package org.embulk.exec;
|
|
2
|
+
|
|
3
|
+
import java.util.List;
|
|
4
|
+
import java.util.Set;
|
|
5
|
+
import java.util.ArrayList;
|
|
6
|
+
import java.util.concurrent.ExecutionException;
|
|
7
|
+
import com.google.common.collect.ImmutableList;
|
|
8
|
+
import com.google.common.base.Throwables;
|
|
9
|
+
import com.google.inject.Inject;
|
|
10
|
+
import com.google.inject.Binder;
|
|
11
|
+
import com.google.inject.multibindings.Multibinder;
|
|
12
|
+
import org.embulk.plugin.PluginType;
|
|
13
|
+
import org.embulk.config.Config;
|
|
14
|
+
import org.embulk.config.ConfigDefault;
|
|
15
|
+
import org.embulk.config.ConfigDiff;
|
|
16
|
+
import org.embulk.config.Task;
|
|
17
|
+
import org.embulk.config.TaskSource;
|
|
18
|
+
import org.embulk.config.ConfigSource;
|
|
19
|
+
import org.embulk.config.TaskReport;
|
|
20
|
+
import org.embulk.spi.Schema;
|
|
21
|
+
import org.embulk.spi.Page;
|
|
22
|
+
import org.embulk.spi.Buffer;
|
|
23
|
+
import org.embulk.spi.InputPlugin;
|
|
24
|
+
import org.embulk.spi.FileInputPlugin;
|
|
25
|
+
import org.embulk.spi.ParserPlugin;
|
|
26
|
+
import org.embulk.spi.GuessPlugin;
|
|
27
|
+
import org.embulk.spi.Exec;
|
|
28
|
+
import org.embulk.spi.ExecAction;
|
|
29
|
+
import org.embulk.spi.ExecSession;
|
|
30
|
+
import org.embulk.spi.FileInput;
|
|
31
|
+
import org.embulk.spi.PageOutput;
|
|
32
|
+
import org.embulk.spi.TransactionalFileInput;
|
|
33
|
+
import org.embulk.spi.FileInputRunner;
|
|
34
|
+
|
|
35
|
+
public class GuessExecutor
|
|
36
|
+
{
|
|
37
|
+
private final List<PluginType> defaultGuessPlugins;
|
|
38
|
+
|
|
39
|
+
private interface GuessExecutorSystemTask
|
|
40
|
+
extends Task
|
|
41
|
+
{
|
|
42
|
+
@Config("guess_plugins")
|
|
43
|
+
@ConfigDefault("[]")
|
|
44
|
+
public List<PluginType> getGuessPlugins();
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
private interface GuessExecutorTask
|
|
48
|
+
extends Task
|
|
49
|
+
{
|
|
50
|
+
@Config("guess_plugins")
|
|
51
|
+
@ConfigDefault("[]")
|
|
52
|
+
public List<PluginType> getGuessPlugins();
|
|
53
|
+
|
|
54
|
+
@Config("exclude_guess_plugins")
|
|
55
|
+
@ConfigDefault("[]")
|
|
56
|
+
public List<PluginType> getExcludeGuessPlugins();
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
public static void registerDefaultGuessPluginTo(Binder binder, PluginType type)
|
|
60
|
+
{
|
|
61
|
+
Multibinder<PluginType> multibinder = Multibinder.newSetBinder(binder, PluginType.class, ForGuess.class);
|
|
62
|
+
multibinder.addBinding().toInstance(type);
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
/**
 * Builds the list of default guess plugins: the injected ForGuess set
 * first, followed by the "guess_plugins" entries of the system config.
 *
 * @param systemConfig system configuration, read through GuessExecutorSystemTask
 * @param defaultGuessPlugins plugin types registered under the ForGuess qualifier
 */
@Inject
public GuessExecutor(@ForSystemConfig ConfigSource systemConfig,
        @ForGuess Set<PluginType> defaultGuessPlugins)
{
    GuessExecutorSystemTask systemTask = systemConfig.loadConfig(GuessExecutorSystemTask.class);

    this.defaultGuessPlugins = ImmutableList.<PluginType>builder()
            .addAll(defaultGuessPlugins)
            .addAll(systemTask.getGuessPlugins())
            .build();
}
|
|
76
|
+
|
|
77
|
+
public ConfigDiff guess(ExecSession exec, final ConfigSource config)
|
|
78
|
+
{
|
|
79
|
+
try {
|
|
80
|
+
return Exec.doWith(exec, new ExecAction<ConfigDiff>() {
|
|
81
|
+
public ConfigDiff run()
|
|
82
|
+
{
|
|
83
|
+
try (SetCurrentThreadName dontCare = new SetCurrentThreadName("guess")) {
|
|
84
|
+
return doGuess(config);
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
});
|
|
88
|
+
} catch (ExecutionException ex) {
|
|
89
|
+
throw Throwables.propagate(ex.getCause());
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
protected InputPlugin newInputPlugin(ConfigSource inputConfig)
|
|
94
|
+
{
|
|
95
|
+
return Exec.newPlugin(InputPlugin.class, inputConfig.get(PluginType.class, "type"));
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
private ConfigDiff doGuess(ConfigSource config)
|
|
99
|
+
{
|
|
100
|
+
ConfigSource inputConfig = config.getNested("in");
|
|
101
|
+
|
|
102
|
+
InputPlugin input = newInputPlugin(inputConfig);
|
|
103
|
+
|
|
104
|
+
ConfigDiff inputGuessed;
|
|
105
|
+
try {
|
|
106
|
+
inputGuessed = input.guess(inputConfig);
|
|
107
|
+
} catch (AbstractMethodError ex) {
|
|
108
|
+
// for backward compatibility with embulk v0.4 interface
|
|
109
|
+
throw new UnsupportedOperationException(input.getClass().getSimpleName()+".guess(ConfigSource) is not implemented. This input plugin does not support guessing.");
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
ConfigDiff wrapped = Exec.newConfigDiff();
|
|
113
|
+
wrapped.getNestedOrSetEmpty("in").merge(inputGuessed);
|
|
114
|
+
return wrapped;
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
// called by FileInputRunner
|
|
118
|
+
public ConfigDiff guessParserConfig(Buffer sample, ConfigSource inputConfig, ConfigSource execConfig)
|
|
119
|
+
{
|
|
120
|
+
List<PluginType> guessPlugins = new ArrayList<PluginType>(defaultGuessPlugins);
|
|
121
|
+
|
|
122
|
+
GuessExecutorTask task = execConfig.loadConfig(GuessExecutorTask.class);
|
|
123
|
+
guessPlugins.addAll(task.getGuessPlugins());
|
|
124
|
+
guessPlugins.removeAll(task.getExcludeGuessPlugins());
|
|
125
|
+
|
|
126
|
+
return guessParserConfig(sample, inputConfig, guessPlugins);
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
private ConfigDiff guessParserConfig(Buffer sample,
|
|
130
|
+
ConfigSource config, List<PluginType> guessPlugins)
|
|
131
|
+
{
|
|
132
|
+
// repeat guessing upto 10 times
|
|
133
|
+
ConfigDiff lastGuessed = Exec.newConfigDiff();
|
|
134
|
+
for (int i=0; i < 10; i++) {
|
|
135
|
+
// include last-guessed config to run guess input
|
|
136
|
+
ConfigSource originalConfig = config.deepCopy().merge(lastGuessed);
|
|
137
|
+
ConfigSource guessInputConfig = originalConfig.deepCopy();
|
|
138
|
+
guessInputConfig.getNestedOrSetEmpty("parser")
|
|
139
|
+
.set("type", "system_guess") // override in.parser.type so that FileInputRunner.run uses GuessParserPlugin
|
|
140
|
+
.set("guess_plugins", guessPlugins)
|
|
141
|
+
.set("orig_config", originalConfig);
|
|
142
|
+
|
|
143
|
+
// run FileInputPlugin
|
|
144
|
+
final FileInputRunner input = new FileInputRunner(new BufferFileInputPlugin(sample));
|
|
145
|
+
ConfigDiff guessed;
|
|
146
|
+
try {
|
|
147
|
+
input.transaction(guessInputConfig, new InputPlugin.Control() {
|
|
148
|
+
public List<TaskReport> run(TaskSource inputTaskSource, Schema schema, int taskCount)
|
|
149
|
+
{
|
|
150
|
+
if (taskCount == 0) {
|
|
151
|
+
throw new NoSampleException("No input files to guess");
|
|
152
|
+
}
|
|
153
|
+
input.run(inputTaskSource, null, 0, new PageOutput() {
|
|
154
|
+
@Override
|
|
155
|
+
public void add(Page page)
|
|
156
|
+
{
|
|
157
|
+
throw new RuntimeException("Input plugin must be a FileInputPlugin to guess parser configuration"); // TODO exception class
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
@Override
|
|
161
|
+
public void finish() { }
|
|
162
|
+
|
|
163
|
+
@Override
|
|
164
|
+
public void close() { }
|
|
165
|
+
});
|
|
166
|
+
throw new AssertionError("Guess executor must throw GuessedNoticeError");
|
|
167
|
+
}
|
|
168
|
+
});
|
|
169
|
+
|
|
170
|
+
throw new AssertionError("Guess executor must throw GuessedNoticeError");
|
|
171
|
+
|
|
172
|
+
} catch (GuessedNoticeError error) {
|
|
173
|
+
guessed = lastGuessed.deepCopy().merge(error.getGuessedConfig());
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
// merge to the last-guessed config
|
|
177
|
+
if (lastGuessed.equals(guessed)) {
|
|
178
|
+
// not changed
|
|
179
|
+
return lastGuessed;
|
|
180
|
+
}
|
|
181
|
+
lastGuessed = guessed;
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
return lastGuessed;
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
private static class BufferFileInputPlugin
|
|
188
|
+
implements FileInputPlugin
|
|
189
|
+
{
|
|
190
|
+
private Buffer buffer;
|
|
191
|
+
|
|
192
|
+
public BufferFileInputPlugin(Buffer buffer)
|
|
193
|
+
{
|
|
194
|
+
this.buffer = buffer;
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
public ConfigDiff transaction(ConfigSource config, FileInputPlugin.Control control)
|
|
198
|
+
{
|
|
199
|
+
control.run(Exec.newTaskSource(), 1);
|
|
200
|
+
return Exec.newConfigDiff();
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
public ConfigDiff resume(TaskSource taskSource,
|
|
204
|
+
int taskCount,
|
|
205
|
+
FileInputPlugin.Control control)
|
|
206
|
+
{
|
|
207
|
+
throw new UnsupportedOperationException();
|
|
208
|
+
}
|
|
209
|
+
|
|
210
|
+
public void cleanup(TaskSource taskSource,
|
|
211
|
+
int taskCount,
|
|
212
|
+
List<TaskReport> successTaskReports)
|
|
213
|
+
{
|
|
214
|
+
if (buffer != null) {
|
|
215
|
+
buffer.release();
|
|
216
|
+
buffer = null;
|
|
217
|
+
}
|
|
218
|
+
}
|
|
219
|
+
|
|
220
|
+
public TransactionalFileInput open(TaskSource taskSource, int taskIndex)
|
|
221
|
+
{
|
|
222
|
+
return new BufferTransactionalFileInput(buffer);
|
|
223
|
+
}
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
private static class BufferTransactionalFileInput
|
|
227
|
+
implements TransactionalFileInput
|
|
228
|
+
{
|
|
229
|
+
private Buffer buffer;
|
|
230
|
+
|
|
231
|
+
public BufferTransactionalFileInput(Buffer buffer)
|
|
232
|
+
{
|
|
233
|
+
this.buffer = buffer;
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
@Override
|
|
237
|
+
public Buffer poll()
|
|
238
|
+
{
|
|
239
|
+
Buffer b = buffer;
|
|
240
|
+
buffer = null;
|
|
241
|
+
return b;
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
@Override
|
|
245
|
+
public boolean nextFile()
|
|
246
|
+
{
|
|
247
|
+
return buffer != null;
|
|
248
|
+
}
|
|
249
|
+
|
|
250
|
+
@Override
|
|
251
|
+
public void close() { }
|
|
252
|
+
|
|
253
|
+
@Override
|
|
254
|
+
public void abort() { }
|
|
255
|
+
|
|
256
|
+
@Override
|
|
257
|
+
public TaskReport commit()
|
|
258
|
+
{
|
|
259
|
+
return null;
|
|
260
|
+
}
|
|
261
|
+
}
|
|
262
|
+
|
|
263
|
+
public static class GuessParserPlugin
|
|
264
|
+
implements ParserPlugin
|
|
265
|
+
{
|
|
266
|
+
private interface PluginTask
|
|
267
|
+
extends Task
|
|
268
|
+
{
|
|
269
|
+
@Config("guess_plugins")
|
|
270
|
+
public List<PluginType> getGuessPluginTypes();
|
|
271
|
+
|
|
272
|
+
@Config("orig_config")
|
|
273
|
+
public ConfigSource getOriginalConfig();
|
|
274
|
+
}
|
|
275
|
+
|
|
276
|
+
@Override
|
|
277
|
+
public void transaction(ConfigSource config, ParserPlugin.Control control)
|
|
278
|
+
{
|
|
279
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
|
280
|
+
control.run(task.dump(), null);
|
|
281
|
+
}
|
|
282
|
+
|
|
283
|
+
@Override
|
|
284
|
+
public void run(TaskSource taskSource, Schema schema,
|
|
285
|
+
FileInput input, PageOutput pageOutput)
|
|
286
|
+
{
|
|
287
|
+
PluginTask task = taskSource.loadTask(PluginTask.class);
|
|
288
|
+
final ConfigSource originalConfig = task.getOriginalConfig();
|
|
289
|
+
|
|
290
|
+
// get sample buffer
|
|
291
|
+
Buffer sample = getFirstBuffer(input);
|
|
292
|
+
|
|
293
|
+
// load guess plugins
|
|
294
|
+
ImmutableList.Builder<GuessPlugin> builder = ImmutableList.builder();
|
|
295
|
+
for (PluginType guessType : task.getGuessPluginTypes()) {
|
|
296
|
+
GuessPlugin guess = Exec.newPlugin(GuessPlugin.class, guessType);
|
|
297
|
+
builder.add(guess);
|
|
298
|
+
}
|
|
299
|
+
List<GuessPlugin> guesses = builder.build();
|
|
300
|
+
|
|
301
|
+
// run guess plugins
|
|
302
|
+
ConfigSource mergedConfig = originalConfig.deepCopy();
|
|
303
|
+
ConfigDiff mergedGuessed = Exec.newConfigDiff();
|
|
304
|
+
for (int i=0; i < guesses.size(); i++) {
|
|
305
|
+
ConfigDiff guessed = guesses.get(i).guess(originalConfig, sample);
|
|
306
|
+
guessed = addAssumedDecoderConfigs(originalConfig, guessed);
|
|
307
|
+
mergedGuessed.merge(guessed);
|
|
308
|
+
mergedConfig.merge(mergedGuessed);
|
|
309
|
+
if (!mergedConfig.equals(originalConfig)) {
|
|
310
|
+
// config updated
|
|
311
|
+
throw new GuessedNoticeError(mergedGuessed);
|
|
312
|
+
}
|
|
313
|
+
}
|
|
314
|
+
throw new GuessedNoticeError(mergedGuessed);
|
|
315
|
+
}
|
|
316
|
+
|
|
317
|
+
private static Buffer getFirstBuffer(FileInput input)
|
|
318
|
+
{
|
|
319
|
+
// The first buffer is created by SamplingParserPlugin. See FileInputRunner.guess.
|
|
320
|
+
RuntimeException decodeException = null;
|
|
321
|
+
try {
|
|
322
|
+
while (input.nextFile()) {
|
|
323
|
+
Buffer sample = input.poll();
|
|
324
|
+
if (sample != null) {
|
|
325
|
+
return sample;
|
|
326
|
+
}
|
|
327
|
+
}
|
|
328
|
+
} catch (RuntimeException ex) {
|
|
329
|
+
// ignores exceptions because FileDecoderPlugin can throw exceptions
|
|
330
|
+
// such as "Unexpected end of ZLIB input stream"
|
|
331
|
+
decodeException = ex;
|
|
332
|
+
}
|
|
333
|
+
if (decodeException != null) {
|
|
334
|
+
throw decodeException;
|
|
335
|
+
}
|
|
336
|
+
throw new NoSampleException("No input buffer to guess");
|
|
337
|
+
}
|
|
338
|
+
|
|
339
|
+
private static class ConfigSourceList extends ArrayList<ConfigSource> { };
|
|
340
|
+
|
|
341
|
+
private static ConfigDiff addAssumedDecoderConfigs(ConfigSource originalConfig, ConfigDiff guessed)
|
|
342
|
+
{
|
|
343
|
+
List<ConfigSource> guessedDecoders = guessed.get(ConfigSourceList.class, "decoders", null);
|
|
344
|
+
if (guessedDecoders == null) {
|
|
345
|
+
return guessed;
|
|
346
|
+
} else {
|
|
347
|
+
List<ConfigSource> assumedDecoders = originalConfig.get(ConfigSourceList.class, "decoders", new ConfigSourceList());
|
|
348
|
+
ImmutableList.Builder<ConfigSource> added = ImmutableList.builder();
|
|
349
|
+
for (ConfigSource assuemed : assumedDecoders) {
|
|
350
|
+
added.add(Exec.newConfigSource());
|
|
351
|
+
}
|
|
352
|
+
added.addAll(guessedDecoders);
|
|
353
|
+
return guessed.set("decoders", added.build());
|
|
354
|
+
}
|
|
355
|
+
}
|
|
356
|
+
}
|
|
357
|
+
|
|
358
|
+
public static class GuessedNoticeError
|
|
359
|
+
extends Error
|
|
360
|
+
{
|
|
361
|
+
private final ConfigDiff guessedConfig;
|
|
362
|
+
|
|
363
|
+
public GuessedNoticeError(ConfigDiff guessedConfig)
|
|
364
|
+
{
|
|
365
|
+
this.guessedConfig = guessedConfig;
|
|
366
|
+
}
|
|
367
|
+
|
|
368
|
+
public ConfigDiff getGuessedConfig()
|
|
369
|
+
{
|
|
370
|
+
return guessedConfig;
|
|
371
|
+
}
|
|
372
|
+
}
|
|
373
|
+
}
|