embulk 0.7.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (393) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -0
  3. data/.ruby-version +1 -0
  4. data/.travis.yml +18 -0
  5. data/COPYING +14 -0
  6. data/Gemfile +2 -0
  7. data/Gemfile.lock +31 -0
  8. data/README.md +206 -0
  9. data/Rakefile +26 -0
  10. data/appveyor.yml +20 -0
  11. data/bin/embulk +106 -0
  12. data/build.gradle +338 -0
  13. data/embulk-cli/build.gradle +6 -0
  14. data/embulk-cli/src/main/java/org/embulk/cli/Main.java +22 -0
  15. data/embulk-cli/src/main/sh/selfrun.sh +158 -0
  16. data/embulk-cli/src/test/java/org/embulk/cli/DummyMain.java +23 -0
  17. data/embulk-cli/src/test/java/org/embulk/cli/SelfrunTest.java +281 -0
  18. data/embulk-core/build.gradle +59 -0
  19. data/embulk-core/src/main/java/org/embulk/EmbulkEmbed.java +315 -0
  20. data/embulk-core/src/main/java/org/embulk/EmbulkService.java +76 -0
  21. data/embulk-core/src/main/java/org/embulk/command/PreviewPrinter.java +84 -0
  22. data/embulk-core/src/main/java/org/embulk/command/TablePreviewPrinter.java +107 -0
  23. data/embulk-core/src/main/java/org/embulk/command/VerticalPreviewPrinter.java +47 -0
  24. data/embulk-core/src/main/java/org/embulk/config/CommitReport.java +33 -0
  25. data/embulk-core/src/main/java/org/embulk/config/Config.java +15 -0
  26. data/embulk-core/src/main/java/org/embulk/config/ConfigDefault.java +15 -0
  27. data/embulk-core/src/main/java/org/embulk/config/ConfigDiff.java +29 -0
  28. data/embulk-core/src/main/java/org/embulk/config/ConfigException.java +20 -0
  29. data/embulk-core/src/main/java/org/embulk/config/ConfigInject.java +14 -0
  30. data/embulk-core/src/main/java/org/embulk/config/ConfigLoader.java +141 -0
  31. data/embulk-core/src/main/java/org/embulk/config/ConfigSource.java +31 -0
  32. data/embulk-core/src/main/java/org/embulk/config/DataSource.java +39 -0
  33. data/embulk-core/src/main/java/org/embulk/config/DataSourceImpl.java +231 -0
  34. data/embulk-core/src/main/java/org/embulk/config/DataSourceSerDe.java +84 -0
  35. data/embulk-core/src/main/java/org/embulk/config/GenericTypeReference.java +20 -0
  36. data/embulk-core/src/main/java/org/embulk/config/ModelManager.java +123 -0
  37. data/embulk-core/src/main/java/org/embulk/config/Task.java +10 -0
  38. data/embulk-core/src/main/java/org/embulk/config/TaskInvocationHandler.java +180 -0
  39. data/embulk-core/src/main/java/org/embulk/config/TaskReport.java +29 -0
  40. data/embulk-core/src/main/java/org/embulk/config/TaskSerDe.java +345 -0
  41. data/embulk-core/src/main/java/org/embulk/config/TaskSource.java +31 -0
  42. data/embulk-core/src/main/java/org/embulk/config/TaskValidationException.java +38 -0
  43. data/embulk-core/src/main/java/org/embulk/config/TaskValidator.java +24 -0
  44. data/embulk-core/src/main/java/org/embulk/exec/BulkLoader.java +652 -0
  45. data/embulk-core/src/main/java/org/embulk/exec/ExecModule.java +52 -0
  46. data/embulk-core/src/main/java/org/embulk/exec/ExecutionInterruptedException.java +10 -0
  47. data/embulk-core/src/main/java/org/embulk/exec/ExecutionResult.java +26 -0
  48. data/embulk-core/src/main/java/org/embulk/exec/ExtensionServiceLoaderModule.java +43 -0
  49. data/embulk-core/src/main/java/org/embulk/exec/ForGuess.java +16 -0
  50. data/embulk-core/src/main/java/org/embulk/exec/ForSystemConfig.java +16 -0
  51. data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +373 -0
  52. data/embulk-core/src/main/java/org/embulk/exec/LocalExecutorPlugin.java +129 -0
  53. data/embulk-core/src/main/java/org/embulk/exec/LocalThreadExecutor.java +34 -0
  54. data/embulk-core/src/main/java/org/embulk/exec/LoggerProvider.java +60 -0
  55. data/embulk-core/src/main/java/org/embulk/exec/NoSampleException.java +10 -0
  56. data/embulk-core/src/main/java/org/embulk/exec/PartialExecutionException.java +18 -0
  57. data/embulk-core/src/main/java/org/embulk/exec/PooledBufferAllocator.java +77 -0
  58. data/embulk-core/src/main/java/org/embulk/exec/PreviewExecutor.java +183 -0
  59. data/embulk-core/src/main/java/org/embulk/exec/PreviewResult.java +27 -0
  60. data/embulk-core/src/main/java/org/embulk/exec/PreviewedNoticeError.java +17 -0
  61. data/embulk-core/src/main/java/org/embulk/exec/ResumeState.java +100 -0
  62. data/embulk-core/src/main/java/org/embulk/exec/SamplingParserPlugin.java +136 -0
  63. data/embulk-core/src/main/java/org/embulk/exec/SetCurrentThreadName.java +19 -0
  64. data/embulk-core/src/main/java/org/embulk/exec/SystemConfigModule.java +24 -0
  65. data/embulk-core/src/main/java/org/embulk/exec/TempFileAllocator.java +35 -0
  66. data/embulk-core/src/main/java/org/embulk/guice/Bootstrap.java +157 -0
  67. data/embulk-core/src/main/java/org/embulk/guice/CloseableInjector.java +22 -0
  68. data/embulk-core/src/main/java/org/embulk/guice/InjectorProxy.java +145 -0
  69. data/embulk-core/src/main/java/org/embulk/guice/LifeCycleInjector.java +26 -0
  70. data/embulk-core/src/main/java/org/embulk/guice/LifeCycleInjectorProxy.java +61 -0
  71. data/embulk-core/src/main/java/org/embulk/guice/LifeCycleManager.java +187 -0
  72. data/embulk-core/src/main/java/org/embulk/guice/LifeCycleMethods.java +89 -0
  73. data/embulk-core/src/main/java/org/embulk/guice/LifeCycleMethodsMap.java +38 -0
  74. data/embulk-core/src/main/java/org/embulk/guice/LifeCycleModule.java +97 -0
  75. data/embulk-core/src/main/java/org/embulk/jruby/JRubyPluginSource.java +72 -0
  76. data/embulk-core/src/main/java/org/embulk/jruby/JRubyScriptingModule.java +119 -0
  77. data/embulk-core/src/main/java/org/embulk/plugin/BuiltinPluginSourceModule.java +17 -0
  78. data/embulk-core/src/main/java/org/embulk/plugin/InjectedPluginSource.java +96 -0
  79. data/embulk-core/src/main/java/org/embulk/plugin/PluginClassLoader.java +168 -0
  80. data/embulk-core/src/main/java/org/embulk/plugin/PluginClassLoaderFactory.java +9 -0
  81. data/embulk-core/src/main/java/org/embulk/plugin/PluginClassLoaderModule.java +71 -0
  82. data/embulk-core/src/main/java/org/embulk/plugin/PluginManager.java +78 -0
  83. data/embulk-core/src/main/java/org/embulk/plugin/PluginSource.java +6 -0
  84. data/embulk-core/src/main/java/org/embulk/plugin/PluginSourceNotMatchException.java +19 -0
  85. data/embulk-core/src/main/java/org/embulk/plugin/PluginType.java +47 -0
  86. data/embulk-core/src/main/java/org/embulk/plugin/compat/InputPluginWrapper.java +102 -0
  87. data/embulk-core/src/main/java/org/embulk/plugin/compat/PluginWrappers.java +30 -0
  88. data/embulk-core/src/main/java/org/embulk/plugin/compat/TransactionalFileInputWrapper.java +96 -0
  89. data/embulk-core/src/main/java/org/embulk/plugin/compat/TransactionalFileOutputWrapper.java +102 -0
  90. data/embulk-core/src/main/java/org/embulk/plugin/compat/TransactionalPageOutputWrapper.java +95 -0
  91. data/embulk-core/src/main/java/org/embulk/spi/Buffer.java +148 -0
  92. data/embulk-core/src/main/java/org/embulk/spi/BufferAllocator.java +8 -0
  93. data/embulk-core/src/main/java/org/embulk/spi/Column.java +92 -0
  94. data/embulk-core/src/main/java/org/embulk/spi/ColumnConfig.java +112 -0
  95. data/embulk-core/src/main/java/org/embulk/spi/ColumnVisitor.java +14 -0
  96. data/embulk-core/src/main/java/org/embulk/spi/DecoderPlugin.java +16 -0
  97. data/embulk-core/src/main/java/org/embulk/spi/EncoderPlugin.java +16 -0
  98. data/embulk-core/src/main/java/org/embulk/spi/Exec.java +113 -0
  99. data/embulk-core/src/main/java/org/embulk/spi/ExecAction.java +6 -0
  100. data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +217 -0
  101. data/embulk-core/src/main/java/org/embulk/spi/ExecutorPlugin.java +19 -0
  102. data/embulk-core/src/main/java/org/embulk/spi/Extension.java +44 -0
  103. data/embulk-core/src/main/java/org/embulk/spi/FileInput.java +11 -0
  104. data/embulk-core/src/main/java/org/embulk/spi/FileInputPlugin.java +30 -0
  105. data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +162 -0
  106. data/embulk-core/src/main/java/org/embulk/spi/FileOutput.java +13 -0
  107. data/embulk-core/src/main/java/org/embulk/spi/FileOutputPlugin.java +28 -0
  108. data/embulk-core/src/main/java/org/embulk/spi/FileOutputRunner.java +202 -0
  109. data/embulk-core/src/main/java/org/embulk/spi/FilterPlugin.java +18 -0
  110. data/embulk-core/src/main/java/org/embulk/spi/FormatterPlugin.java +18 -0
  111. data/embulk-core/src/main/java/org/embulk/spi/GuessPlugin.java +9 -0
  112. data/embulk-core/src/main/java/org/embulk/spi/InputPlugin.java +33 -0
  113. data/embulk-core/src/main/java/org/embulk/spi/OutputPlugin.java +29 -0
  114. data/embulk-core/src/main/java/org/embulk/spi/Page.java +51 -0
  115. data/embulk-core/src/main/java/org/embulk/spi/PageBuilder.java +338 -0
  116. data/embulk-core/src/main/java/org/embulk/spi/PageFormat.java +47 -0
  117. data/embulk-core/src/main/java/org/embulk/spi/PageOutput.java +11 -0
  118. data/embulk-core/src/main/java/org/embulk/spi/PageReader.java +226 -0
  119. data/embulk-core/src/main/java/org/embulk/spi/ParserPlugin.java +17 -0
  120. data/embulk-core/src/main/java/org/embulk/spi/ProcessState.java +10 -0
  121. data/embulk-core/src/main/java/org/embulk/spi/ProcessTask.java +117 -0
  122. data/embulk-core/src/main/java/org/embulk/spi/Schema.java +134 -0
  123. data/embulk-core/src/main/java/org/embulk/spi/SchemaConfig.java +93 -0
  124. data/embulk-core/src/main/java/org/embulk/spi/SchemaConfigException.java +22 -0
  125. data/embulk-core/src/main/java/org/embulk/spi/TaskState.java +81 -0
  126. data/embulk-core/src/main/java/org/embulk/spi/TempFileException.java +19 -0
  127. data/embulk-core/src/main/java/org/embulk/spi/TempFileSpace.java +87 -0
  128. data/embulk-core/src/main/java/org/embulk/spi/Transactional.java +10 -0
  129. data/embulk-core/src/main/java/org/embulk/spi/TransactionalFileInput.java +17 -0
  130. data/embulk-core/src/main/java/org/embulk/spi/TransactionalFileOutput.java +19 -0
  131. data/embulk-core/src/main/java/org/embulk/spi/TransactionalPageOutput.java +17 -0
  132. data/embulk-core/src/main/java/org/embulk/spi/time/DateTimeZoneSerDe.java +55 -0
  133. data/embulk-core/src/main/java/org/embulk/spi/time/JRubyTimeParserHelper.java +8 -0
  134. data/embulk-core/src/main/java/org/embulk/spi/time/JRubyTimeParserHelperFactory.java +6 -0
  135. data/embulk-core/src/main/java/org/embulk/spi/time/Timestamp.java +159 -0
  136. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormat.java +100 -0
  137. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormatter.java +97 -0
  138. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParseException.java +10 -0
  139. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParser.java +104 -0
  140. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampSerDe.java +49 -0
  141. data/embulk-core/src/main/java/org/embulk/spi/type/AbstractType.java +58 -0
  142. data/embulk-core/src/main/java/org/embulk/spi/type/BooleanType.java +12 -0
  143. data/embulk-core/src/main/java/org/embulk/spi/type/DoubleType.java +12 -0
  144. data/embulk-core/src/main/java/org/embulk/spi/type/LongType.java +12 -0
  145. data/embulk-core/src/main/java/org/embulk/spi/type/StringType.java +12 -0
  146. data/embulk-core/src/main/java/org/embulk/spi/type/TimestampType.java +41 -0
  147. data/embulk-core/src/main/java/org/embulk/spi/type/Type.java +15 -0
  148. data/embulk-core/src/main/java/org/embulk/spi/type/TypeDeserializer.java +44 -0
  149. data/embulk-core/src/main/java/org/embulk/spi/type/Types.java +14 -0
  150. data/embulk-core/src/main/java/org/embulk/spi/unit/ByteSize.java +156 -0
  151. data/embulk-core/src/main/java/org/embulk/spi/unit/LocalFile.java +106 -0
  152. data/embulk-core/src/main/java/org/embulk/spi/unit/LocalFileSerDe.java +113 -0
  153. data/embulk-core/src/main/java/org/embulk/spi/unit/ToString.java +54 -0
  154. data/embulk-core/src/main/java/org/embulk/spi/unit/ToStringMap.java +34 -0
  155. data/embulk-core/src/main/java/org/embulk/spi/util/CharsetSerDe.java +55 -0
  156. data/embulk-core/src/main/java/org/embulk/spi/util/Decoders.java +81 -0
  157. data/embulk-core/src/main/java/org/embulk/spi/util/DynamicColumnNotFoundException.java +10 -0
  158. data/embulk-core/src/main/java/org/embulk/spi/util/DynamicColumnSetter.java +18 -0
  159. data/embulk-core/src/main/java/org/embulk/spi/util/DynamicColumnSetterFactory.java +94 -0
  160. data/embulk-core/src/main/java/org/embulk/spi/util/DynamicPageBuilder.java +161 -0
  161. data/embulk-core/src/main/java/org/embulk/spi/util/Encoders.java +81 -0
  162. data/embulk-core/src/main/java/org/embulk/spi/util/Executors.java +95 -0
  163. data/embulk-core/src/main/java/org/embulk/spi/util/FileInputInputStream.java +111 -0
  164. data/embulk-core/src/main/java/org/embulk/spi/util/FileOutputOutputStream.java +119 -0
  165. data/embulk-core/src/main/java/org/embulk/spi/util/Filters.java +100 -0
  166. data/embulk-core/src/main/java/org/embulk/spi/util/InputStreamFileInput.java +190 -0
  167. data/embulk-core/src/main/java/org/embulk/spi/util/InputStreamTransactionalFileInput.java +25 -0
  168. data/embulk-core/src/main/java/org/embulk/spi/util/Inputs.java +65 -0
  169. data/embulk-core/src/main/java/org/embulk/spi/util/LineDecoder.java +118 -0
  170. data/embulk-core/src/main/java/org/embulk/spi/util/LineEncoder.java +123 -0
  171. data/embulk-core/src/main/java/org/embulk/spi/util/ListFileInput.java +52 -0
  172. data/embulk-core/src/main/java/org/embulk/spi/util/Newline.java +38 -0
  173. data/embulk-core/src/main/java/org/embulk/spi/util/OutputStreamFileOutput.java +88 -0
  174. data/embulk-core/src/main/java/org/embulk/spi/util/PagePrinter.java +102 -0
  175. data/embulk-core/src/main/java/org/embulk/spi/util/Pages.java +139 -0
  176. data/embulk-core/src/main/java/org/embulk/spi/util/ResumableInputStream.java +128 -0
  177. data/embulk-core/src/main/java/org/embulk/spi/util/RetryExecutor.java +130 -0
  178. data/embulk-core/src/main/java/org/embulk/spi/util/Timestamps.java +53 -0
  179. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/AbstractDynamicColumnSetter.java +79 -0
  180. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/BooleanColumnSetter.java +64 -0
  181. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/DefaultValueSetter.java +18 -0
  182. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/DoubleColumnSetter.java +61 -0
  183. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/LongColumnSetter.java +69 -0
  184. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/NullDefaultValueSetter.java +34 -0
  185. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/SkipColumnSetter.java +52 -0
  186. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/StringColumnSetter.java +56 -0
  187. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/TimestampColumnSetter.java +64 -0
  188. data/embulk-core/src/main/resources/embulk/logback-color.xml +72 -0
  189. data/embulk-core/src/main/resources/embulk/logback-console.xml +14 -0
  190. data/embulk-core/src/main/resources/embulk/logback-file.xml +25 -0
  191. data/embulk-core/src/main/resources/embulk/parent_first_packages.properties +70 -0
  192. data/embulk-core/src/main/resources/embulk/parent_first_resources.properties +28 -0
  193. data/embulk-core/src/test/java/org/embulk/EmbulkTestRuntime.java +114 -0
  194. data/embulk-core/src/test/java/org/embulk/GuiceBinder.java +72 -0
  195. data/embulk-core/src/test/java/org/embulk/RandomManager.java +53 -0
  196. data/embulk-core/src/test/java/org/embulk/TestPluginSourceModule.java +23 -0
  197. data/embulk-core/src/test/java/org/embulk/TestUtilityModule.java +17 -0
  198. data/embulk-core/src/test/java/org/embulk/config/TestConfigLoader.java +66 -0
  199. data/embulk-core/src/test/java/org/embulk/config/TestConfigSource.java +114 -0
  200. data/embulk-core/src/test/java/org/embulk/config/TestTaskSource.java +70 -0
  201. data/embulk-core/src/test/java/org/embulk/plugin/MockPluginSource.java +58 -0
  202. data/embulk-core/src/test/java/org/embulk/plugin/TestPluginType.java +18 -0
  203. data/embulk-core/src/test/java/org/embulk/spi/MockFileOutput.java +63 -0
  204. data/embulk-core/src/test/java/org/embulk/spi/MockFormatterPlugin.java +101 -0
  205. data/embulk-core/src/test/java/org/embulk/spi/MockParserPlugin.java +73 -0
  206. data/embulk-core/src/test/java/org/embulk/spi/PageTestUtils.java +57 -0
  207. data/embulk-core/src/test/java/org/embulk/spi/TestBuffer.java +24 -0
  208. data/embulk-core/src/test/java/org/embulk/spi/TestFileInputInputStream.java +89 -0
  209. data/embulk-core/src/test/java/org/embulk/spi/TestFileInputRunner.java +196 -0
  210. data/embulk-core/src/test/java/org/embulk/spi/TestFileOutputRunner.java +207 -0
  211. data/embulk-core/src/test/java/org/embulk/spi/TestInputStreamFileInput.java +188 -0
  212. data/embulk-core/src/test/java/org/embulk/spi/TestPageBuilderReader.java +319 -0
  213. data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestamp.java +116 -0
  214. data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestampFormatterParser.java +63 -0
  215. data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestampFormatterParserDeprecated.java +67 -0
  216. data/embulk-core/src/test/java/org/embulk/spi/type/TestTypeSerDe.java +45 -0
  217. data/embulk-core/src/test/java/org/embulk/spi/unit/TestByteSize.java +79 -0
  218. data/embulk-core/src/test/java/org/embulk/spi/util/TestLineDecoder.java +132 -0
  219. data/embulk-core/src/test/java/org/embulk/spi/util/TestLineEncoder.java +123 -0
  220. data/embulk-docs/Makefile +178 -0
  221. data/embulk-docs/build.gradle +32 -0
  222. data/embulk-docs/make.bat +243 -0
  223. data/embulk-docs/push-gh-pages.sh +49 -0
  224. data/embulk-docs/src/_static/embulk-architecture.png +0 -0
  225. data/embulk-docs/src/_static/embulk-logo.png +0 -0
  226. data/embulk-docs/src/_static/embulk-logo.svg +133 -0
  227. data/embulk-docs/src/built-in.rst +440 -0
  228. data/embulk-docs/src/conf.py +260 -0
  229. data/embulk-docs/src/customization.rst +184 -0
  230. data/embulk-docs/src/index.rst +84 -0
  231. data/embulk-docs/src/recipe.rst +8 -0
  232. data/embulk-docs/src/recipe/scheduled-csv-load-to-elasticsearch-kibana4.rst +153 -0
  233. data/embulk-docs/src/release.rst +57 -0
  234. data/embulk-docs/src/release/release-0.1.0.rst +8 -0
  235. data/embulk-docs/src/release/release-0.2.0.rst +16 -0
  236. data/embulk-docs/src/release/release-0.2.1.rst +19 -0
  237. data/embulk-docs/src/release/release-0.3.0.rst +34 -0
  238. data/embulk-docs/src/release/release-0.3.1.rst +11 -0
  239. data/embulk-docs/src/release/release-0.3.2.rst +15 -0
  240. data/embulk-docs/src/release/release-0.4.0.rst +74 -0
  241. data/embulk-docs/src/release/release-0.4.1.rst +18 -0
  242. data/embulk-docs/src/release/release-0.4.10.rst +17 -0
  243. data/embulk-docs/src/release/release-0.4.2.rst +18 -0
  244. data/embulk-docs/src/release/release-0.4.3.rst +34 -0
  245. data/embulk-docs/src/release/release-0.4.4.rst +39 -0
  246. data/embulk-docs/src/release/release-0.4.5.rst +24 -0
  247. data/embulk-docs/src/release/release-0.4.6.rst +30 -0
  248. data/embulk-docs/src/release/release-0.4.7.rst +16 -0
  249. data/embulk-docs/src/release/release-0.4.8.rst +15 -0
  250. data/embulk-docs/src/release/release-0.4.9.rst +23 -0
  251. data/embulk-docs/src/release/release-0.5.0.rst +89 -0
  252. data/embulk-docs/src/release/release-0.5.1.rst +13 -0
  253. data/embulk-docs/src/release/release-0.5.2.rst +30 -0
  254. data/embulk-docs/src/release/release-0.5.3.rst +22 -0
  255. data/embulk-docs/src/release/release-0.5.4.rst +24 -0
  256. data/embulk-docs/src/release/release-0.5.5.rst +18 -0
  257. data/embulk-docs/src/release/release-0.6.0.rst +34 -0
  258. data/embulk-docs/src/release/release-0.6.1.rst +11 -0
  259. data/embulk-docs/src/release/release-0.6.10.rst +15 -0
  260. data/embulk-docs/src/release/release-0.6.11.rst +19 -0
  261. data/embulk-docs/src/release/release-0.6.12.rst +31 -0
  262. data/embulk-docs/src/release/release-0.6.13.rst +23 -0
  263. data/embulk-docs/src/release/release-0.6.14.rst +47 -0
  264. data/embulk-docs/src/release/release-0.6.15.rst +26 -0
  265. data/embulk-docs/src/release/release-0.6.16.rst +26 -0
  266. data/embulk-docs/src/release/release-0.6.17.rst +39 -0
  267. data/embulk-docs/src/release/release-0.6.18.rst +14 -0
  268. data/embulk-docs/src/release/release-0.6.19.rst +18 -0
  269. data/embulk-docs/src/release/release-0.6.2.rst +17 -0
  270. data/embulk-docs/src/release/release-0.6.20.rst +19 -0
  271. data/embulk-docs/src/release/release-0.6.21.rst +20 -0
  272. data/embulk-docs/src/release/release-0.6.22.rst +26 -0
  273. data/embulk-docs/src/release/release-0.6.23.rst +17 -0
  274. data/embulk-docs/src/release/release-0.6.24.rst +13 -0
  275. data/embulk-docs/src/release/release-0.6.25.rst +12 -0
  276. data/embulk-docs/src/release/release-0.6.3.rst +23 -0
  277. data/embulk-docs/src/release/release-0.6.4.rst +13 -0
  278. data/embulk-docs/src/release/release-0.6.5.rst +17 -0
  279. data/embulk-docs/src/release/release-0.6.6.rst +17 -0
  280. data/embulk-docs/src/release/release-0.6.7.rst +17 -0
  281. data/embulk-docs/src/release/release-0.6.8.rst +24 -0
  282. data/embulk-docs/src/release/release-0.6.9.rst +24 -0
  283. data/embulk-docs/src/release/release-0.7.0.rst +96 -0
  284. data/embulk-standards/build.gradle +5 -0
  285. data/embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java +284 -0
  286. data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +379 -0
  287. data/embulk-standards/src/main/java/org/embulk/standards/CsvTokenizer.java +411 -0
  288. data/embulk-standards/src/main/java/org/embulk/standards/GzipFileDecoderPlugin.java +55 -0
  289. data/embulk-standards/src/main/java/org/embulk/standards/GzipFileEncoderPlugin.java +71 -0
  290. data/embulk-standards/src/main/java/org/embulk/standards/LocalFileInputPlugin.java +203 -0
  291. data/embulk-standards/src/main/java/org/embulk/standards/LocalFileOutputPlugin.java +148 -0
  292. data/embulk-standards/src/main/java/org/embulk/standards/NullOutputPlugin.java +59 -0
  293. data/embulk-standards/src/main/java/org/embulk/standards/RenameFilterPlugin.java +56 -0
  294. data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginExtension.java +16 -0
  295. data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginModule.java +53 -0
  296. data/embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java +85 -0
  297. data/embulk-standards/src/main/resources/META-INF/services/org.embulk.spi.Extension +1 -0
  298. data/embulk-standards/src/test/java/org/embulk/standards/TestCsvFormatterPlugin.java +312 -0
  299. data/embulk-standards/src/test/java/org/embulk/standards/TestCsvParserPlugin.java +75 -0
  300. data/embulk-standards/src/test/java/org/embulk/standards/TestCsvTokenizer.java +360 -0
  301. data/embulk-standards/src/test/java/org/embulk/standards/TestRenameFilterPlugin.java +88 -0
  302. data/embulk.gemspec +39 -0
  303. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  304. data/gradle/wrapper/gradle-wrapper.properties +6 -0
  305. data/gradlew +164 -0
  306. data/gradlew.bat +90 -0
  307. data/lib/embulk.rb +72 -0
  308. data/lib/embulk/buffer.rb +22 -0
  309. data/lib/embulk/column.rb +70 -0
  310. data/lib/embulk/command/embulk_bundle.rb +56 -0
  311. data/lib/embulk/command/embulk_example.rb +32 -0
  312. data/lib/embulk/command/embulk_generate_bin.rb +62 -0
  313. data/lib/embulk/command/embulk_main.rb +2 -0
  314. data/lib/embulk/command/embulk_migrate_plugin.rb +170 -0
  315. data/lib/embulk/command/embulk_new_plugin.rb +124 -0
  316. data/lib/embulk/command/embulk_run.rb +470 -0
  317. data/lib/embulk/command/embulk_selfupdate.rb +84 -0
  318. data/lib/embulk/data/bundle/.bundle/config +3 -0
  319. data/lib/embulk/data/bundle/.ruby-version +1 -0
  320. data/lib/embulk/data/bundle/Gemfile +26 -0
  321. data/lib/embulk/data/bundle/embulk/filter/example.rb +42 -0
  322. data/lib/embulk/data/bundle/embulk/input/example.rb +54 -0
  323. data/lib/embulk/data/bundle/embulk/output/example.rb +58 -0
  324. data/lib/embulk/data/new/LICENSE.txt +21 -0
  325. data/lib/embulk/data/new/README.md.erb +111 -0
  326. data/lib/embulk/data/new/gitignore.erb +13 -0
  327. data/lib/embulk/data/new/java/build.gradle.erb +73 -0
  328. data/lib/embulk/data/new/java/decoder.java.erb +84 -0
  329. data/lib/embulk/data/new/java/encoder.java.erb +86 -0
  330. data/lib/embulk/data/new/java/file_input.java.erb +143 -0
  331. data/lib/embulk/data/new/java/file_output.java.erb +93 -0
  332. data/lib/embulk/data/new/java/filter.java.erb +56 -0
  333. data/lib/embulk/data/new/java/formatter.java.erb +54 -0
  334. data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.jar +0 -0
  335. data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.properties +6 -0
  336. data/lib/embulk/data/new/java/gradlew +164 -0
  337. data/lib/embulk/data/new/java/gradlew.bat +90 -0
  338. data/lib/embulk/data/new/java/input.java.erb +87 -0
  339. data/lib/embulk/data/new/java/output.java.erb +77 -0
  340. data/lib/embulk/data/new/java/parser.java.erb +60 -0
  341. data/lib/embulk/data/new/java/plugin_loader.rb.erb +3 -0
  342. data/lib/embulk/data/new/java/test.java.erb +5 -0
  343. data/lib/embulk/data/new/ruby/.ruby-version +1 -0
  344. data/lib/embulk/data/new/ruby/Gemfile +2 -0
  345. data/lib/embulk/data/new/ruby/Rakefile +3 -0
  346. data/lib/embulk/data/new/ruby/decoder_guess.rb.erb +25 -0
  347. data/lib/embulk/data/new/ruby/filter.rb.erb +41 -0
  348. data/lib/embulk/data/new/ruby/formatter.rb.erb +49 -0
  349. data/lib/embulk/data/new/ruby/gemspec.erb +20 -0
  350. data/lib/embulk/data/new/ruby/input.rb.erb +59 -0
  351. data/lib/embulk/data/new/ruby/output.rb.erb +61 -0
  352. data/lib/embulk/data/new/ruby/parser.rb.erb +44 -0
  353. data/lib/embulk/data/new/ruby/parser_guess.rb.erb +65 -0
  354. data/lib/embulk/data/package_data.rb +50 -0
  355. data/lib/embulk/data_source.rb +220 -0
  356. data/lib/embulk/decoder_plugin.rb +27 -0
  357. data/lib/embulk/encoder_plugin.rb +27 -0
  358. data/lib/embulk/error.rb +8 -0
  359. data/lib/embulk/executor_plugin.rb +23 -0
  360. data/lib/embulk/file_input.rb +87 -0
  361. data/lib/embulk/file_input_plugin.rb +27 -0
  362. data/lib/embulk/file_output.rb +56 -0
  363. data/lib/embulk/file_output_plugin.rb +27 -0
  364. data/lib/embulk/filter_plugin.rb +105 -0
  365. data/lib/embulk/formatter_plugin.rb +105 -0
  366. data/lib/embulk/guess/charset.rb +44 -0
  367. data/lib/embulk/guess/csv.rb +327 -0
  368. data/lib/embulk/guess/gzip.rb +18 -0
  369. data/lib/embulk/guess/newline.rb +22 -0
  370. data/lib/embulk/guess/schema_guess.rb +118 -0
  371. data/lib/embulk/guess/time_format_guess.rb +394 -0
  372. data/lib/embulk/guess_plugin.rb +129 -0
  373. data/lib/embulk/input_plugin.rb +121 -0
  374. data/lib/embulk/java/bootstrap.rb +24 -0
  375. data/lib/embulk/java/imports.rb +69 -0
  376. data/lib/embulk/java/time_helper.rb +79 -0
  377. data/lib/embulk/java_plugin.rb +90 -0
  378. data/lib/embulk/logger.rb +154 -0
  379. data/lib/embulk/output_plugin.rb +150 -0
  380. data/lib/embulk/page.rb +30 -0
  381. data/lib/embulk/page_builder.rb +76 -0
  382. data/lib/embulk/parser_plugin.rb +78 -0
  383. data/lib/embulk/plugin.rb +239 -0
  384. data/lib/embulk/plugin_registry.rb +96 -0
  385. data/lib/embulk/runner.rb +184 -0
  386. data/lib/embulk/schema.rb +103 -0
  387. data/lib/embulk/version.rb +3 -0
  388. data/settings.gradle +6 -0
  389. data/test/guess/test_schema_guess.rb +11 -0
  390. data/test/guess/test_time_format_guess.rb +133 -0
  391. data/test/helper.rb +21 -0
  392. data/test/run-test.rb +14 -0
  393. metadata +566 -0
@@ -0,0 +1,52 @@
1
+ package org.embulk.exec;
2
+
3
+ import org.slf4j.ILoggerFactory;
4
+ import com.google.common.base.Preconditions;
5
+ import com.google.inject.Module;
6
+ import com.google.inject.Binder;
7
+ import com.google.inject.Scopes;
8
+ import com.fasterxml.jackson.module.guice.ObjectMapperModule;
9
+ import com.fasterxml.jackson.datatype.guava.GuavaModule;
10
+ import com.fasterxml.jackson.datatype.joda.JodaModule;
11
+ import org.embulk.config.ModelManager;
12
+ import org.embulk.spi.time.DateTimeZoneSerDe;
13
+ import org.embulk.spi.time.TimestampSerDe;
14
+ import org.embulk.spi.ParserPlugin;
15
+ import org.embulk.spi.ExecutorPlugin;
16
+ import org.embulk.spi.BufferAllocator;
17
+ import org.embulk.spi.util.CharsetSerDe;
18
+ import org.embulk.spi.unit.LocalFileSerDe;
19
+ import static org.embulk.plugin.InjectedPluginSource.registerPluginTo;
20
+
21
+ public class ExecModule
22
+ implements Module
23
+ {
24
+ @Override
25
+ public void configure(Binder binder)
26
+ {
27
+ Preconditions.checkNotNull(binder, "binder is null.");
28
+
29
+ binder.bind(ILoggerFactory.class).toProvider(LoggerProvider.class).in(Scopes.SINGLETON);
30
+ binder.bind(ModelManager.class).in(Scopes.SINGLETON);
31
+ binder.bind(BufferAllocator.class).to(PooledBufferAllocator.class).in(Scopes.SINGLETON);
32
+ binder.bind(TempFileAllocator.class).in(Scopes.SINGLETON);
33
+
34
+ // GuessExecutor
35
+ registerPluginTo(binder, ParserPlugin.class, "system_guess", GuessExecutor.GuessParserPlugin.class);
36
+ registerPluginTo(binder, ParserPlugin.class, "system_sampling", SamplingParserPlugin.class);
37
+
38
+ // LocalExecutorPlugin
39
+ binder.bind(LocalThreadExecutor.class).in(Scopes.SINGLETON);
40
+ registerPluginTo(binder, ExecutorPlugin.class, "local", LocalExecutorPlugin.class);
41
+
42
+ // serde
43
+ ObjectMapperModule mapper = new ObjectMapperModule();
44
+ DateTimeZoneSerDe.configure(mapper);
45
+ TimestampSerDe.configure(mapper);
46
+ CharsetSerDe.configure(mapper);
47
+ LocalFileSerDe.configure(mapper);
48
+ mapper.registerModule(new GuavaModule()); // jackson-datatype-guava
49
+ mapper.registerModule(new JodaModule()); // jackson-datatype-joda
50
+ mapper.configure(binder);
51
+ }
52
+ }
@@ -0,0 +1,10 @@
1
+ package org.embulk.exec;
2
+
3
+ public class ExecutionInterruptedException
4
+ extends RuntimeException
5
+ {
6
+ public ExecutionInterruptedException(Exception cause)
7
+ {
8
+ super(cause);
9
+ }
10
+ }
@@ -0,0 +1,26 @@
1
+ package org.embulk.exec;
2
+
3
+ import java.util.List;
4
+ import org.embulk.config.ConfigDiff;
5
+
6
+ public class ExecutionResult
7
+ {
8
+ private final ConfigDiff configDiff;
9
+ private final List<Throwable> ignoredExceptions;
10
+
11
+ public ExecutionResult(ConfigDiff configDiff, List<Throwable> ignoredExceptions)
12
+ {
13
+ this.configDiff = configDiff;
14
+ this.ignoredExceptions = ignoredExceptions;
15
+ }
16
+
17
+ public ConfigDiff getConfigDiff()
18
+ {
19
+ return configDiff;
20
+ }
21
+
22
+ public List<Throwable> getIgnoredExceptions()
23
+ {
24
+ return ignoredExceptions;
25
+ }
26
+ }
@@ -0,0 +1,43 @@
1
+ package org.embulk.exec;
2
+
3
+ import java.util.ServiceLoader;
4
+ import com.google.inject.Module;
5
+ import com.google.inject.Binder;
6
+ import org.embulk.config.ConfigSource;
7
+ import org.embulk.spi.Extension;
8
+
9
+ /**
10
+ * ExtensionServiceLoaderModule loads Extensions using java.util.ServiceLoader
11
+ * mechanism.
12
+ * Jar packages providing an extension need to include
13
+ * META-INF/services/org.embulk.exec.Extension file. Contents of the file is
14
+ * one-line text of the extension class name (e.g. com.example.MyPluginSourceExtension).
15
+ */
16
+ public class ExtensionServiceLoaderModule
17
+ implements Module
18
+ {
19
+ private final ClassLoader classLoader;
20
+ private final ConfigSource systemConfig;
21
+
22
+ public ExtensionServiceLoaderModule(ConfigSource systemConfig)
23
+ {
24
+ this(ExtensionServiceLoaderModule.class.getClassLoader(), systemConfig);
25
+ }
26
+
27
+ public ExtensionServiceLoaderModule(ClassLoader classLoader, ConfigSource systemConfig)
28
+ {
29
+ this.classLoader = classLoader;
30
+ this.systemConfig = systemConfig;
31
+ }
32
+
33
+ @Override
34
+ public void configure(Binder binder)
35
+ {
36
+ ServiceLoader<Extension> serviceLoader = ServiceLoader.load(Extension.class, classLoader);
37
+ for (Extension extension : serviceLoader) {
38
+ for (Module module : extension.getModules(systemConfig)) {
39
+ module.configure(binder);
40
+ }
41
+ }
42
+ }
43
+ }
@@ -0,0 +1,16 @@
1
+ package org.embulk.exec;
2
+
3
+ import javax.inject.Qualifier;
4
+ import java.lang.annotation.Retention;
5
+ import java.lang.annotation.Target;
6
+ import static java.lang.annotation.ElementType.FIELD;
7
+ import static java.lang.annotation.ElementType.METHOD;
8
+ import static java.lang.annotation.ElementType.PARAMETER;
9
+ import static java.lang.annotation.RetentionPolicy.RUNTIME;
10
+
11
+ @Retention(RUNTIME)
12
+ @Target({FIELD, PARAMETER, METHOD})
13
+ @Qualifier
14
+ public @interface ForGuess
15
+ {
16
+ }
@@ -0,0 +1,16 @@
1
+ package org.embulk.exec;
2
+
3
+ import javax.inject.Qualifier;
4
+ import java.lang.annotation.Retention;
5
+ import java.lang.annotation.Target;
6
+ import static java.lang.annotation.ElementType.FIELD;
7
+ import static java.lang.annotation.ElementType.METHOD;
8
+ import static java.lang.annotation.ElementType.PARAMETER;
9
+ import static java.lang.annotation.RetentionPolicy.RUNTIME;
10
+
11
+ @Retention(RUNTIME)
12
+ @Target({FIELD, PARAMETER, METHOD})
13
+ @Qualifier
14
+ public @interface ForSystemConfig
15
+ {
16
+ }
@@ -0,0 +1,373 @@
1
+ package org.embulk.exec;
2
+
3
+ import java.util.List;
4
+ import java.util.Set;
5
+ import java.util.ArrayList;
6
+ import java.util.concurrent.ExecutionException;
7
+ import com.google.common.collect.ImmutableList;
8
+ import com.google.common.base.Throwables;
9
+ import com.google.inject.Inject;
10
+ import com.google.inject.Binder;
11
+ import com.google.inject.multibindings.Multibinder;
12
+ import org.embulk.plugin.PluginType;
13
+ import org.embulk.config.Config;
14
+ import org.embulk.config.ConfigDefault;
15
+ import org.embulk.config.ConfigDiff;
16
+ import org.embulk.config.Task;
17
+ import org.embulk.config.TaskSource;
18
+ import org.embulk.config.ConfigSource;
19
+ import org.embulk.config.TaskReport;
20
+ import org.embulk.spi.Schema;
21
+ import org.embulk.spi.Page;
22
+ import org.embulk.spi.Buffer;
23
+ import org.embulk.spi.InputPlugin;
24
+ import org.embulk.spi.FileInputPlugin;
25
+ import org.embulk.spi.ParserPlugin;
26
+ import org.embulk.spi.GuessPlugin;
27
+ import org.embulk.spi.Exec;
28
+ import org.embulk.spi.ExecAction;
29
+ import org.embulk.spi.ExecSession;
30
+ import org.embulk.spi.FileInput;
31
+ import org.embulk.spi.PageOutput;
32
+ import org.embulk.spi.TransactionalFileInput;
33
+ import org.embulk.spi.FileInputRunner;
34
+
35
+ public class GuessExecutor
36
+ {
37
+ private final List<PluginType> defaultGuessPlugins;
38
+
39
+ private interface GuessExecutorSystemTask
40
+ extends Task
41
+ {
42
+ @Config("guess_plugins")
43
+ @ConfigDefault("[]")
44
+ public List<PluginType> getGuessPlugins();
45
+ }
46
+
47
+ private interface GuessExecutorTask
48
+ extends Task
49
+ {
50
+ @Config("guess_plugins")
51
+ @ConfigDefault("[]")
52
+ public List<PluginType> getGuessPlugins();
53
+
54
+ @Config("exclude_guess_plugins")
55
+ @ConfigDefault("[]")
56
+ public List<PluginType> getExcludeGuessPlugins();
57
+ }
58
+
59
+ public static void registerDefaultGuessPluginTo(Binder binder, PluginType type)
60
+ {
61
+ Multibinder<PluginType> multibinder = Multibinder.newSetBinder(binder, PluginType.class, ForGuess.class);
62
+ multibinder.addBinding().toInstance(type);
63
+ }
64
+
65
+ @Inject
66
+ public GuessExecutor(@ForSystemConfig ConfigSource systemConfig,
67
+ @ForGuess Set<PluginType> defaultGuessPlugins)
68
+ {
69
+ GuessExecutorSystemTask systemTask = systemConfig.loadConfig(GuessExecutorSystemTask.class);
70
+
71
+ ImmutableList.Builder<PluginType> list = ImmutableList.builder();
72
+ list.addAll(defaultGuessPlugins);
73
+ list.addAll(systemTask.getGuessPlugins());
74
+ this.defaultGuessPlugins = list.build();
75
+ }
76
+
77
+ public ConfigDiff guess(ExecSession exec, final ConfigSource config)
78
+ {
79
+ try {
80
+ return Exec.doWith(exec, new ExecAction<ConfigDiff>() {
81
+ public ConfigDiff run()
82
+ {
83
+ try (SetCurrentThreadName dontCare = new SetCurrentThreadName("guess")) {
84
+ return doGuess(config);
85
+ }
86
+ }
87
+ });
88
+ } catch (ExecutionException ex) {
89
+ throw Throwables.propagate(ex.getCause());
90
+ }
91
+ }
92
+
93
+ protected InputPlugin newInputPlugin(ConfigSource inputConfig)
94
+ {
95
+ return Exec.newPlugin(InputPlugin.class, inputConfig.get(PluginType.class, "type"));
96
+ }
97
+
98
+ private ConfigDiff doGuess(ConfigSource config)
99
+ {
100
+ ConfigSource inputConfig = config.getNested("in");
101
+
102
+ InputPlugin input = newInputPlugin(inputConfig);
103
+
104
+ ConfigDiff inputGuessed;
105
+ try {
106
+ inputGuessed = input.guess(inputConfig);
107
+ } catch (AbstractMethodError ex) {
108
+ // for backward compatibility with embulk v0.4 interface
109
+ throw new UnsupportedOperationException(input.getClass().getSimpleName()+".guess(ConfigSource) is not implemented. This input plugin does not support guessing.");
110
+ }
111
+
112
+ ConfigDiff wrapped = Exec.newConfigDiff();
113
+ wrapped.getNestedOrSetEmpty("in").merge(inputGuessed);
114
+ return wrapped;
115
+ }
116
+
117
+ // called by FileInputRunner
118
+ public ConfigDiff guessParserConfig(Buffer sample, ConfigSource inputConfig, ConfigSource execConfig)
119
+ {
120
+ List<PluginType> guessPlugins = new ArrayList<PluginType>(defaultGuessPlugins);
121
+
122
+ GuessExecutorTask task = execConfig.loadConfig(GuessExecutorTask.class);
123
+ guessPlugins.addAll(task.getGuessPlugins());
124
+ guessPlugins.removeAll(task.getExcludeGuessPlugins());
125
+
126
+ return guessParserConfig(sample, inputConfig, guessPlugins);
127
+ }
128
+
129
+ private ConfigDiff guessParserConfig(Buffer sample,
130
+ ConfigSource config, List<PluginType> guessPlugins)
131
+ {
132
+ // repeat guessing upto 10 times
133
+ ConfigDiff lastGuessed = Exec.newConfigDiff();
134
+ for (int i=0; i < 10; i++) {
135
+ // include last-guessed config to run guess input
136
+ ConfigSource originalConfig = config.deepCopy().merge(lastGuessed);
137
+ ConfigSource guessInputConfig = originalConfig.deepCopy();
138
+ guessInputConfig.getNestedOrSetEmpty("parser")
139
+ .set("type", "system_guess") // override in.parser.type so that FileInputRunner.run uses GuessParserPlugin
140
+ .set("guess_plugins", guessPlugins)
141
+ .set("orig_config", originalConfig);
142
+
143
+ // run FileInputPlugin
144
+ final FileInputRunner input = new FileInputRunner(new BufferFileInputPlugin(sample));
145
+ ConfigDiff guessed;
146
+ try {
147
+ input.transaction(guessInputConfig, new InputPlugin.Control() {
148
+ public List<TaskReport> run(TaskSource inputTaskSource, Schema schema, int taskCount)
149
+ {
150
+ if (taskCount == 0) {
151
+ throw new NoSampleException("No input files to guess");
152
+ }
153
+ input.run(inputTaskSource, null, 0, new PageOutput() {
154
+ @Override
155
+ public void add(Page page)
156
+ {
157
+ throw new RuntimeException("Input plugin must be a FileInputPlugin to guess parser configuration"); // TODO exception class
158
+ }
159
+
160
+ @Override
161
+ public void finish() { }
162
+
163
+ @Override
164
+ public void close() { }
165
+ });
166
+ throw new AssertionError("Guess executor must throw GuessedNoticeError");
167
+ }
168
+ });
169
+
170
+ throw new AssertionError("Guess executor must throw GuessedNoticeError");
171
+
172
+ } catch (GuessedNoticeError error) {
173
+ guessed = lastGuessed.deepCopy().merge(error.getGuessedConfig());
174
+ }
175
+
176
+ // merge to the last-guessed config
177
+ if (lastGuessed.equals(guessed)) {
178
+ // not changed
179
+ return lastGuessed;
180
+ }
181
+ lastGuessed = guessed;
182
+ }
183
+
184
+ return lastGuessed;
185
+ }
186
+
187
+ private static class BufferFileInputPlugin
188
+ implements FileInputPlugin
189
+ {
190
+ private Buffer buffer;
191
+
192
+ public BufferFileInputPlugin(Buffer buffer)
193
+ {
194
+ this.buffer = buffer;
195
+ }
196
+
197
+ public ConfigDiff transaction(ConfigSource config, FileInputPlugin.Control control)
198
+ {
199
+ control.run(Exec.newTaskSource(), 1);
200
+ return Exec.newConfigDiff();
201
+ }
202
+
203
+ public ConfigDiff resume(TaskSource taskSource,
204
+ int taskCount,
205
+ FileInputPlugin.Control control)
206
+ {
207
+ throw new UnsupportedOperationException();
208
+ }
209
+
210
+ public void cleanup(TaskSource taskSource,
211
+ int taskCount,
212
+ List<TaskReport> successTaskReports)
213
+ {
214
+ if (buffer != null) {
215
+ buffer.release();
216
+ buffer = null;
217
+ }
218
+ }
219
+
220
+ public TransactionalFileInput open(TaskSource taskSource, int taskIndex)
221
+ {
222
+ return new BufferTransactionalFileInput(buffer);
223
+ }
224
+ }
225
+
226
+ private static class BufferTransactionalFileInput
227
+ implements TransactionalFileInput
228
+ {
229
+ private Buffer buffer;
230
+
231
+ public BufferTransactionalFileInput(Buffer buffer)
232
+ {
233
+ this.buffer = buffer;
234
+ }
235
+
236
+ @Override
237
+ public Buffer poll()
238
+ {
239
+ Buffer b = buffer;
240
+ buffer = null;
241
+ return b;
242
+ }
243
+
244
+ @Override
245
+ public boolean nextFile()
246
+ {
247
+ return buffer != null;
248
+ }
249
+
250
+ @Override
251
+ public void close() { }
252
+
253
+ @Override
254
+ public void abort() { }
255
+
256
+ @Override
257
+ public TaskReport commit()
258
+ {
259
+ return null;
260
+ }
261
+ }
262
+
263
+ public static class GuessParserPlugin
264
+ implements ParserPlugin
265
+ {
266
+ private interface PluginTask
267
+ extends Task
268
+ {
269
+ @Config("guess_plugins")
270
+ public List<PluginType> getGuessPluginTypes();
271
+
272
+ @Config("orig_config")
273
+ public ConfigSource getOriginalConfig();
274
+ }
275
+
276
+ @Override
277
+ public void transaction(ConfigSource config, ParserPlugin.Control control)
278
+ {
279
+ PluginTask task = config.loadConfig(PluginTask.class);
280
+ control.run(task.dump(), null);
281
+ }
282
+
283
+ @Override
284
+ public void run(TaskSource taskSource, Schema schema,
285
+ FileInput input, PageOutput pageOutput)
286
+ {
287
+ PluginTask task = taskSource.loadTask(PluginTask.class);
288
+ final ConfigSource originalConfig = task.getOriginalConfig();
289
+
290
+ // get sample buffer
291
+ Buffer sample = getFirstBuffer(input);
292
+
293
+ // load guess plugins
294
+ ImmutableList.Builder<GuessPlugin> builder = ImmutableList.builder();
295
+ for (PluginType guessType : task.getGuessPluginTypes()) {
296
+ GuessPlugin guess = Exec.newPlugin(GuessPlugin.class, guessType);
297
+ builder.add(guess);
298
+ }
299
+ List<GuessPlugin> guesses = builder.build();
300
+
301
+ // run guess plugins
302
+ ConfigSource mergedConfig = originalConfig.deepCopy();
303
+ ConfigDiff mergedGuessed = Exec.newConfigDiff();
304
+ for (int i=0; i < guesses.size(); i++) {
305
+ ConfigDiff guessed = guesses.get(i).guess(originalConfig, sample);
306
+ guessed = addAssumedDecoderConfigs(originalConfig, guessed);
307
+ mergedGuessed.merge(guessed);
308
+ mergedConfig.merge(mergedGuessed);
309
+ if (!mergedConfig.equals(originalConfig)) {
310
+ // config updated
311
+ throw new GuessedNoticeError(mergedGuessed);
312
+ }
313
+ }
314
+ throw new GuessedNoticeError(mergedGuessed);
315
+ }
316
+
317
+ private static Buffer getFirstBuffer(FileInput input)
318
+ {
319
+ // The first buffer is created by SamplingParserPlugin. See FileInputRunner.guess.
320
+ RuntimeException decodeException = null;
321
+ try {
322
+ while (input.nextFile()) {
323
+ Buffer sample = input.poll();
324
+ if (sample != null) {
325
+ return sample;
326
+ }
327
+ }
328
+ } catch (RuntimeException ex) {
329
+ // ignores exceptions because FileDecoderPlugin can throw exceptions
330
+ // such as "Unexpected end of ZLIB input stream"
331
+ decodeException = ex;
332
+ }
333
+ if (decodeException != null) {
334
+ throw decodeException;
335
+ }
336
+ throw new NoSampleException("No input buffer to guess");
337
+ }
338
+
339
+ private static class ConfigSourceList extends ArrayList<ConfigSource> { };
340
+
341
+ private static ConfigDiff addAssumedDecoderConfigs(ConfigSource originalConfig, ConfigDiff guessed)
342
+ {
343
+ List<ConfigSource> guessedDecoders = guessed.get(ConfigSourceList.class, "decoders", null);
344
+ if (guessedDecoders == null) {
345
+ return guessed;
346
+ } else {
347
+ List<ConfigSource> assumedDecoders = originalConfig.get(ConfigSourceList.class, "decoders", new ConfigSourceList());
348
+ ImmutableList.Builder<ConfigSource> added = ImmutableList.builder();
349
+ for (ConfigSource assuemed : assumedDecoders) {
350
+ added.add(Exec.newConfigSource());
351
+ }
352
+ added.addAll(guessedDecoders);
353
+ return guessed.set("decoders", added.build());
354
+ }
355
+ }
356
+ }
357
+
358
+ public static class GuessedNoticeError
359
+ extends Error
360
+ {
361
+ private final ConfigDiff guessedConfig;
362
+
363
+ public GuessedNoticeError(ConfigDiff guessedConfig)
364
+ {
365
+ this.guessedConfig = guessedConfig;
366
+ }
367
+
368
+ public ConfigDiff getGuessedConfig()
369
+ {
370
+ return guessedConfig;
371
+ }
372
+ }
373
+ }