embulk 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (204) hide show
  1. checksums.yaml +15 -0
  2. data/.gitignore +13 -0
  3. data/Gemfile +3 -0
  4. data/Gemfile.lock +33 -0
  5. data/README.md +117 -0
  6. data/Rakefile +58 -0
  7. data/bin/embulk +63 -0
  8. data/build.gradle +149 -0
  9. data/embulk-cli/build.gradle +6 -0
  10. data/embulk-cli/pom.xml +94 -0
  11. data/embulk-cli/src/main/java/org/embulk/cli/Main.java +15 -0
  12. data/embulk-core/build.gradle +6 -0
  13. data/embulk-core/pom.xml +143 -0
  14. data/embulk-core/src/main/java/org/embulk/EmbulkService.java +39 -0
  15. data/embulk-core/src/main/java/org/embulk/command/Runner.java +199 -0
  16. data/embulk-core/src/main/java/org/embulk/command/TablePrinter.java +119 -0
  17. data/embulk-core/src/main/java/org/embulk/config/CommitReport.java +26 -0
  18. data/embulk-core/src/main/java/org/embulk/config/Config.java +15 -0
  19. data/embulk-core/src/main/java/org/embulk/config/ConfigDefault.java +15 -0
  20. data/embulk-core/src/main/java/org/embulk/config/ConfigException.java +20 -0
  21. data/embulk-core/src/main/java/org/embulk/config/ConfigLoader.java +83 -0
  22. data/embulk-core/src/main/java/org/embulk/config/ConfigSource.java +28 -0
  23. data/embulk-core/src/main/java/org/embulk/config/DataSource.java +35 -0
  24. data/embulk-core/src/main/java/org/embulk/config/DataSourceImpl.java +208 -0
  25. data/embulk-core/src/main/java/org/embulk/config/DataSourceSerDe.java +80 -0
  26. data/embulk-core/src/main/java/org/embulk/config/GenericTypeReference.java +20 -0
  27. data/embulk-core/src/main/java/org/embulk/config/ModelManager.java +125 -0
  28. data/embulk-core/src/main/java/org/embulk/config/NextConfig.java +26 -0
  29. data/embulk-core/src/main/java/org/embulk/config/Task.java +10 -0
  30. data/embulk-core/src/main/java/org/embulk/config/TaskInvocationHandler.java +180 -0
  31. data/embulk-core/src/main/java/org/embulk/config/TaskSerDe.java +343 -0
  32. data/embulk-core/src/main/java/org/embulk/config/TaskSource.java +28 -0
  33. data/embulk-core/src/main/java/org/embulk/config/TaskValidationException.java +37 -0
  34. data/embulk-core/src/main/java/org/embulk/config/TaskValidator.java +24 -0
  35. data/embulk-core/src/main/java/org/embulk/exec/ExecModule.java +45 -0
  36. data/embulk-core/src/main/java/org/embulk/exec/ExecuteInterruptedException.java +10 -0
  37. data/embulk-core/src/main/java/org/embulk/exec/ExecuteResult.java +19 -0
  38. data/embulk-core/src/main/java/org/embulk/exec/ExtensionServiceLoaderModule.java +43 -0
  39. data/embulk-core/src/main/java/org/embulk/exec/ForSystemConfig.java +16 -0
  40. data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +307 -0
  41. data/embulk-core/src/main/java/org/embulk/exec/LocalExecutor.java +274 -0
  42. data/embulk-core/src/main/java/org/embulk/exec/LoggerProvider.java +30 -0
  43. data/embulk-core/src/main/java/org/embulk/exec/NoSampleException.java +10 -0
  44. data/embulk-core/src/main/java/org/embulk/exec/PooledBufferAllocator.java +58 -0
  45. data/embulk-core/src/main/java/org/embulk/exec/PreviewExecutor.java +138 -0
  46. data/embulk-core/src/main/java/org/embulk/exec/PreviewResult.java +27 -0
  47. data/embulk-core/src/main/java/org/embulk/exec/PreviewedNoticeError.java +17 -0
  48. data/embulk-core/src/main/java/org/embulk/exec/SamplingParserPlugin.java +116 -0
  49. data/embulk-core/src/main/java/org/embulk/exec/SystemConfigModule.java +24 -0
  50. data/embulk-core/src/main/java/org/embulk/jruby/JRubyPluginSource.java +69 -0
  51. data/embulk-core/src/main/java/org/embulk/jruby/JRubyScriptingModule.java +100 -0
  52. data/embulk-core/src/main/java/org/embulk/plugin/BuiltinPluginSourceModule.java +17 -0
  53. data/embulk-core/src/main/java/org/embulk/plugin/InjectedPluginSource.java +92 -0
  54. data/embulk-core/src/main/java/org/embulk/plugin/PluginManager.java +34 -0
  55. data/embulk-core/src/main/java/org/embulk/plugin/PluginSource.java +6 -0
  56. data/embulk-core/src/main/java/org/embulk/plugin/PluginSourceNotMatchException.java +19 -0
  57. data/embulk-core/src/main/java/org/embulk/plugin/PluginType.java +47 -0
  58. data/embulk-core/src/main/java/org/embulk/plugin/SetThreadContextClassLoader.java +19 -0
  59. data/embulk-core/src/main/java/org/embulk/spi/Buffer.java +113 -0
  60. data/embulk-core/src/main/java/org/embulk/spi/BufferAllocator.java +8 -0
  61. data/embulk-core/src/main/java/org/embulk/spi/Column.java +92 -0
  62. data/embulk-core/src/main/java/org/embulk/spi/ColumnConfig.java +79 -0
  63. data/embulk-core/src/main/java/org/embulk/spi/DecoderPlugin.java +16 -0
  64. data/embulk-core/src/main/java/org/embulk/spi/EncoderPlugin.java +16 -0
  65. data/embulk-core/src/main/java/org/embulk/spi/Exec.java +76 -0
  66. data/embulk-core/src/main/java/org/embulk/spi/ExecAction.java +6 -0
  67. data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +105 -0
  68. data/embulk-core/src/main/java/org/embulk/spi/Extension.java +42 -0
  69. data/embulk-core/src/main/java/org/embulk/spi/FileInput.java +11 -0
  70. data/embulk-core/src/main/java/org/embulk/spi/FileInputPlugin.java +19 -0
  71. data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +113 -0
  72. data/embulk-core/src/main/java/org/embulk/spi/FileOutput.java +13 -0
  73. data/embulk-core/src/main/java/org/embulk/spi/FileOutputPlugin.java +20 -0
  74. data/embulk-core/src/main/java/org/embulk/spi/FileOutputRunner.java +167 -0
  75. data/embulk-core/src/main/java/org/embulk/spi/FormatterPlugin.java +18 -0
  76. data/embulk-core/src/main/java/org/embulk/spi/GuessPlugin.java +9 -0
  77. data/embulk-core/src/main/java/org/embulk/spi/InputPlugin.java +20 -0
  78. data/embulk-core/src/main/java/org/embulk/spi/OutputPlugin.java +21 -0
  79. data/embulk-core/src/main/java/org/embulk/spi/Page.java +45 -0
  80. data/embulk-core/src/main/java/org/embulk/spi/PageBuilder.java +327 -0
  81. data/embulk-core/src/main/java/org/embulk/spi/PageFormat.java +47 -0
  82. data/embulk-core/src/main/java/org/embulk/spi/PageOutput.java +11 -0
  83. data/embulk-core/src/main/java/org/embulk/spi/PageReader.java +227 -0
  84. data/embulk-core/src/main/java/org/embulk/spi/ParserPlugin.java +17 -0
  85. data/embulk-core/src/main/java/org/embulk/spi/Schema.java +101 -0
  86. data/embulk-core/src/main/java/org/embulk/spi/SchemaConfig.java +52 -0
  87. data/embulk-core/src/main/java/org/embulk/spi/SchemaVisitor.java +14 -0
  88. data/embulk-core/src/main/java/org/embulk/spi/Transactional.java +10 -0
  89. data/embulk-core/src/main/java/org/embulk/spi/TransactionalFileInput.java +17 -0
  90. data/embulk-core/src/main/java/org/embulk/spi/TransactionalFileOutput.java +19 -0
  91. data/embulk-core/src/main/java/org/embulk/spi/TransactionalPageOutput.java +17 -0
  92. data/embulk-core/src/main/java/org/embulk/spi/time/DateTimeZoneSerDe.java +57 -0
  93. data/embulk-core/src/main/java/org/embulk/spi/time/JRubyTimeParserHelper.java +8 -0
  94. data/embulk-core/src/main/java/org/embulk/spi/time/JRubyTimeParserHelperFactory.java +6 -0
  95. data/embulk-core/src/main/java/org/embulk/spi/time/Timestamp.java +159 -0
  96. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormat.java +98 -0
  97. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormatter.java +55 -0
  98. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParseException.java +6 -0
  99. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParser.java +60 -0
  100. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampSerDe.java +50 -0
  101. data/embulk-core/src/main/java/org/embulk/spi/type/AbstractType.java +55 -0
  102. data/embulk-core/src/main/java/org/embulk/spi/type/BooleanType.java +12 -0
  103. data/embulk-core/src/main/java/org/embulk/spi/type/DoubleType.java +12 -0
  104. data/embulk-core/src/main/java/org/embulk/spi/type/LongType.java +12 -0
  105. data/embulk-core/src/main/java/org/embulk/spi/type/StringType.java +12 -0
  106. data/embulk-core/src/main/java/org/embulk/spi/type/TimestampType.java +39 -0
  107. data/embulk-core/src/main/java/org/embulk/spi/type/Type.java +15 -0
  108. data/embulk-core/src/main/java/org/embulk/spi/type/TypeDeserializer.java +47 -0
  109. data/embulk-core/src/main/java/org/embulk/spi/type/Types.java +14 -0
  110. data/embulk-core/src/main/java/org/embulk/spi/util/CharsetSerDe.java +55 -0
  111. data/embulk-core/src/main/java/org/embulk/spi/util/Decoders.java +81 -0
  112. data/embulk-core/src/main/java/org/embulk/spi/util/Encoders.java +81 -0
  113. data/embulk-core/src/main/java/org/embulk/spi/util/FileInputInputStream.java +110 -0
  114. data/embulk-core/src/main/java/org/embulk/spi/util/FileOutputOutputStream.java +94 -0
  115. data/embulk-core/src/main/java/org/embulk/spi/util/InputStreamFileInput.java +111 -0
  116. data/embulk-core/src/main/java/org/embulk/spi/util/Inputs.java +74 -0
  117. data/embulk-core/src/main/java/org/embulk/spi/util/LineDecoder.java +118 -0
  118. data/embulk-core/src/main/java/org/embulk/spi/util/LineEncoder.java +109 -0
  119. data/embulk-core/src/main/java/org/embulk/spi/util/ListFileInput.java +52 -0
  120. data/embulk-core/src/main/java/org/embulk/spi/util/Newline.java +38 -0
  121. data/embulk-core/src/main/java/org/embulk/spi/util/PagePrinter.java +102 -0
  122. data/embulk-core/src/main/java/org/embulk/spi/util/Pages.java +139 -0
  123. data/embulk-core/src/test/java/org/embulk/EmbulkTestRuntime.java +110 -0
  124. data/embulk-core/src/test/java/org/embulk/GuiceBinder.java +72 -0
  125. data/embulk-core/src/test/java/org/embulk/RandomManager.java +53 -0
  126. data/embulk-core/src/test/java/org/embulk/TestPluginSourceModule.java +23 -0
  127. data/embulk-core/src/test/java/org/embulk/TestUtilityModule.java +17 -0
  128. data/embulk-core/src/test/java/org/embulk/config/TestConfigSource.java +114 -0
  129. data/embulk-core/src/test/java/org/embulk/config/TestTaskSource.java +70 -0
  130. data/embulk-core/src/test/java/org/embulk/plugin/MockPluginSource.java +57 -0
  131. data/embulk-core/src/test/java/org/embulk/plugin/TestPluginType.java +18 -0
  132. data/embulk-core/src/test/java/org/embulk/spi/MockFileOutput.java +63 -0
  133. data/embulk-core/src/test/java/org/embulk/spi/MockFormatterPlugin.java +101 -0
  134. data/embulk-core/src/test/java/org/embulk/spi/MockParserPlugin.java +73 -0
  135. data/embulk-core/src/test/java/org/embulk/spi/PageTestUtils.java +78 -0
  136. data/embulk-core/src/test/java/org/embulk/spi/TestFileInputInputStream.java +67 -0
  137. data/embulk-core/src/test/java/org/embulk/spi/TestFileInputRunner.java +180 -0
  138. data/embulk-core/src/test/java/org/embulk/spi/TestFileOutputRunner.java +192 -0
  139. data/embulk-core/src/test/java/org/embulk/spi/TestInputStreamFileInput.java +188 -0
  140. data/embulk-core/src/test/java/org/embulk/spi/TestPageBuilderReader.java +301 -0
  141. data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestamp.java +116 -0
  142. data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestampFormatterParser.java +52 -0
  143. data/embulk-core/src/test/java/org/embulk/spi/type/TestTypeSerDe.java +45 -0
  144. data/embulk-core/src/test/java/org/embulk/spi/util/TestLineDecoder.java +132 -0
  145. data/embulk-core/src/test/java/org/embulk/spi/util/TestLineEncoder.java +123 -0
  146. data/embulk-standards/build.gradle +6 -0
  147. data/embulk-standards/pom.xml +68 -0
  148. data/embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java +158 -0
  149. data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +233 -0
  150. data/embulk-standards/src/main/java/org/embulk/standards/CsvTokenizer.java +355 -0
  151. data/embulk-standards/src/main/java/org/embulk/standards/GzipFileDecoderPlugin.java +55 -0
  152. data/embulk-standards/src/main/java/org/embulk/standards/GzipFileEncoderPlugin.java +39 -0
  153. data/embulk-standards/src/main/java/org/embulk/standards/LocalFileInputPlugin.java +138 -0
  154. data/embulk-standards/src/main/java/org/embulk/standards/LocalFileOutputPlugin.java +128 -0
  155. data/embulk-standards/src/main/java/org/embulk/standards/NullOutputPlugin.java +46 -0
  156. data/embulk-standards/src/main/java/org/embulk/standards/S3FileInputPlugin.java +238 -0
  157. data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginExtension.java +16 -0
  158. data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginModule.java +44 -0
  159. data/embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java +71 -0
  160. data/embulk-standards/src/main/resources/META-INF/services/org.embulk.spi.Extension +1 -0
  161. data/embulk-standards/src/test/java/org/embulk/standards/TestCsvParserPlugin.java +69 -0
  162. data/embulk-standards/src/test/java/org/embulk/standards/TestCsvTokenizer.java +291 -0
  163. data/embulk-standards/src/test/java/org/embulk/standards/TestS3FileInputPlugin.java +43 -0
  164. data/embulk.gemspec +27 -0
  165. data/examples/config.yml +34 -0
  166. data/examples/csv/sample.csv.gz +0 -0
  167. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  168. data/gradle/wrapper/gradle-wrapper.properties +6 -0
  169. data/gradlew +164 -0
  170. data/gradlew.bat +90 -0
  171. data/lib/embulk.rb +16 -0
  172. data/lib/embulk/buffer.rb +17 -0
  173. data/lib/embulk/column.rb +47 -0
  174. data/lib/embulk/command/embulk.rb +39 -0
  175. data/lib/embulk/command/embulk_example.rb +32 -0
  176. data/lib/embulk/command/embulk_generate_bin.rb +62 -0
  177. data/lib/embulk/command/embulk_run.rb +243 -0
  178. data/lib/embulk/data/bundle/.bundle/config +3 -0
  179. data/lib/embulk/data/bundle/Gemfile +31 -0
  180. data/lib/embulk/data/bundle/Gemfile.lock +8 -0
  181. data/lib/embulk/data/bundle/embulk/input_example.rb +40 -0
  182. data/lib/embulk/data/bundle/embulk/output_example.rb +51 -0
  183. data/lib/embulk/data_source.rb +66 -0
  184. data/lib/embulk/error.rb +5 -0
  185. data/lib/embulk/guess_charset.rb +26 -0
  186. data/lib/embulk/guess_csv.rb +195 -0
  187. data/lib/embulk/guess_gzip.rb +18 -0
  188. data/lib/embulk/guess_newline.rb +20 -0
  189. data/lib/embulk/guess_plugin.rb +113 -0
  190. data/lib/embulk/input_plugin.rb +53 -0
  191. data/lib/embulk/java/bootstrap.rb +12 -0
  192. data/lib/embulk/java/imports.rb +26 -0
  193. data/lib/embulk/java/time_helper.rb +77 -0
  194. data/lib/embulk/output_plugin.rb +104 -0
  195. data/lib/embulk/page.rb +28 -0
  196. data/lib/embulk/page_builder.rb +22 -0
  197. data/lib/embulk/plugin.rb +152 -0
  198. data/lib/embulk/plugin_registry.rb +70 -0
  199. data/lib/embulk/schema.rb +85 -0
  200. data/lib/embulk/time_format_guess.rb +331 -0
  201. data/lib/embulk/version.rb +3 -0
  202. data/pom.xml +533 -0
  203. data/settings.gradle +5 -0
  204. metadata +370 -0
@@ -0,0 +1,10 @@
1
+ package org.embulk.exec;
2
+
3
+ public class ExecuteInterruptedException
4
+ extends RuntimeException
5
+ {
6
+ public ExecuteInterruptedException(Exception cause)
7
+ {
8
+ super(cause);
9
+ }
10
+ }
@@ -0,0 +1,19 @@
1
+ package org.embulk.exec;
2
+
3
+ import java.util.List;
4
+ import org.embulk.config.NextConfig;
5
+
6
+ public class ExecuteResult
7
+ {
8
+ private final NextConfig nextConfig;
9
+
10
+ public ExecuteResult(NextConfig nextConfig)
11
+ {
12
+ this.nextConfig = nextConfig;
13
+ }
14
+
15
+ public NextConfig getNextConfig()
16
+ {
17
+ return nextConfig;
18
+ }
19
+ }
@@ -0,0 +1,43 @@
1
+ package org.embulk.exec;
2
+
3
+ import java.util.ServiceLoader;
4
+ import com.google.inject.Module;
5
+ import com.google.inject.Binder;
6
+ import org.embulk.config.ConfigSource;
7
+ import org.embulk.spi.Extension;
8
+
9
+ /**
10
+ * ExtensionServiceLoaderModule loads Extensions using java.util.ServiceLoader
11
+ * mechanism.
12
+ * Jar packages providing an extension need to include
13
+ * META-INF/services/org.embulk.spi.Extension file. The content of the file is
14
+ * one-line text of the extension class name (e.g. com.example.MyPluginSourceExtension).
15
+ */
16
+ public class ExtensionServiceLoaderModule
17
+ implements Module
18
+ {
19
+ private final ClassLoader classLoader;
20
+ private final ConfigSource systemConfig;
21
+
22
+ public ExtensionServiceLoaderModule(ConfigSource systemConfig)
23
+ {
24
+ this(ExtensionServiceLoaderModule.class.getClassLoader(), systemConfig);
25
+ }
26
+
27
+ public ExtensionServiceLoaderModule(ClassLoader classLoader, ConfigSource systemConfig)
28
+ {
29
+ this.classLoader = classLoader;
30
+ this.systemConfig = systemConfig;
31
+ }
32
+
33
+ @Override
34
+ public void configure(Binder binder)
35
+ {
36
+ ServiceLoader<Extension> serviceLoader = ServiceLoader.load(Extension.class, classLoader);
37
+ for (Extension extension : serviceLoader) {
38
+ for (Module module : extension.getModules(systemConfig)) {
39
+ module.configure(binder);
40
+ }
41
+ }
42
+ }
43
+ }
@@ -0,0 +1,16 @@
1
+ package org.embulk.exec;
2
+
3
+ import javax.inject.Qualifier;
4
+ import java.lang.annotation.Retention;
5
+ import java.lang.annotation.Target;
6
+ import static java.lang.annotation.ElementType.FIELD;
7
+ import static java.lang.annotation.ElementType.METHOD;
8
+ import static java.lang.annotation.ElementType.PARAMETER;
9
+ import static java.lang.annotation.RetentionPolicy.RUNTIME;
10
+
11
+ @Retention(RUNTIME)
12
+ @Target({FIELD, PARAMETER, METHOD})
13
+ @Qualifier
14
+ public @interface ForSystemConfig
15
+ {
16
+ }
@@ -0,0 +1,307 @@
1
+ package org.embulk.exec;
2
+
3
+ import java.util.List;
4
+ import java.util.ArrayList;
5
+ import com.google.common.collect.ImmutableList;
6
+ import com.google.inject.Inject;
7
+ import com.google.inject.Injector;
8
+ import com.google.common.base.Throwables;
9
+ import org.embulk.config.Config;
10
+ import org.embulk.config.ConfigDefault;
11
+ import org.embulk.config.NextConfig;
12
+ import org.embulk.config.DataSource;
13
+ import org.embulk.config.Task;
14
+ import org.embulk.config.TaskSource;
15
+ import org.embulk.config.ConfigSource;
16
+ import org.embulk.config.CommitReport;
17
+ import org.embulk.plugin.PluginType;
18
+ import org.embulk.spi.Schema;
19
+ import org.embulk.spi.Column;
20
+ import org.embulk.spi.Page;
21
+ import org.embulk.spi.Buffer;
22
+ import org.embulk.spi.InputPlugin;
23
+ import org.embulk.spi.FileInputPlugin;
24
+ import org.embulk.spi.ParserPlugin;
25
+ import org.embulk.spi.GuessPlugin;
26
+ import org.embulk.spi.Exec;
27
+ import org.embulk.spi.ExecAction;
28
+ import org.embulk.spi.ExecSession;
29
+ import org.embulk.spi.FileInput;
30
+ import org.embulk.spi.PageOutput;
31
+ import org.embulk.spi.TransactionalFileInput;
32
+ import org.embulk.spi.FileInputRunner;
33
+
34
+ public class GuessExecutor
35
+ {
36
+ private final Injector injector;
37
+ private final ConfigSource systemConfig;
38
+ private final List<PluginType> defaultGuessPlugins;
39
+
40
+ private interface GuessExecutorTask
41
+ extends Task
42
+ {
43
+ @Config("guess_plugins")
44
+ @ConfigDefault("[]")
45
+ public List<PluginType> getGuessPlugins();
46
+
47
+ @Config("exclude_guess_plugins")
48
+ @ConfigDefault("[]")
49
+ public List<PluginType> getExcludeGuessPlugins();
50
+ }
51
+
52
+ @Inject
53
+ public GuessExecutor(Injector injector,
54
+ @ForSystemConfig ConfigSource systemConfig)
55
+ {
56
+ this.injector = injector;
57
+ this.systemConfig = systemConfig;
58
+
59
+ // TODO get default guess plugins from injector using Multibinder
60
+ this.defaultGuessPlugins = ImmutableList.of(
61
+ new PluginType("gzip"),
62
+ new PluginType("charset"),
63
+ new PluginType("newline"),
64
+ new PluginType("csv"));
65
+ }
66
+
67
+ public NextConfig guess(ExecSession exec, final ConfigSource config)
68
+ {
69
+ try {
70
+ return Exec.doWith(exec, new ExecAction<NextConfig>() {
71
+ public NextConfig run()
72
+ {
73
+ return doGuess(config);
74
+ }
75
+ });
76
+ } catch (Exception ex) {
77
+ throw Throwables.propagate(ex);
78
+ }
79
+ }
80
+
81
+ private NextConfig doGuess(ConfigSource config)
82
+ {
83
+ Buffer sample = SamplingParserPlugin.runFileInputSampling(config);
84
+ if (sample.limit() == 0) {
85
+ throw new NoSampleException("Can't get sample data because the first input file is empty");
86
+ }
87
+
88
+ List<PluginType> guessPlugins = new ArrayList<PluginType>(defaultGuessPlugins);
89
+ GuessExecutorTask task = config.getNestedOrSetEmpty("exec").loadConfig(GuessExecutorTask.class);
90
+ guessPlugins.addAll(task.getGuessPlugins());
91
+ guessPlugins.removeAll(task.getExcludeGuessPlugins());
92
+
93
+ return runGuessInput(sample, config, guessPlugins);
94
+ }
95
+
96
+ private NextConfig runGuessInput(Buffer sample,
97
+ ConfigSource config, List<PluginType> guessPlugins)
98
+ {
99
+ // repeat guessing up to 10 times
100
+ NextConfig lastGuessed = Exec.newNextConfig();
101
+ for (int i=0; i < 10; i++) {
102
+ // include last-guessed config to run guess input
103
+ ConfigSource originalConfig = config.getNested("in").deepCopy().merge(lastGuessed);
104
+ ConfigSource guessInputConfig = originalConfig.deepCopy();
105
+ guessInputConfig.getNestedOrSetEmpty("parser")
106
+ .set("type", "system_guess") // override in.parser.type so that FileInputPlugin creates GuessParserPlugin
107
+ .set("guess_plugins", guessPlugins)
108
+ .set("orig_config", originalConfig);
109
+
110
+ // run FileInputPlugin
111
+ final FileInputRunner input = new FileInputRunner(new BufferFileInputPlugin(sample));
112
+ NextConfig guessed;
113
+ try {
114
+ input.transaction(guessInputConfig, new InputPlugin.Control() {
115
+ public List<CommitReport> run(TaskSource inputTaskSource, Schema schema, int processorCount)
116
+ {
117
+ // TODO repeat run with processorIndex++ if NoSampleException happens
118
+ input.run(inputTaskSource, null, 0, new PageOutput() {
119
+ @Override
120
+ public void add(Page page)
121
+ {
122
+ throw new RuntimeException("Input plugin must be a FileInputPlugin to guess parser configuration"); // TODO exception class
123
+ }
124
+
125
+ @Override
126
+ public void finish() { }
127
+
128
+ @Override
129
+ public void close() { }
130
+ });
131
+ throw new AssertionError("Guess executor must throw GuessedNoticeError");
132
+ }
133
+ });
134
+ throw new AssertionError("Guess executor must throw GuessedNoticeError");
135
+
136
+ } catch (GuessedNoticeError error) {
137
+ guessed = lastGuessed.deepCopy().merge(error.getGuessedConfig());
138
+ }
139
+
140
+ // merge to the last-guessed config
141
+ if (lastGuessed.equals(guessed)) {
142
+ // not changed
143
+ return wrapInIn(lastGuessed);
144
+ }
145
+ lastGuessed = guessed;
146
+ }
147
+
148
+ return wrapInIn(lastGuessed);
149
+ }
150
+
151
+ private static NextConfig wrapInIn(NextConfig lastGuessed)
152
+ {
153
+ NextConfig wrapped = Exec.newNextConfig();
154
+ wrapped.getNestedOrSetEmpty("in").merge(lastGuessed);
155
+ return wrapped;
156
+ }
157
+
158
+ private static class BufferFileInputPlugin
159
+ implements FileInputPlugin
160
+ {
161
+ private final Buffer buffer;
162
+
163
+ public BufferFileInputPlugin(Buffer buffer)
164
+ {
165
+ this.buffer = buffer;
166
+ }
167
+
168
+ public NextConfig transaction(ConfigSource config, FileInputPlugin.Control control)
169
+ {
170
+ control.run(Exec.newTaskSource(), 1);
171
+ return Exec.newNextConfig();
172
+ }
173
+
174
+ public TransactionalFileInput open(TaskSource taskSource, int processorIndex)
175
+ {
176
+ return new BufferTransactionalFileInput(buffer);
177
+ }
178
+ }
179
+
180
+ private static class BufferTransactionalFileInput
181
+ implements TransactionalFileInput
182
+ {
183
+ private Buffer buffer;
184
+
185
+ public BufferTransactionalFileInput(Buffer buffer)
186
+ {
187
+ this.buffer = buffer;
188
+ }
189
+
190
+ @Override
191
+ public Buffer poll()
192
+ {
193
+ Buffer b = buffer;
194
+ buffer = null;
195
+ return b;
196
+ }
197
+
198
+ @Override
199
+ public boolean nextFile()
200
+ {
201
+ return buffer != null;
202
+ }
203
+
204
+ @Override
205
+ public void close() { }
206
+
207
+ @Override
208
+ public void abort() { }
209
+
210
+ @Override
211
+ public CommitReport commit()
212
+ {
213
+ return null;
214
+ }
215
+ }
216
+
217
+ public static class GuessParserPlugin
218
+ implements ParserPlugin
219
+ {
220
+ private interface PluginTask
221
+ extends Task
222
+ {
223
+ @Config("guess_plugins")
224
+ public List<PluginType> getGuessPluginTypes();
225
+
226
+ @Config("orig_config")
227
+ public ConfigSource getOriginalConfig();
228
+ }
229
+
230
+ @Override
231
+ public void transaction(ConfigSource config, ParserPlugin.Control control)
232
+ {
233
+ PluginTask task = config.loadConfig(PluginTask.class);
234
+ control.run(task.dump(), null);
235
+ }
236
+
237
+ @Override
238
+ public void run(TaskSource taskSource, Schema schema,
239
+ FileInput input, PageOutput pageOutput)
240
+ {
241
+ PluginTask task = taskSource.loadTask(PluginTask.class);
242
+ final ConfigSource originalConfig = task.getOriginalConfig();
243
+
244
+ // get sample buffer
245
+ Buffer sample = getFirstBuffer(input);
246
+
247
+ // load guess plugins
248
+ ImmutableList.Builder<GuessPlugin> builder = ImmutableList.builder();
249
+ for (PluginType guessType : task.getGuessPluginTypes()) {
250
+ GuessPlugin guess = Exec.newPlugin(GuessPlugin.class, guessType);
251
+ builder.add(guess);
252
+ }
253
+ List<GuessPlugin> guesses = builder.build();
254
+
255
+ // run guess plugins
256
+ ConfigSource mergedConfig = originalConfig.deepCopy();
257
+ NextConfig mergedGuessed = Exec.newNextConfig();
258
+ for (int i=0; i < guesses.size(); i++) {
259
+ NextConfig guessed = guesses.get(i).guess(originalConfig, sample);
260
+ mergedGuessed.merge(guessed);
261
+ mergedConfig.merge(mergedGuessed);
262
+ if (!mergedConfig.equals(originalConfig)) {
263
+ // config updated
264
+ throw new GuessedNoticeError(mergedGuessed);
265
+ }
266
+ }
267
+ throw new GuessedNoticeError(mergedGuessed);
268
+ }
269
+
270
+ private static Buffer getFirstBuffer(FileInput input)
271
+ {
272
+ RuntimeException decodeException = null;
273
+ try {
274
+ while (input.nextFile()) {
275
+ Buffer sample = input.poll();
276
+ if (sample != null) {
277
+ return sample;
278
+ }
279
+ }
280
+ } catch (RuntimeException ex) {
281
+ // captures the exception and rethrows it below; FileDecoderPlugin can throw exceptions
282
+ // such as "Unexpected end of ZLIB input stream"
283
+ decodeException = ex;
284
+ }
285
+ if (decodeException != null) {
286
+ throw decodeException;
287
+ }
288
+ throw new NoSampleException("No input buffer to guess");
289
+ }
290
+ }
291
+
292
+ public static class GuessedNoticeError
293
+ extends Error
294
+ {
295
+ private final NextConfig guessedConfig;
296
+
297
+ public GuessedNoticeError(NextConfig guessedConfig)
298
+ {
299
+ this.guessedConfig = guessedConfig;
300
+ }
301
+
302
+ public NextConfig getGuessedConfig()
303
+ {
304
+ return guessedConfig;
305
+ }
306
+ }
307
+ }
@@ -0,0 +1,274 @@
1
+ package org.embulk.exec;
2
+
3
+ import java.util.List;
4
+ import java.util.ArrayList;
5
+ import java.util.concurrent.Callable;
6
+ import java.util.concurrent.Future;
7
+ import java.util.concurrent.Executors;
8
+ import java.util.concurrent.ExecutorService;
9
+ import java.util.concurrent.ExecutionException;
10
+ import java.util.concurrent.atomic.AtomicInteger;
11
+
12
+ import com.google.common.collect.ImmutableList;
13
+ import com.google.inject.Inject;
14
+ import com.google.inject.Injector;
15
+ import com.google.common.base.Throwables;
16
+ import com.google.common.util.concurrent.ThreadFactoryBuilder;
17
+ import org.embulk.config.Task;
18
+ import org.embulk.config.Config;
19
+ import org.embulk.config.ConfigSource;
20
+ import org.embulk.config.TaskSource;
21
+ import org.embulk.config.NextConfig;
22
+ import org.embulk.config.CommitReport;
23
+ import org.embulk.plugin.PluginType;
24
+ import org.embulk.spi.Schema;
25
+ import org.embulk.spi.Exec;
26
+ import org.embulk.spi.ExecSession;
27
+ import org.embulk.spi.ExecAction;
28
+ import org.embulk.spi.InputPlugin;
29
+ import org.embulk.spi.OutputPlugin;
30
+ import org.embulk.spi.TransactionalPageOutput;
31
+ import org.slf4j.Logger;
32
+
33
+ public class LocalExecutor
34
+ {
35
+ private final Injector injector;
36
+ private final ConfigSource systemConfig;
37
+ private final int maxThreads;
38
+ private final ExecutorService executor;
39
+
40
+ private Logger log;
41
+ private final AtomicInteger runningTaskCount;
42
+ private final AtomicInteger completedTaskCount;
43
+
44
+ public interface ExecutorTask
45
+ extends Task
46
+ {
47
+ @Config("in")
48
+ public ConfigSource getInputConfig();
49
+
50
+ @Config("out")
51
+ public ConfigSource getOutputConfig();
52
+
53
+ public TaskSource getInputTask();
54
+ public void setInputTask(TaskSource taskSource);
55
+
56
+ public TaskSource getOutputTask();
57
+ public void setOutputTask(TaskSource taskSource);
58
+ }
59
+
60
+ @Inject
61
+ public LocalExecutor(Injector injector,
62
+ @ForSystemConfig ConfigSource systemConfig)
63
+ {
64
+ this.injector = injector;
65
+ this.systemConfig = systemConfig;
66
+
67
+ int defaultMaxThreads = Runtime.getRuntime().availableProcessors() * 2;
68
+ this.maxThreads = systemConfig.get(Integer.class, "max_threads", defaultMaxThreads);
69
+ this.executor = Executors.newFixedThreadPool(maxThreads,
70
+ new ThreadFactoryBuilder()
71
+ .setNameFormat("embulk-executor-%d")
72
+ .setDaemon(true)
73
+ .build());
74
+
75
+ this.runningTaskCount = new AtomicInteger(0);
76
+ this.completedTaskCount = new AtomicInteger(0);
77
+ }
78
+
79
+ private static class ExecuteResultBuilder
80
+ {
81
+ private NextConfig inputNextConfig;
82
+ private NextConfig outputNextConfig;
83
+
84
+ public void setInputNextConfig(NextConfig inputNextConfig)
85
+ {
86
+ this.inputNextConfig = inputNextConfig;
87
+ }
88
+
89
+ public void setOutputNextConfig(NextConfig outputNextConfig)
90
+ {
91
+ this.outputNextConfig = outputNextConfig;
92
+ }
93
+
94
+ public NextConfig getInputNextConfig()
95
+ {
96
+ return inputNextConfig;
97
+ }
98
+
99
+ public NextConfig getOutputNextConfig()
100
+ {
101
+ return outputNextConfig;
102
+ }
103
+
104
+ public ExecuteResult build()
105
+ {
106
+ if (inputNextConfig == null) {
107
+ inputNextConfig = Exec.newNextConfig();
108
+ }
109
+ if (outputNextConfig == null) {
110
+ outputNextConfig = Exec.newNextConfig();
111
+ }
112
+ NextConfig nextConfig = inputNextConfig.deepCopy().merge(outputNextConfig);
113
+ return new ExecuteResult(nextConfig);
114
+ }
115
+ }
116
+
117
+ private static class ProcessResult
118
+ {
119
+ private final CommitReport inputCommitReport;
120
+ private final CommitReport outputCommitReport;
121
+
122
+ public ProcessResult(CommitReport inputCommitReport, CommitReport outputCommitReport)
123
+ {
124
+ this.inputCommitReport = inputCommitReport;
125
+ this.outputCommitReport = outputCommitReport;
126
+ }
127
+
128
+ public CommitReport getInputCommitReport()
129
+ {
130
+ return inputCommitReport;
131
+ }
132
+
133
+ public CommitReport getOutputCommitReport()
134
+ {
135
+ return outputCommitReport;
136
+ }
137
+ }
138
+
139
+ protected InputPlugin newInputPlugin(ExecutorTask task)
140
+ {
141
+ return Exec.newPlugin(InputPlugin.class, task.getInputConfig().get(PluginType.class, "type"));
142
+ }
143
+
144
+ protected OutputPlugin newOutputPlugin(ExecutorTask task)
145
+ {
146
+ return Exec.newPlugin(OutputPlugin.class, task.getOutputConfig().get(PluginType.class, "type"));
147
+ }
148
+
149
+ public ExecuteResult run(ExecSession exec, final ConfigSource config)
150
+ {
151
+ log = exec.getLogger(LocalExecutor.class);
152
+ try {
153
+ return Exec.doWith(exec, new ExecAction<ExecuteResult>() {
154
+ public ExecuteResult run()
155
+ {
156
+ return doRun(config);
157
+ }
158
+ });
159
+ } catch (Exception ex) {
160
+ throw Throwables.propagate(ex);
161
+ }
162
+ }
163
+
164
+ private ExecuteResult doRun(ConfigSource config)
165
+ {
166
+ final ExecutorTask task = config.loadConfig(ExecutorTask.class);
167
+
168
+ final InputPlugin in = newInputPlugin(task);
169
+ final OutputPlugin out = newOutputPlugin(task);
170
+
171
+ final ExecuteResultBuilder execResult = new ExecuteResultBuilder();
172
+
173
+ NextConfig inputNextConfig = in.transaction(task.getInputConfig(), new InputPlugin.Control() {
174
+ public List<CommitReport> run(final TaskSource inputTask, final Schema schema, final int processorCount)
175
+ {
176
+ final ImmutableList.Builder<CommitReport> inputCommitReports = ImmutableList.builder();
177
+ NextConfig outputNextConfig = out.transaction(task.getOutputConfig(), schema, processorCount, new OutputPlugin.Control() {
178
+ public List<CommitReport> run(final TaskSource outputTask)
179
+ {
180
+ final ImmutableList.Builder<CommitReport> outputCommitReports = ImmutableList.builder();
181
+ task.setInputTask(inputTask);
182
+ task.setOutputTask(outputTask);
183
+
184
+ //log.debug("input: %s", task.getInputTask());
185
+ //log.debug("output: %s", task.getOutputTask());
186
+
187
+ List<ProcessResult> results = process(task.dump(), schema, processorCount);
188
+ for (ProcessResult result : results) {
189
+ inputCommitReports.add(result.getInputCommitReport());
190
+ outputCommitReports.add(result.getOutputCommitReport());
191
+ }
192
+
193
+ return outputCommitReports.build();
194
+ }
195
+ });
196
+ execResult.setOutputNextConfig(outputNextConfig);
197
+ return inputCommitReports.build();
198
+ }
199
+ });
200
+ execResult.setInputNextConfig(inputNextConfig);
201
+
202
+ return execResult.build();
203
+ }
204
+
205
+ private List<ProcessResult> process(TaskSource taskSource, Schema schema, int processorCount)
206
+ {
207
+ List<Future<ProcessResult>> futures = new ArrayList<>();
208
+ List<ProcessResult> joined = new ArrayList<>();
209
+ try {
210
+ log.info("Running {} tasks using {} local threads", processorCount, maxThreads);
211
+ showProgress(processorCount);
212
+ for (int i=0; i < processorCount; i++) {
213
+ futures.add(startProcessor(taskSource, schema, i));
214
+ }
215
+
216
+ for (int i=0; i < processorCount; i++) {
217
+ try {
218
+ joined.add(futures.get(i).get());
219
+ showProgress(processorCount);
220
+
221
+ } catch (ExecutionException ex) {
222
+ throw Throwables.propagate(ex.getCause());
223
+ } catch (InterruptedException ex) {
224
+ throw new ExecuteInterruptedException(ex);
225
+ }
226
+ }
227
+ return joined;
228
+ } finally {
229
+ for (int i=joined.size(); i < futures.size(); i++) {
230
+ futures.get(i).cancel(true);
231
+ // TODO join?
232
+ }
233
+ }
234
+ }
235
+
236
+ private void showProgress(int total)
237
+ {
238
+ int running = runningTaskCount.get();
239
+ int done = completedTaskCount.get();
240
+ log.info(String.format("{done:%3d / %d, running: %d}", done, total, running));
241
+ }
242
+
243
+ private Future<ProcessResult> startProcessor(final TaskSource taskSource, final Schema schema, final int index)
244
+ {
245
+ return executor.submit(new Callable<ProcessResult>() {
246
+ public ProcessResult call()
247
+ {
248
+ try {
249
+ runningTaskCount.getAndIncrement();
250
+ final ExecutorTask task = taskSource.loadTask(ExecutorTask.class);
251
+ final InputPlugin in = newInputPlugin(task);
252
+ final OutputPlugin out = newOutputPlugin(task);
253
+
254
+ TransactionalPageOutput tran = out.open(task.getOutputTask(), schema, index);
255
+ boolean committed = false;
256
+ try {
257
+ CommitReport inReport = in.run(task.getInputTask(), schema, index, tran);
258
+ CommitReport outReport = tran.commit(); // TODO check output.finish() is called. wrap or abstract
259
+ committed = true;
260
+ return new ProcessResult(inReport, outReport);
261
+ } finally {
262
+ if (!committed) {
263
+ tran.abort();
264
+ }
265
+ tran.close();
266
+ }
267
+ } finally {
268
+ runningTaskCount.getAndDecrement();
269
+ completedTaskCount.getAndIncrement();
270
+ }
271
+ }
272
+ });
273
+ }
274
+ }