embulk 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (204) hide show
  1. checksums.yaml +15 -0
  2. data/.gitignore +13 -0
  3. data/Gemfile +3 -0
  4. data/Gemfile.lock +33 -0
  5. data/README.md +117 -0
  6. data/Rakefile +58 -0
  7. data/bin/embulk +63 -0
  8. data/build.gradle +149 -0
  9. data/embulk-cli/build.gradle +6 -0
  10. data/embulk-cli/pom.xml +94 -0
  11. data/embulk-cli/src/main/java/org/embulk/cli/Main.java +15 -0
  12. data/embulk-core/build.gradle +6 -0
  13. data/embulk-core/pom.xml +143 -0
  14. data/embulk-core/src/main/java/org/embulk/EmbulkService.java +39 -0
  15. data/embulk-core/src/main/java/org/embulk/command/Runner.java +199 -0
  16. data/embulk-core/src/main/java/org/embulk/command/TablePrinter.java +119 -0
  17. data/embulk-core/src/main/java/org/embulk/config/CommitReport.java +26 -0
  18. data/embulk-core/src/main/java/org/embulk/config/Config.java +15 -0
  19. data/embulk-core/src/main/java/org/embulk/config/ConfigDefault.java +15 -0
  20. data/embulk-core/src/main/java/org/embulk/config/ConfigException.java +20 -0
  21. data/embulk-core/src/main/java/org/embulk/config/ConfigLoader.java +83 -0
  22. data/embulk-core/src/main/java/org/embulk/config/ConfigSource.java +28 -0
  23. data/embulk-core/src/main/java/org/embulk/config/DataSource.java +35 -0
  24. data/embulk-core/src/main/java/org/embulk/config/DataSourceImpl.java +208 -0
  25. data/embulk-core/src/main/java/org/embulk/config/DataSourceSerDe.java +80 -0
  26. data/embulk-core/src/main/java/org/embulk/config/GenericTypeReference.java +20 -0
  27. data/embulk-core/src/main/java/org/embulk/config/ModelManager.java +125 -0
  28. data/embulk-core/src/main/java/org/embulk/config/NextConfig.java +26 -0
  29. data/embulk-core/src/main/java/org/embulk/config/Task.java +10 -0
  30. data/embulk-core/src/main/java/org/embulk/config/TaskInvocationHandler.java +180 -0
  31. data/embulk-core/src/main/java/org/embulk/config/TaskSerDe.java +343 -0
  32. data/embulk-core/src/main/java/org/embulk/config/TaskSource.java +28 -0
  33. data/embulk-core/src/main/java/org/embulk/config/TaskValidationException.java +37 -0
  34. data/embulk-core/src/main/java/org/embulk/config/TaskValidator.java +24 -0
  35. data/embulk-core/src/main/java/org/embulk/exec/ExecModule.java +45 -0
  36. data/embulk-core/src/main/java/org/embulk/exec/ExecuteInterruptedException.java +10 -0
  37. data/embulk-core/src/main/java/org/embulk/exec/ExecuteResult.java +19 -0
  38. data/embulk-core/src/main/java/org/embulk/exec/ExtensionServiceLoaderModule.java +43 -0
  39. data/embulk-core/src/main/java/org/embulk/exec/ForSystemConfig.java +16 -0
  40. data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +307 -0
  41. data/embulk-core/src/main/java/org/embulk/exec/LocalExecutor.java +274 -0
  42. data/embulk-core/src/main/java/org/embulk/exec/LoggerProvider.java +30 -0
  43. data/embulk-core/src/main/java/org/embulk/exec/NoSampleException.java +10 -0
  44. data/embulk-core/src/main/java/org/embulk/exec/PooledBufferAllocator.java +58 -0
  45. data/embulk-core/src/main/java/org/embulk/exec/PreviewExecutor.java +138 -0
  46. data/embulk-core/src/main/java/org/embulk/exec/PreviewResult.java +27 -0
  47. data/embulk-core/src/main/java/org/embulk/exec/PreviewedNoticeError.java +17 -0
  48. data/embulk-core/src/main/java/org/embulk/exec/SamplingParserPlugin.java +116 -0
  49. data/embulk-core/src/main/java/org/embulk/exec/SystemConfigModule.java +24 -0
  50. data/embulk-core/src/main/java/org/embulk/jruby/JRubyPluginSource.java +69 -0
  51. data/embulk-core/src/main/java/org/embulk/jruby/JRubyScriptingModule.java +100 -0
  52. data/embulk-core/src/main/java/org/embulk/plugin/BuiltinPluginSourceModule.java +17 -0
  53. data/embulk-core/src/main/java/org/embulk/plugin/InjectedPluginSource.java +92 -0
  54. data/embulk-core/src/main/java/org/embulk/plugin/PluginManager.java +34 -0
  55. data/embulk-core/src/main/java/org/embulk/plugin/PluginSource.java +6 -0
  56. data/embulk-core/src/main/java/org/embulk/plugin/PluginSourceNotMatchException.java +19 -0
  57. data/embulk-core/src/main/java/org/embulk/plugin/PluginType.java +47 -0
  58. data/embulk-core/src/main/java/org/embulk/plugin/SetThreadContextClassLoader.java +19 -0
  59. data/embulk-core/src/main/java/org/embulk/spi/Buffer.java +113 -0
  60. data/embulk-core/src/main/java/org/embulk/spi/BufferAllocator.java +8 -0
  61. data/embulk-core/src/main/java/org/embulk/spi/Column.java +92 -0
  62. data/embulk-core/src/main/java/org/embulk/spi/ColumnConfig.java +79 -0
  63. data/embulk-core/src/main/java/org/embulk/spi/DecoderPlugin.java +16 -0
  64. data/embulk-core/src/main/java/org/embulk/spi/EncoderPlugin.java +16 -0
  65. data/embulk-core/src/main/java/org/embulk/spi/Exec.java +76 -0
  66. data/embulk-core/src/main/java/org/embulk/spi/ExecAction.java +6 -0
  67. data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +105 -0
  68. data/embulk-core/src/main/java/org/embulk/spi/Extension.java +42 -0
  69. data/embulk-core/src/main/java/org/embulk/spi/FileInput.java +11 -0
  70. data/embulk-core/src/main/java/org/embulk/spi/FileInputPlugin.java +19 -0
  71. data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +113 -0
  72. data/embulk-core/src/main/java/org/embulk/spi/FileOutput.java +13 -0
  73. data/embulk-core/src/main/java/org/embulk/spi/FileOutputPlugin.java +20 -0
  74. data/embulk-core/src/main/java/org/embulk/spi/FileOutputRunner.java +167 -0
  75. data/embulk-core/src/main/java/org/embulk/spi/FormatterPlugin.java +18 -0
  76. data/embulk-core/src/main/java/org/embulk/spi/GuessPlugin.java +9 -0
  77. data/embulk-core/src/main/java/org/embulk/spi/InputPlugin.java +20 -0
  78. data/embulk-core/src/main/java/org/embulk/spi/OutputPlugin.java +21 -0
  79. data/embulk-core/src/main/java/org/embulk/spi/Page.java +45 -0
  80. data/embulk-core/src/main/java/org/embulk/spi/PageBuilder.java +327 -0
  81. data/embulk-core/src/main/java/org/embulk/spi/PageFormat.java +47 -0
  82. data/embulk-core/src/main/java/org/embulk/spi/PageOutput.java +11 -0
  83. data/embulk-core/src/main/java/org/embulk/spi/PageReader.java +227 -0
  84. data/embulk-core/src/main/java/org/embulk/spi/ParserPlugin.java +17 -0
  85. data/embulk-core/src/main/java/org/embulk/spi/Schema.java +101 -0
  86. data/embulk-core/src/main/java/org/embulk/spi/SchemaConfig.java +52 -0
  87. data/embulk-core/src/main/java/org/embulk/spi/SchemaVisitor.java +14 -0
  88. data/embulk-core/src/main/java/org/embulk/spi/Transactional.java +10 -0
  89. data/embulk-core/src/main/java/org/embulk/spi/TransactionalFileInput.java +17 -0
  90. data/embulk-core/src/main/java/org/embulk/spi/TransactionalFileOutput.java +19 -0
  91. data/embulk-core/src/main/java/org/embulk/spi/TransactionalPageOutput.java +17 -0
  92. data/embulk-core/src/main/java/org/embulk/spi/time/DateTimeZoneSerDe.java +57 -0
  93. data/embulk-core/src/main/java/org/embulk/spi/time/JRubyTimeParserHelper.java +8 -0
  94. data/embulk-core/src/main/java/org/embulk/spi/time/JRubyTimeParserHelperFactory.java +6 -0
  95. data/embulk-core/src/main/java/org/embulk/spi/time/Timestamp.java +159 -0
  96. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormat.java +98 -0
  97. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormatter.java +55 -0
  98. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParseException.java +6 -0
  99. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParser.java +60 -0
  100. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampSerDe.java +50 -0
  101. data/embulk-core/src/main/java/org/embulk/spi/type/AbstractType.java +55 -0
  102. data/embulk-core/src/main/java/org/embulk/spi/type/BooleanType.java +12 -0
  103. data/embulk-core/src/main/java/org/embulk/spi/type/DoubleType.java +12 -0
  104. data/embulk-core/src/main/java/org/embulk/spi/type/LongType.java +12 -0
  105. data/embulk-core/src/main/java/org/embulk/spi/type/StringType.java +12 -0
  106. data/embulk-core/src/main/java/org/embulk/spi/type/TimestampType.java +39 -0
  107. data/embulk-core/src/main/java/org/embulk/spi/type/Type.java +15 -0
  108. data/embulk-core/src/main/java/org/embulk/spi/type/TypeDeserializer.java +47 -0
  109. data/embulk-core/src/main/java/org/embulk/spi/type/Types.java +14 -0
  110. data/embulk-core/src/main/java/org/embulk/spi/util/CharsetSerDe.java +55 -0
  111. data/embulk-core/src/main/java/org/embulk/spi/util/Decoders.java +81 -0
  112. data/embulk-core/src/main/java/org/embulk/spi/util/Encoders.java +81 -0
  113. data/embulk-core/src/main/java/org/embulk/spi/util/FileInputInputStream.java +110 -0
  114. data/embulk-core/src/main/java/org/embulk/spi/util/FileOutputOutputStream.java +94 -0
  115. data/embulk-core/src/main/java/org/embulk/spi/util/InputStreamFileInput.java +111 -0
  116. data/embulk-core/src/main/java/org/embulk/spi/util/Inputs.java +74 -0
  117. data/embulk-core/src/main/java/org/embulk/spi/util/LineDecoder.java +118 -0
  118. data/embulk-core/src/main/java/org/embulk/spi/util/LineEncoder.java +109 -0
  119. data/embulk-core/src/main/java/org/embulk/spi/util/ListFileInput.java +52 -0
  120. data/embulk-core/src/main/java/org/embulk/spi/util/Newline.java +38 -0
  121. data/embulk-core/src/main/java/org/embulk/spi/util/PagePrinter.java +102 -0
  122. data/embulk-core/src/main/java/org/embulk/spi/util/Pages.java +139 -0
  123. data/embulk-core/src/test/java/org/embulk/EmbulkTestRuntime.java +110 -0
  124. data/embulk-core/src/test/java/org/embulk/GuiceBinder.java +72 -0
  125. data/embulk-core/src/test/java/org/embulk/RandomManager.java +53 -0
  126. data/embulk-core/src/test/java/org/embulk/TestPluginSourceModule.java +23 -0
  127. data/embulk-core/src/test/java/org/embulk/TestUtilityModule.java +17 -0
  128. data/embulk-core/src/test/java/org/embulk/config/TestConfigSource.java +114 -0
  129. data/embulk-core/src/test/java/org/embulk/config/TestTaskSource.java +70 -0
  130. data/embulk-core/src/test/java/org/embulk/plugin/MockPluginSource.java +57 -0
  131. data/embulk-core/src/test/java/org/embulk/plugin/TestPluginType.java +18 -0
  132. data/embulk-core/src/test/java/org/embulk/spi/MockFileOutput.java +63 -0
  133. data/embulk-core/src/test/java/org/embulk/spi/MockFormatterPlugin.java +101 -0
  134. data/embulk-core/src/test/java/org/embulk/spi/MockParserPlugin.java +73 -0
  135. data/embulk-core/src/test/java/org/embulk/spi/PageTestUtils.java +78 -0
  136. data/embulk-core/src/test/java/org/embulk/spi/TestFileInputInputStream.java +67 -0
  137. data/embulk-core/src/test/java/org/embulk/spi/TestFileInputRunner.java +180 -0
  138. data/embulk-core/src/test/java/org/embulk/spi/TestFileOutputRunner.java +192 -0
  139. data/embulk-core/src/test/java/org/embulk/spi/TestInputStreamFileInput.java +188 -0
  140. data/embulk-core/src/test/java/org/embulk/spi/TestPageBuilderReader.java +301 -0
  141. data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestamp.java +116 -0
  142. data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestampFormatterParser.java +52 -0
  143. data/embulk-core/src/test/java/org/embulk/spi/type/TestTypeSerDe.java +45 -0
  144. data/embulk-core/src/test/java/org/embulk/spi/util/TestLineDecoder.java +132 -0
  145. data/embulk-core/src/test/java/org/embulk/spi/util/TestLineEncoder.java +123 -0
  146. data/embulk-standards/build.gradle +6 -0
  147. data/embulk-standards/pom.xml +68 -0
  148. data/embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java +158 -0
  149. data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +233 -0
  150. data/embulk-standards/src/main/java/org/embulk/standards/CsvTokenizer.java +355 -0
  151. data/embulk-standards/src/main/java/org/embulk/standards/GzipFileDecoderPlugin.java +55 -0
  152. data/embulk-standards/src/main/java/org/embulk/standards/GzipFileEncoderPlugin.java +39 -0
  153. data/embulk-standards/src/main/java/org/embulk/standards/LocalFileInputPlugin.java +138 -0
  154. data/embulk-standards/src/main/java/org/embulk/standards/LocalFileOutputPlugin.java +128 -0
  155. data/embulk-standards/src/main/java/org/embulk/standards/NullOutputPlugin.java +46 -0
  156. data/embulk-standards/src/main/java/org/embulk/standards/S3FileInputPlugin.java +238 -0
  157. data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginExtension.java +16 -0
  158. data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginModule.java +44 -0
  159. data/embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java +71 -0
  160. data/embulk-standards/src/main/resources/META-INF/services/org.embulk.spi.Extension +1 -0
  161. data/embulk-standards/src/test/java/org/embulk/standards/TestCsvParserPlugin.java +69 -0
  162. data/embulk-standards/src/test/java/org/embulk/standards/TestCsvTokenizer.java +291 -0
  163. data/embulk-standards/src/test/java/org/embulk/standards/TestS3FileInputPlugin.java +43 -0
  164. data/embulk.gemspec +27 -0
  165. data/examples/config.yml +34 -0
  166. data/examples/csv/sample.csv.gz +0 -0
  167. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  168. data/gradle/wrapper/gradle-wrapper.properties +6 -0
  169. data/gradlew +164 -0
  170. data/gradlew.bat +90 -0
  171. data/lib/embulk.rb +16 -0
  172. data/lib/embulk/buffer.rb +17 -0
  173. data/lib/embulk/column.rb +47 -0
  174. data/lib/embulk/command/embulk.rb +39 -0
  175. data/lib/embulk/command/embulk_example.rb +32 -0
  176. data/lib/embulk/command/embulk_generate_bin.rb +62 -0
  177. data/lib/embulk/command/embulk_run.rb +243 -0
  178. data/lib/embulk/data/bundle/.bundle/config +3 -0
  179. data/lib/embulk/data/bundle/Gemfile +31 -0
  180. data/lib/embulk/data/bundle/Gemfile.lock +8 -0
  181. data/lib/embulk/data/bundle/embulk/input_example.rb +40 -0
  182. data/lib/embulk/data/bundle/embulk/output_example.rb +51 -0
  183. data/lib/embulk/data_source.rb +66 -0
  184. data/lib/embulk/error.rb +5 -0
  185. data/lib/embulk/guess_charset.rb +26 -0
  186. data/lib/embulk/guess_csv.rb +195 -0
  187. data/lib/embulk/guess_gzip.rb +18 -0
  188. data/lib/embulk/guess_newline.rb +20 -0
  189. data/lib/embulk/guess_plugin.rb +113 -0
  190. data/lib/embulk/input_plugin.rb +53 -0
  191. data/lib/embulk/java/bootstrap.rb +12 -0
  192. data/lib/embulk/java/imports.rb +26 -0
  193. data/lib/embulk/java/time_helper.rb +77 -0
  194. data/lib/embulk/output_plugin.rb +104 -0
  195. data/lib/embulk/page.rb +28 -0
  196. data/lib/embulk/page_builder.rb +22 -0
  197. data/lib/embulk/plugin.rb +152 -0
  198. data/lib/embulk/plugin_registry.rb +70 -0
  199. data/lib/embulk/schema.rb +85 -0
  200. data/lib/embulk/time_format_guess.rb +331 -0
  201. data/lib/embulk/version.rb +3 -0
  202. data/pom.xml +533 -0
  203. data/settings.gradle +5 -0
  204. metadata +370 -0
@@ -0,0 +1,30 @@
1
+ package org.embulk.exec;
2
+
3
+ import java.util.Properties;
4
+ import org.slf4j.ILoggerFactory;
5
+ import org.slf4j.LoggerFactory;
6
+ import org.apache.log4j.PropertyConfigurator;
7
+ import com.google.inject.Provider;
8
+
9
+ public class LoggerProvider
10
+ implements Provider<ILoggerFactory>
11
+ {
12
+ public LoggerProvider()
13
+ {
14
+ // TODO system config
15
+ Properties prop = new Properties();
16
+
17
+ prop.setProperty("log4j.rootLogger", "INFO,root");
18
+ prop.setProperty("log4j.appender.root", "org.apache.log4j.ConsoleAppender");
19
+ prop.setProperty("log4j.appender.root.layout", "org.apache.log4j.PatternLayout");
20
+ prop.setProperty("log4j.appender.root.layout.ConversionPattern", "%d [%p]: %t:%c: %m%n");
21
+
22
+ // TODO
23
+ PropertyConfigurator.configure(prop);
24
+ }
25
+
26
+ public ILoggerFactory get()
27
+ {
28
+ return LoggerFactory.getILoggerFactory();
29
+ }
30
+ }
@@ -0,0 +1,10 @@
1
+ package org.embulk.exec;
2
+
3
/**
 * Unchecked exception that carries only a message.
 *
 * Within this package it is raised when an input produced nothing to
 * sample or preview.
 */
public class NoSampleException
        extends RuntimeException
{
    /**
     * @param message human-readable description of why no sample is available
     */
    public NoSampleException(String message)
    {
        super(message);
    }
}
@@ -0,0 +1,58 @@
1
+ package org.embulk.exec;
2
+
3
+ import io.netty.buffer.PooledByteBufAllocator;
4
+ import io.netty.buffer.ByteBuf;
5
+ import io.netty.util.ResourceLeakDetector;
6
+ import org.embulk.spi.Buffer;
7
+ import org.embulk.spi.BufferAllocator;
8
+
9
+ public class PooledBufferAllocator
10
+ implements BufferAllocator
11
+ {
12
+ private PooledByteBufAllocator nettyBuffer;
13
+
14
+ public PooledBufferAllocator()
15
+ {
16
+ // TODO configure parameters
17
+ this.nettyBuffer = new PooledByteBufAllocator(false);
18
+ }
19
+
20
+ public Buffer allocate()
21
+ {
22
+ return new NettyByteBufBuffer(nettyBuffer.buffer());
23
+ }
24
+
25
+ public Buffer allocate(int minimumCapacity)
26
+ {
27
+ int size = 32*1024;
28
+ while (size < minimumCapacity) {
29
+ size *= 2;
30
+ }
31
+ return new NettyByteBufBuffer(nettyBuffer.buffer(size));
32
+ }
33
+
34
+ private static class NettyByteBufBuffer
35
+ extends Buffer
36
+ {
37
+ private ByteBuf buf;
38
+ private Exception doubleFreeCheck;
39
+
40
+ public NettyByteBufBuffer(ByteBuf buf)
41
+ {
42
+ super(buf.array(), buf.arrayOffset(), buf.capacity());
43
+ this.buf = buf;
44
+ }
45
+
46
+ public void release()
47
+ {
48
+ if (doubleFreeCheck != null) {
49
+ doubleFreeCheck.printStackTrace();
50
+ }
51
+ if (buf != null) {
52
+ buf.release();
53
+ buf = null;
54
+ doubleFreeCheck = new NullPointerException();
55
+ }
56
+ }
57
+ }
58
+ }
@@ -0,0 +1,138 @@
1
+ package org.embulk.exec;
2
+
3
+ import java.util.List;
4
+ import java.util.ArrayList;
5
+ import javax.validation.constraints.NotNull;
6
+ import com.google.inject.Inject;
7
+ import com.google.inject.Injector;
8
+ import com.google.common.base.Throwables;
9
+ import org.embulk.config.Config;
10
+ import org.embulk.config.ConfigDefault;
11
+ import org.embulk.config.Task;
12
+ import org.embulk.config.TaskSource;
13
+ import org.embulk.config.ConfigSource;
14
+ import org.embulk.config.CommitReport;
15
+ import org.embulk.plugin.PluginType;
16
+ import org.embulk.spi.Schema;
17
+ import org.embulk.spi.Page;
18
+ import org.embulk.spi.PageOutput;
19
+ import org.embulk.spi.PageReader;
20
+ import org.embulk.spi.InputPlugin;
21
+ import org.embulk.spi.Exec;
22
+ import org.embulk.spi.ExecSession;
23
+ import org.embulk.spi.ExecAction;
24
+
25
+ public class PreviewExecutor
26
+ {
27
+ private final Injector injector;
28
+ private final ConfigSource systemConfig;
29
+
30
+ public interface PreviewTask
31
+ extends Task
32
+ {
33
+ @Config("in")
34
+ @NotNull
35
+ public ConfigSource getInputConfig();
36
+
37
+ @Config("preview_sample_rows")
38
+ @ConfigDefault("15")
39
+ public int getSampleRows();
40
+
41
+ public TaskSource getInputTask();
42
+ public void setInputTask(TaskSource taskSource);
43
+ }
44
+
45
+ @Inject
46
+ public PreviewExecutor(Injector injector,
47
+ @ForSystemConfig ConfigSource systemConfig)
48
+ {
49
+ this.injector = injector;
50
+ this.systemConfig = systemConfig;
51
+ }
52
+
53
+ public PreviewResult preview(ExecSession exec, final ConfigSource config)
54
+ {
55
+ try {
56
+ return Exec.doWith(exec, new ExecAction<PreviewResult>() {
57
+ public PreviewResult run()
58
+ {
59
+ return doPreview(config);
60
+ }
61
+ });
62
+ } catch (Exception ex) {
63
+ throw Throwables.propagate(ex);
64
+ }
65
+ }
66
+
67
+ protected InputPlugin newInputPlugin(PreviewTask task)
68
+ {
69
+ return Exec.newPlugin(InputPlugin.class, task.getInputConfig().get(PluginType.class, "type"));
70
+ }
71
+
72
+ private PreviewResult doPreview(ConfigSource config)
73
+ {
74
+ final PreviewTask task = config.loadConfig(PreviewTask.class);
75
+ InputPlugin input = newInputPlugin(task);
76
+
77
+ try {
78
+ input.transaction(task.getInputConfig(), new InputPlugin.Control() {
79
+ public List<CommitReport> run(TaskSource taskSource, Schema schema, int processorCount)
80
+ {
81
+ InputPlugin input = newInputPlugin(task);
82
+ try (SamplingPageOutput out = new SamplingPageOutput(task.getSampleRows(), schema)) {
83
+ input.run(taskSource, schema, 0, out);
84
+ }
85
+ throw new NoSampleException("No input records to preview");
86
+ }
87
+ });
88
+ throw new AssertionError("PreviewExecutor executor must throw PreviewedNoticeError");
89
+ } catch (PreviewedNoticeError previewed) {
90
+ return previewed.getPreviewResult();
91
+ }
92
+ }
93
+
94
+ private static class SamplingPageOutput
95
+ implements PageOutput
96
+ {
97
+ private final int sampleRows;
98
+ private final Schema schema;
99
+ private List<Page> pages;
100
+ private int recordCount;
101
+
102
+ public SamplingPageOutput(int sampleRows, Schema schema)
103
+ {
104
+ this.sampleRows = sampleRows;
105
+ this.schema = schema;
106
+ this.pages = new ArrayList<Page>();
107
+ }
108
+
109
+ @Override
110
+ public void add(Page page)
111
+ {
112
+ pages.add(page);
113
+ recordCount += PageReader.getRecordCount(page);
114
+ if (recordCount >= sampleRows) {
115
+ finish();
116
+ }
117
+ }
118
+
119
+ @Override
120
+ public void finish()
121
+ {
122
+ if (recordCount == 0) {
123
+ throw new NoSampleException("No input records to preview");
124
+ }
125
+ PreviewResult res = new PreviewResult(schema, pages);
126
+ pages = null;
127
+ throw new PreviewedNoticeError(res);
128
+ }
129
+
130
+ @Override
131
+ public void close()
132
+ {
133
+ for (Page page : pages) {
134
+ page.release();
135
+ }
136
+ }
137
+ }
138
+ }
@@ -0,0 +1,27 @@
1
+ package org.embulk.exec;
2
+
3
+ import java.util.List;
4
+ import org.embulk.spi.Schema;
5
+ import org.embulk.spi.Page;
6
+
7
+ public class PreviewResult
8
+ {
9
+ private final Schema schema;
10
+ private final List<Page> pages;
11
+
12
+ public PreviewResult(Schema schema, List<Page> pages)
13
+ {
14
+ this.schema = schema;
15
+ this.pages = pages;
16
+ }
17
+
18
+ public Schema getSchema()
19
+ {
20
+ return schema;
21
+ }
22
+
23
+ public List<Page> getPages()
24
+ {
25
+ return pages;
26
+ }
27
+ }
@@ -0,0 +1,17 @@
1
+ package org.embulk.exec;
2
+
3
+ public class PreviewedNoticeError
4
+ extends Error
5
+ {
6
+ private final PreviewResult previewResult;
7
+
8
+ public PreviewedNoticeError(PreviewResult previewResult)
9
+ {
10
+ this.previewResult = previewResult;
11
+ }
12
+
13
+ public PreviewResult getPreviewResult()
14
+ {
15
+ return previewResult;
16
+ }
17
+ }
@@ -0,0 +1,116 @@
1
+ package org.embulk.exec;
2
+
3
+ import java.util.List;
4
+ import com.google.inject.Inject;
5
+ import org.embulk.config.TaskSource;
6
+ import org.embulk.config.ConfigSource;
7
+ import org.embulk.config.CommitReport;
8
+ import org.embulk.plugin.PluginType;
9
+ import org.embulk.spi.Schema;
10
+ import org.embulk.spi.Exec;
11
+ import org.embulk.spi.Page;
12
+ import org.embulk.spi.Buffer;
13
+ import org.embulk.spi.InputPlugin;
14
+ import org.embulk.spi.ParserPlugin;
15
+ import org.embulk.spi.FileInput;
16
+ import org.embulk.spi.PageOutput;
17
+ import static org.embulk.spi.util.Inputs.each;
18
+
19
+ /*
20
+ * Used by GuessExecutor
21
+ */
22
+ class SamplingParserPlugin
23
+ implements ParserPlugin
24
+ {
25
+ private final int maxSampleSize;
26
+
27
+ @Inject
28
+ public SamplingParserPlugin(@ForSystemConfig ConfigSource systemConfig)
29
+ {
30
+ this.maxSampleSize = 32*1024; // TODO get sample syze from system config
31
+ }
32
+
33
+ @Override
34
+ public void transaction(ConfigSource config, ParserPlugin.Control control)
35
+ {
36
+ control.run(Exec.newTaskSource(), null);
37
+ }
38
+
39
+ @Override
40
+ public void run(TaskSource taskSource, Schema schema,
41
+ FileInput input, PageOutput output)
42
+ {
43
+ Buffer buffer = getSample(input, maxSampleSize);
44
+ throw new SampledNoticeError(buffer);
45
+ }
46
+
47
+ static Buffer runFileInputSampling(ConfigSource config)
48
+ {
49
+ // override in.parser.type so that FileInputRunner creates GuessParserPlugin
50
+ ConfigSource samplingInputConfig = config.getNested("in").deepCopy();
51
+ samplingInputConfig.getNestedOrSetEmpty("parser").set("type", "system_sampling");
52
+
53
+ final InputPlugin input = Exec.newPlugin(InputPlugin.class, samplingInputConfig.get(PluginType.class, "type"));
54
+ try {
55
+ input.transaction(samplingInputConfig, new InputPlugin.Control() {
56
+ public List<CommitReport> run(TaskSource taskSource, Schema schema, int processorCount)
57
+ {
58
+ input.run(taskSource, schema, 0, new PageOutput() {
59
+ @Override
60
+ public void add(Page page)
61
+ {
62
+ throw new RuntimeException("Input plugin must be a FileInputPlugin to guess parser configuration"); // TODO exception class
63
+ }
64
+
65
+ public void finish() { }
66
+
67
+ public void close() { }
68
+ });
69
+ throw new NoSampleException("No input files to guess parser configuration");
70
+ }
71
+ });
72
+ throw new AssertionError("SamplingParserPlugin must throw SampledNoticeError");
73
+ } catch (SampledNoticeError error) {
74
+ return error.getSample();
75
+ }
76
+ }
77
+
78
+ private static Buffer getSample(FileInput fileInput, int maxSampleSize)
79
+ {
80
+ Buffer sample = Buffer.allocate(maxSampleSize);
81
+ int sampleSize = 0;
82
+
83
+ while (fileInput.nextFile()) {
84
+ for (Buffer buffer : each(fileInput)) {
85
+ if (sampleSize >= maxSampleSize) {
86
+ // skip remaining all buffers so that FileInputPlugin.runInput doesn't
87
+ // throw exceptions at channel.join()
88
+ } else {
89
+ int size = Math.min(buffer.limit(), sample.capacity() - sampleSize);
90
+ sample.setBytes(sampleSize, buffer, 0, size);
91
+ sampleSize += size;
92
+ }
93
+ buffer.release();
94
+ }
95
+ }
96
+
97
+ sample.limit(sampleSize);
98
+ return sample;
99
+ }
100
+
101
+ public static class SampledNoticeError
102
+ extends Error
103
+ {
104
+ private final Buffer sample;
105
+
106
+ public SampledNoticeError(Buffer sample)
107
+ {
108
+ this.sample = sample;
109
+ }
110
+
111
+ public Buffer getSample()
112
+ {
113
+ return sample;
114
+ }
115
+ }
116
+ }
@@ -0,0 +1,24 @@
1
+ package org.embulk.exec;
2
+
3
+ import com.google.inject.Module;
4
+ import com.google.inject.Binder;
5
+ import org.embulk.config.ConfigSource;
6
+
7
+ public class SystemConfigModule
8
+ implements Module
9
+ {
10
+ private final ConfigSource systemConfig;
11
+
12
+ public SystemConfigModule(ConfigSource systemConfig)
13
+ {
14
+ this.systemConfig = systemConfig;
15
+ }
16
+
17
+ @Override
18
+ public void configure(Binder binder)
19
+ {
20
+ binder.bind(ConfigSource.class)
21
+ .annotatedWith(ForSystemConfig.class)
22
+ .toInstance(systemConfig);
23
+ }
24
+ }
@@ -0,0 +1,69 @@
1
+ package org.embulk.jruby;
2
+
3
+ import com.google.inject.Inject;
4
+ import org.jruby.embed.ScriptingContainer;
5
+ import org.jruby.embed.InvokeFailedException;
6
+ import org.embulk.plugin.PluginType;
7
+ import org.embulk.plugin.PluginSource;
8
+ import org.embulk.plugin.PluginSourceNotMatchException;
9
+ import org.embulk.spi.InputPlugin;
10
+ import org.embulk.spi.OutputPlugin;
11
+ import org.embulk.spi.ParserPlugin;
12
+ import org.embulk.spi.FormatterPlugin;
13
+ import org.embulk.spi.DecoderPlugin;
14
+ import org.embulk.spi.EncoderPlugin;
15
+ //import org.embulk.spi.LineFilterPlugin;
16
+ import org.embulk.spi.GuessPlugin;
17
+
18
+ public class JRubyPluginSource
19
+ implements PluginSource
20
+ {
21
+ private final ScriptingContainer jruby;
22
+ private final Object rubyPluginManager;
23
+
24
+ @Inject
25
+ public JRubyPluginSource(ScriptingContainer jruby)
26
+ {
27
+ this.jruby = jruby;
28
+
29
+ // get Embulk::Plugin
30
+ //this.rubyPluginManager = ((RubyModule) jruby.get("Embulk")).const_get(
31
+ // RubySymbol.newSymbol(
32
+ // jruby.getProvider().getRuntime(), "Plugin"));
33
+ this.rubyPluginManager = jruby.runScriptlet("Embulk::Plugin");
34
+ }
35
+
36
+ public <T> T newPlugin(Class<T> iface, PluginType type) throws PluginSourceNotMatchException
37
+ {
38
+ String name = type.getName();
39
+
40
+ String category;
41
+ if (InputPlugin.class.isAssignableFrom(iface)) {
42
+ category = "input";
43
+ } else if (OutputPlugin.class.isAssignableFrom(iface)) {
44
+ category = "output";
45
+ } else if (ParserPlugin.class.isAssignableFrom(iface)) {
46
+ category = "parser";
47
+ } else if (FormatterPlugin.class.isAssignableFrom(iface)) {
48
+ category = "formatter";
49
+ } else if (DecoderPlugin.class.isAssignableFrom(iface)) {
50
+ category = "decoder";
51
+ } else if (EncoderPlugin.class.isAssignableFrom(iface)) {
52
+ category = "encoder";
53
+ //} else if (LineFilterPlugin.class.isAssignableFrom(iface)) {
54
+ // category = "line_filter";
55
+ } else if (GuessPlugin.class.isAssignableFrom(iface)) {
56
+ category = "guess";
57
+ } else {
58
+ // unsupported plugin category
59
+ throw new PluginSourceNotMatchException("Plugin interface "+iface+" is not supported in JRuby");
60
+ }
61
+
62
+ String methodName = "new_java_" + category;
63
+ try {
64
+ return jruby.callMethod(rubyPluginManager, methodName, name, iface);
65
+ } catch (InvokeFailedException ex) {
66
+ throw new PluginSourceNotMatchException(ex.getCause());
67
+ }
68
+ }
69
+ }