embulk 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (204) hide show
  1. checksums.yaml +15 -0
  2. data/.gitignore +13 -0
  3. data/Gemfile +3 -0
  4. data/Gemfile.lock +33 -0
  5. data/README.md +117 -0
  6. data/Rakefile +58 -0
  7. data/bin/embulk +63 -0
  8. data/build.gradle +149 -0
  9. data/embulk-cli/build.gradle +6 -0
  10. data/embulk-cli/pom.xml +94 -0
  11. data/embulk-cli/src/main/java/org/embulk/cli/Main.java +15 -0
  12. data/embulk-core/build.gradle +6 -0
  13. data/embulk-core/pom.xml +143 -0
  14. data/embulk-core/src/main/java/org/embulk/EmbulkService.java +39 -0
  15. data/embulk-core/src/main/java/org/embulk/command/Runner.java +199 -0
  16. data/embulk-core/src/main/java/org/embulk/command/TablePrinter.java +119 -0
  17. data/embulk-core/src/main/java/org/embulk/config/CommitReport.java +26 -0
  18. data/embulk-core/src/main/java/org/embulk/config/Config.java +15 -0
  19. data/embulk-core/src/main/java/org/embulk/config/ConfigDefault.java +15 -0
  20. data/embulk-core/src/main/java/org/embulk/config/ConfigException.java +20 -0
  21. data/embulk-core/src/main/java/org/embulk/config/ConfigLoader.java +83 -0
  22. data/embulk-core/src/main/java/org/embulk/config/ConfigSource.java +28 -0
  23. data/embulk-core/src/main/java/org/embulk/config/DataSource.java +35 -0
  24. data/embulk-core/src/main/java/org/embulk/config/DataSourceImpl.java +208 -0
  25. data/embulk-core/src/main/java/org/embulk/config/DataSourceSerDe.java +80 -0
  26. data/embulk-core/src/main/java/org/embulk/config/GenericTypeReference.java +20 -0
  27. data/embulk-core/src/main/java/org/embulk/config/ModelManager.java +125 -0
  28. data/embulk-core/src/main/java/org/embulk/config/NextConfig.java +26 -0
  29. data/embulk-core/src/main/java/org/embulk/config/Task.java +10 -0
  30. data/embulk-core/src/main/java/org/embulk/config/TaskInvocationHandler.java +180 -0
  31. data/embulk-core/src/main/java/org/embulk/config/TaskSerDe.java +343 -0
  32. data/embulk-core/src/main/java/org/embulk/config/TaskSource.java +28 -0
  33. data/embulk-core/src/main/java/org/embulk/config/TaskValidationException.java +37 -0
  34. data/embulk-core/src/main/java/org/embulk/config/TaskValidator.java +24 -0
  35. data/embulk-core/src/main/java/org/embulk/exec/ExecModule.java +45 -0
  36. data/embulk-core/src/main/java/org/embulk/exec/ExecuteInterruptedException.java +10 -0
  37. data/embulk-core/src/main/java/org/embulk/exec/ExecuteResult.java +19 -0
  38. data/embulk-core/src/main/java/org/embulk/exec/ExtensionServiceLoaderModule.java +43 -0
  39. data/embulk-core/src/main/java/org/embulk/exec/ForSystemConfig.java +16 -0
  40. data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +307 -0
  41. data/embulk-core/src/main/java/org/embulk/exec/LocalExecutor.java +274 -0
  42. data/embulk-core/src/main/java/org/embulk/exec/LoggerProvider.java +30 -0
  43. data/embulk-core/src/main/java/org/embulk/exec/NoSampleException.java +10 -0
  44. data/embulk-core/src/main/java/org/embulk/exec/PooledBufferAllocator.java +58 -0
  45. data/embulk-core/src/main/java/org/embulk/exec/PreviewExecutor.java +138 -0
  46. data/embulk-core/src/main/java/org/embulk/exec/PreviewResult.java +27 -0
  47. data/embulk-core/src/main/java/org/embulk/exec/PreviewedNoticeError.java +17 -0
  48. data/embulk-core/src/main/java/org/embulk/exec/SamplingParserPlugin.java +116 -0
  49. data/embulk-core/src/main/java/org/embulk/exec/SystemConfigModule.java +24 -0
  50. data/embulk-core/src/main/java/org/embulk/jruby/JRubyPluginSource.java +69 -0
  51. data/embulk-core/src/main/java/org/embulk/jruby/JRubyScriptingModule.java +100 -0
  52. data/embulk-core/src/main/java/org/embulk/plugin/BuiltinPluginSourceModule.java +17 -0
  53. data/embulk-core/src/main/java/org/embulk/plugin/InjectedPluginSource.java +92 -0
  54. data/embulk-core/src/main/java/org/embulk/plugin/PluginManager.java +34 -0
  55. data/embulk-core/src/main/java/org/embulk/plugin/PluginSource.java +6 -0
  56. data/embulk-core/src/main/java/org/embulk/plugin/PluginSourceNotMatchException.java +19 -0
  57. data/embulk-core/src/main/java/org/embulk/plugin/PluginType.java +47 -0
  58. data/embulk-core/src/main/java/org/embulk/plugin/SetThreadContextClassLoader.java +19 -0
  59. data/embulk-core/src/main/java/org/embulk/spi/Buffer.java +113 -0
  60. data/embulk-core/src/main/java/org/embulk/spi/BufferAllocator.java +8 -0
  61. data/embulk-core/src/main/java/org/embulk/spi/Column.java +92 -0
  62. data/embulk-core/src/main/java/org/embulk/spi/ColumnConfig.java +79 -0
  63. data/embulk-core/src/main/java/org/embulk/spi/DecoderPlugin.java +16 -0
  64. data/embulk-core/src/main/java/org/embulk/spi/EncoderPlugin.java +16 -0
  65. data/embulk-core/src/main/java/org/embulk/spi/Exec.java +76 -0
  66. data/embulk-core/src/main/java/org/embulk/spi/ExecAction.java +6 -0
  67. data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +105 -0
  68. data/embulk-core/src/main/java/org/embulk/spi/Extension.java +42 -0
  69. data/embulk-core/src/main/java/org/embulk/spi/FileInput.java +11 -0
  70. data/embulk-core/src/main/java/org/embulk/spi/FileInputPlugin.java +19 -0
  71. data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +113 -0
  72. data/embulk-core/src/main/java/org/embulk/spi/FileOutput.java +13 -0
  73. data/embulk-core/src/main/java/org/embulk/spi/FileOutputPlugin.java +20 -0
  74. data/embulk-core/src/main/java/org/embulk/spi/FileOutputRunner.java +167 -0
  75. data/embulk-core/src/main/java/org/embulk/spi/FormatterPlugin.java +18 -0
  76. data/embulk-core/src/main/java/org/embulk/spi/GuessPlugin.java +9 -0
  77. data/embulk-core/src/main/java/org/embulk/spi/InputPlugin.java +20 -0
  78. data/embulk-core/src/main/java/org/embulk/spi/OutputPlugin.java +21 -0
  79. data/embulk-core/src/main/java/org/embulk/spi/Page.java +45 -0
  80. data/embulk-core/src/main/java/org/embulk/spi/PageBuilder.java +327 -0
  81. data/embulk-core/src/main/java/org/embulk/spi/PageFormat.java +47 -0
  82. data/embulk-core/src/main/java/org/embulk/spi/PageOutput.java +11 -0
  83. data/embulk-core/src/main/java/org/embulk/spi/PageReader.java +227 -0
  84. data/embulk-core/src/main/java/org/embulk/spi/ParserPlugin.java +17 -0
  85. data/embulk-core/src/main/java/org/embulk/spi/Schema.java +101 -0
  86. data/embulk-core/src/main/java/org/embulk/spi/SchemaConfig.java +52 -0
  87. data/embulk-core/src/main/java/org/embulk/spi/SchemaVisitor.java +14 -0
  88. data/embulk-core/src/main/java/org/embulk/spi/Transactional.java +10 -0
  89. data/embulk-core/src/main/java/org/embulk/spi/TransactionalFileInput.java +17 -0
  90. data/embulk-core/src/main/java/org/embulk/spi/TransactionalFileOutput.java +19 -0
  91. data/embulk-core/src/main/java/org/embulk/spi/TransactionalPageOutput.java +17 -0
  92. data/embulk-core/src/main/java/org/embulk/spi/time/DateTimeZoneSerDe.java +57 -0
  93. data/embulk-core/src/main/java/org/embulk/spi/time/JRubyTimeParserHelper.java +8 -0
  94. data/embulk-core/src/main/java/org/embulk/spi/time/JRubyTimeParserHelperFactory.java +6 -0
  95. data/embulk-core/src/main/java/org/embulk/spi/time/Timestamp.java +159 -0
  96. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormat.java +98 -0
  97. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormatter.java +55 -0
  98. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParseException.java +6 -0
  99. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParser.java +60 -0
  100. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampSerDe.java +50 -0
  101. data/embulk-core/src/main/java/org/embulk/spi/type/AbstractType.java +55 -0
  102. data/embulk-core/src/main/java/org/embulk/spi/type/BooleanType.java +12 -0
  103. data/embulk-core/src/main/java/org/embulk/spi/type/DoubleType.java +12 -0
  104. data/embulk-core/src/main/java/org/embulk/spi/type/LongType.java +12 -0
  105. data/embulk-core/src/main/java/org/embulk/spi/type/StringType.java +12 -0
  106. data/embulk-core/src/main/java/org/embulk/spi/type/TimestampType.java +39 -0
  107. data/embulk-core/src/main/java/org/embulk/spi/type/Type.java +15 -0
  108. data/embulk-core/src/main/java/org/embulk/spi/type/TypeDeserializer.java +47 -0
  109. data/embulk-core/src/main/java/org/embulk/spi/type/Types.java +14 -0
  110. data/embulk-core/src/main/java/org/embulk/spi/util/CharsetSerDe.java +55 -0
  111. data/embulk-core/src/main/java/org/embulk/spi/util/Decoders.java +81 -0
  112. data/embulk-core/src/main/java/org/embulk/spi/util/Encoders.java +81 -0
  113. data/embulk-core/src/main/java/org/embulk/spi/util/FileInputInputStream.java +110 -0
  114. data/embulk-core/src/main/java/org/embulk/spi/util/FileOutputOutputStream.java +94 -0
  115. data/embulk-core/src/main/java/org/embulk/spi/util/InputStreamFileInput.java +111 -0
  116. data/embulk-core/src/main/java/org/embulk/spi/util/Inputs.java +74 -0
  117. data/embulk-core/src/main/java/org/embulk/spi/util/LineDecoder.java +118 -0
  118. data/embulk-core/src/main/java/org/embulk/spi/util/LineEncoder.java +109 -0
  119. data/embulk-core/src/main/java/org/embulk/spi/util/ListFileInput.java +52 -0
  120. data/embulk-core/src/main/java/org/embulk/spi/util/Newline.java +38 -0
  121. data/embulk-core/src/main/java/org/embulk/spi/util/PagePrinter.java +102 -0
  122. data/embulk-core/src/main/java/org/embulk/spi/util/Pages.java +139 -0
  123. data/embulk-core/src/test/java/org/embulk/EmbulkTestRuntime.java +110 -0
  124. data/embulk-core/src/test/java/org/embulk/GuiceBinder.java +72 -0
  125. data/embulk-core/src/test/java/org/embulk/RandomManager.java +53 -0
  126. data/embulk-core/src/test/java/org/embulk/TestPluginSourceModule.java +23 -0
  127. data/embulk-core/src/test/java/org/embulk/TestUtilityModule.java +17 -0
  128. data/embulk-core/src/test/java/org/embulk/config/TestConfigSource.java +114 -0
  129. data/embulk-core/src/test/java/org/embulk/config/TestTaskSource.java +70 -0
  130. data/embulk-core/src/test/java/org/embulk/plugin/MockPluginSource.java +57 -0
  131. data/embulk-core/src/test/java/org/embulk/plugin/TestPluginType.java +18 -0
  132. data/embulk-core/src/test/java/org/embulk/spi/MockFileOutput.java +63 -0
  133. data/embulk-core/src/test/java/org/embulk/spi/MockFormatterPlugin.java +101 -0
  134. data/embulk-core/src/test/java/org/embulk/spi/MockParserPlugin.java +73 -0
  135. data/embulk-core/src/test/java/org/embulk/spi/PageTestUtils.java +78 -0
  136. data/embulk-core/src/test/java/org/embulk/spi/TestFileInputInputStream.java +67 -0
  137. data/embulk-core/src/test/java/org/embulk/spi/TestFileInputRunner.java +180 -0
  138. data/embulk-core/src/test/java/org/embulk/spi/TestFileOutputRunner.java +192 -0
  139. data/embulk-core/src/test/java/org/embulk/spi/TestInputStreamFileInput.java +188 -0
  140. data/embulk-core/src/test/java/org/embulk/spi/TestPageBuilderReader.java +301 -0
  141. data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestamp.java +116 -0
  142. data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestampFormatterParser.java +52 -0
  143. data/embulk-core/src/test/java/org/embulk/spi/type/TestTypeSerDe.java +45 -0
  144. data/embulk-core/src/test/java/org/embulk/spi/util/TestLineDecoder.java +132 -0
  145. data/embulk-core/src/test/java/org/embulk/spi/util/TestLineEncoder.java +123 -0
  146. data/embulk-standards/build.gradle +6 -0
  147. data/embulk-standards/pom.xml +68 -0
  148. data/embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java +158 -0
  149. data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +233 -0
  150. data/embulk-standards/src/main/java/org/embulk/standards/CsvTokenizer.java +355 -0
  151. data/embulk-standards/src/main/java/org/embulk/standards/GzipFileDecoderPlugin.java +55 -0
  152. data/embulk-standards/src/main/java/org/embulk/standards/GzipFileEncoderPlugin.java +39 -0
  153. data/embulk-standards/src/main/java/org/embulk/standards/LocalFileInputPlugin.java +138 -0
  154. data/embulk-standards/src/main/java/org/embulk/standards/LocalFileOutputPlugin.java +128 -0
  155. data/embulk-standards/src/main/java/org/embulk/standards/NullOutputPlugin.java +46 -0
  156. data/embulk-standards/src/main/java/org/embulk/standards/S3FileInputPlugin.java +238 -0
  157. data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginExtension.java +16 -0
  158. data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginModule.java +44 -0
  159. data/embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java +71 -0
  160. data/embulk-standards/src/main/resources/META-INF/services/org.embulk.spi.Extension +1 -0
  161. data/embulk-standards/src/test/java/org/embulk/standards/TestCsvParserPlugin.java +69 -0
  162. data/embulk-standards/src/test/java/org/embulk/standards/TestCsvTokenizer.java +291 -0
  163. data/embulk-standards/src/test/java/org/embulk/standards/TestS3FileInputPlugin.java +43 -0
  164. data/embulk.gemspec +27 -0
  165. data/examples/config.yml +34 -0
  166. data/examples/csv/sample.csv.gz +0 -0
  167. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  168. data/gradle/wrapper/gradle-wrapper.properties +6 -0
  169. data/gradlew +164 -0
  170. data/gradlew.bat +90 -0
  171. data/lib/embulk.rb +16 -0
  172. data/lib/embulk/buffer.rb +17 -0
  173. data/lib/embulk/column.rb +47 -0
  174. data/lib/embulk/command/embulk.rb +39 -0
  175. data/lib/embulk/command/embulk_example.rb +32 -0
  176. data/lib/embulk/command/embulk_generate_bin.rb +62 -0
  177. data/lib/embulk/command/embulk_run.rb +243 -0
  178. data/lib/embulk/data/bundle/.bundle/config +3 -0
  179. data/lib/embulk/data/bundle/Gemfile +31 -0
  180. data/lib/embulk/data/bundle/Gemfile.lock +8 -0
  181. data/lib/embulk/data/bundle/embulk/input_example.rb +40 -0
  182. data/lib/embulk/data/bundle/embulk/output_example.rb +51 -0
  183. data/lib/embulk/data_source.rb +66 -0
  184. data/lib/embulk/error.rb +5 -0
  185. data/lib/embulk/guess_charset.rb +26 -0
  186. data/lib/embulk/guess_csv.rb +195 -0
  187. data/lib/embulk/guess_gzip.rb +18 -0
  188. data/lib/embulk/guess_newline.rb +20 -0
  189. data/lib/embulk/guess_plugin.rb +113 -0
  190. data/lib/embulk/input_plugin.rb +53 -0
  191. data/lib/embulk/java/bootstrap.rb +12 -0
  192. data/lib/embulk/java/imports.rb +26 -0
  193. data/lib/embulk/java/time_helper.rb +77 -0
  194. data/lib/embulk/output_plugin.rb +104 -0
  195. data/lib/embulk/page.rb +28 -0
  196. data/lib/embulk/page_builder.rb +22 -0
  197. data/lib/embulk/plugin.rb +152 -0
  198. data/lib/embulk/plugin_registry.rb +70 -0
  199. data/lib/embulk/schema.rb +85 -0
  200. data/lib/embulk/time_format_guess.rb +331 -0
  201. data/lib/embulk/version.rb +3 -0
  202. data/pom.xml +533 -0
  203. data/settings.gradle +5 -0
  204. metadata +370 -0
@@ -0,0 +1,30 @@
1
+ package org.embulk.exec;
2
+
3
+ import java.util.Properties;
4
+ import org.slf4j.ILoggerFactory;
5
+ import org.slf4j.LoggerFactory;
6
+ import org.apache.log4j.PropertyConfigurator;
7
+ import com.google.inject.Provider;
8
+
9
+ public class LoggerProvider
10
+ implements Provider<ILoggerFactory>
11
+ {
12
+ public LoggerProvider()
13
+ {
14
+ // TODO system config
15
+ Properties prop = new Properties();
16
+
17
+ prop.setProperty("log4j.rootLogger", "INFO,root");
18
+ prop.setProperty("log4j.appender.root", "org.apache.log4j.ConsoleAppender");
19
+ prop.setProperty("log4j.appender.root.layout", "org.apache.log4j.PatternLayout");
20
+ prop.setProperty("log4j.appender.root.layout.ConversionPattern", "%d [%p]: %t:%c: %m%n");
21
+
22
+ // TODO
23
+ PropertyConfigurator.configure(prop);
24
+ }
25
+
26
+ public ILoggerFactory get()
27
+ {
28
+ return LoggerFactory.getILoggerFactory();
29
+ }
30
+ }
@@ -0,0 +1,10 @@
1
+ package org.embulk.exec;
2
+
3
/**
 * Unchecked exception that carries only a caller-supplied message.
 *
 * <p>Within this package it is thrown when an input yields no records or
 * files to sample.
 */
public class NoSampleException
        extends RuntimeException
{
    /**
     * @param message description of why no sample could be taken
     */
    public NoSampleException(final String message)
    {
        super(message);
    }
}
@@ -0,0 +1,58 @@
1
+ package org.embulk.exec;
2
+
3
+ import io.netty.buffer.PooledByteBufAllocator;
4
+ import io.netty.buffer.ByteBuf;
5
+ import io.netty.util.ResourceLeakDetector;
6
+ import org.embulk.spi.Buffer;
7
+ import org.embulk.spi.BufferAllocator;
8
+
9
+ public class PooledBufferAllocator
10
+ implements BufferAllocator
11
+ {
12
+ private PooledByteBufAllocator nettyBuffer;
13
+
14
+ public PooledBufferAllocator()
15
+ {
16
+ // TODO configure parameters
17
+ this.nettyBuffer = new PooledByteBufAllocator(false);
18
+ }
19
+
20
+ public Buffer allocate()
21
+ {
22
+ return new NettyByteBufBuffer(nettyBuffer.buffer());
23
+ }
24
+
25
+ public Buffer allocate(int minimumCapacity)
26
+ {
27
+ int size = 32*1024;
28
+ while (size < minimumCapacity) {
29
+ size *= 2;
30
+ }
31
+ return new NettyByteBufBuffer(nettyBuffer.buffer(size));
32
+ }
33
+
34
+ private static class NettyByteBufBuffer
35
+ extends Buffer
36
+ {
37
+ private ByteBuf buf;
38
+ private Exception doubleFreeCheck;
39
+
40
+ public NettyByteBufBuffer(ByteBuf buf)
41
+ {
42
+ super(buf.array(), buf.arrayOffset(), buf.capacity());
43
+ this.buf = buf;
44
+ }
45
+
46
+ public void release()
47
+ {
48
+ if (doubleFreeCheck != null) {
49
+ doubleFreeCheck.printStackTrace();
50
+ }
51
+ if (buf != null) {
52
+ buf.release();
53
+ buf = null;
54
+ doubleFreeCheck = new NullPointerException();
55
+ }
56
+ }
57
+ }
58
+ }
@@ -0,0 +1,138 @@
1
+ package org.embulk.exec;
2
+
3
+ import java.util.List;
4
+ import java.util.ArrayList;
5
+ import javax.validation.constraints.NotNull;
6
+ import com.google.inject.Inject;
7
+ import com.google.inject.Injector;
8
+ import com.google.common.base.Throwables;
9
+ import org.embulk.config.Config;
10
+ import org.embulk.config.ConfigDefault;
11
+ import org.embulk.config.Task;
12
+ import org.embulk.config.TaskSource;
13
+ import org.embulk.config.ConfigSource;
14
+ import org.embulk.config.CommitReport;
15
+ import org.embulk.plugin.PluginType;
16
+ import org.embulk.spi.Schema;
17
+ import org.embulk.spi.Page;
18
+ import org.embulk.spi.PageOutput;
19
+ import org.embulk.spi.PageReader;
20
+ import org.embulk.spi.InputPlugin;
21
+ import org.embulk.spi.Exec;
22
+ import org.embulk.spi.ExecSession;
23
+ import org.embulk.spi.ExecAction;
24
+
25
+ public class PreviewExecutor
26
+ {
27
+ private final Injector injector;
28
+ private final ConfigSource systemConfig;
29
+
30
+ public interface PreviewTask
31
+ extends Task
32
+ {
33
+ @Config("in")
34
+ @NotNull
35
+ public ConfigSource getInputConfig();
36
+
37
+ @Config("preview_sample_rows")
38
+ @ConfigDefault("15")
39
+ public int getSampleRows();
40
+
41
+ public TaskSource getInputTask();
42
+ public void setInputTask(TaskSource taskSource);
43
+ }
44
+
45
+ @Inject
46
+ public PreviewExecutor(Injector injector,
47
+ @ForSystemConfig ConfigSource systemConfig)
48
+ {
49
+ this.injector = injector;
50
+ this.systemConfig = systemConfig;
51
+ }
52
+
53
+ public PreviewResult preview(ExecSession exec, final ConfigSource config)
54
+ {
55
+ try {
56
+ return Exec.doWith(exec, new ExecAction<PreviewResult>() {
57
+ public PreviewResult run()
58
+ {
59
+ return doPreview(config);
60
+ }
61
+ });
62
+ } catch (Exception ex) {
63
+ throw Throwables.propagate(ex);
64
+ }
65
+ }
66
+
67
+ protected InputPlugin newInputPlugin(PreviewTask task)
68
+ {
69
+ return Exec.newPlugin(InputPlugin.class, task.getInputConfig().get(PluginType.class, "type"));
70
+ }
71
+
72
+ private PreviewResult doPreview(ConfigSource config)
73
+ {
74
+ final PreviewTask task = config.loadConfig(PreviewTask.class);
75
+ InputPlugin input = newInputPlugin(task);
76
+
77
+ try {
78
+ input.transaction(task.getInputConfig(), new InputPlugin.Control() {
79
+ public List<CommitReport> run(TaskSource taskSource, Schema schema, int processorCount)
80
+ {
81
+ InputPlugin input = newInputPlugin(task);
82
+ try (SamplingPageOutput out = new SamplingPageOutput(task.getSampleRows(), schema)) {
83
+ input.run(taskSource, schema, 0, out);
84
+ }
85
+ throw new NoSampleException("No input records to preview");
86
+ }
87
+ });
88
+ throw new AssertionError("PreviewExecutor executor must throw PreviewedNoticeError");
89
+ } catch (PreviewedNoticeError previewed) {
90
+ return previewed.getPreviewResult();
91
+ }
92
+ }
93
+
94
+ private static class SamplingPageOutput
95
+ implements PageOutput
96
+ {
97
+ private final int sampleRows;
98
+ private final Schema schema;
99
+ private List<Page> pages;
100
+ private int recordCount;
101
+
102
+ public SamplingPageOutput(int sampleRows, Schema schema)
103
+ {
104
+ this.sampleRows = sampleRows;
105
+ this.schema = schema;
106
+ this.pages = new ArrayList<Page>();
107
+ }
108
+
109
+ @Override
110
+ public void add(Page page)
111
+ {
112
+ pages.add(page);
113
+ recordCount += PageReader.getRecordCount(page);
114
+ if (recordCount >= sampleRows) {
115
+ finish();
116
+ }
117
+ }
118
+
119
+ @Override
120
+ public void finish()
121
+ {
122
+ if (recordCount == 0) {
123
+ throw new NoSampleException("No input records to preview");
124
+ }
125
+ PreviewResult res = new PreviewResult(schema, pages);
126
+ pages = null;
127
+ throw new PreviewedNoticeError(res);
128
+ }
129
+
130
+ @Override
131
+ public void close()
132
+ {
133
+ for (Page page : pages) {
134
+ page.release();
135
+ }
136
+ }
137
+ }
138
+ }
@@ -0,0 +1,27 @@
1
+ package org.embulk.exec;
2
+
3
+ import java.util.List;
4
+ import org.embulk.spi.Schema;
5
+ import org.embulk.spi.Page;
6
+
7
+ public class PreviewResult
8
+ {
9
+ private final Schema schema;
10
+ private final List<Page> pages;
11
+
12
+ public PreviewResult(Schema schema, List<Page> pages)
13
+ {
14
+ this.schema = schema;
15
+ this.pages = pages;
16
+ }
17
+
18
+ public Schema getSchema()
19
+ {
20
+ return schema;
21
+ }
22
+
23
+ public List<Page> getPages()
24
+ {
25
+ return pages;
26
+ }
27
+ }
@@ -0,0 +1,17 @@
1
+ package org.embulk.exec;
2
+
3
+ public class PreviewedNoticeError
4
+ extends Error
5
+ {
6
+ private final PreviewResult previewResult;
7
+
8
+ public PreviewedNoticeError(PreviewResult previewResult)
9
+ {
10
+ this.previewResult = previewResult;
11
+ }
12
+
13
+ public PreviewResult getPreviewResult()
14
+ {
15
+ return previewResult;
16
+ }
17
+ }
@@ -0,0 +1,116 @@
1
+ package org.embulk.exec;
2
+
3
+ import java.util.List;
4
+ import com.google.inject.Inject;
5
+ import org.embulk.config.TaskSource;
6
+ import org.embulk.config.ConfigSource;
7
+ import org.embulk.config.CommitReport;
8
+ import org.embulk.plugin.PluginType;
9
+ import org.embulk.spi.Schema;
10
+ import org.embulk.spi.Exec;
11
+ import org.embulk.spi.Page;
12
+ import org.embulk.spi.Buffer;
13
+ import org.embulk.spi.InputPlugin;
14
+ import org.embulk.spi.ParserPlugin;
15
+ import org.embulk.spi.FileInput;
16
+ import org.embulk.spi.PageOutput;
17
+ import static org.embulk.spi.util.Inputs.each;
18
+
19
+ /*
20
+ * Used by GuessExecutor
21
+ */
22
+ class SamplingParserPlugin
23
+ implements ParserPlugin
24
+ {
25
+ private final int maxSampleSize;
26
+
27
+ @Inject
28
+ public SamplingParserPlugin(@ForSystemConfig ConfigSource systemConfig)
29
+ {
30
+ this.maxSampleSize = 32*1024; // TODO get sample syze from system config
31
+ }
32
+
33
+ @Override
34
+ public void transaction(ConfigSource config, ParserPlugin.Control control)
35
+ {
36
+ control.run(Exec.newTaskSource(), null);
37
+ }
38
+
39
+ @Override
40
+ public void run(TaskSource taskSource, Schema schema,
41
+ FileInput input, PageOutput output)
42
+ {
43
+ Buffer buffer = getSample(input, maxSampleSize);
44
+ throw new SampledNoticeError(buffer);
45
+ }
46
+
47
+ static Buffer runFileInputSampling(ConfigSource config)
48
+ {
49
+ // override in.parser.type so that FileInputRunner creates GuessParserPlugin
50
+ ConfigSource samplingInputConfig = config.getNested("in").deepCopy();
51
+ samplingInputConfig.getNestedOrSetEmpty("parser").set("type", "system_sampling");
52
+
53
+ final InputPlugin input = Exec.newPlugin(InputPlugin.class, samplingInputConfig.get(PluginType.class, "type"));
54
+ try {
55
+ input.transaction(samplingInputConfig, new InputPlugin.Control() {
56
+ public List<CommitReport> run(TaskSource taskSource, Schema schema, int processorCount)
57
+ {
58
+ input.run(taskSource, schema, 0, new PageOutput() {
59
+ @Override
60
+ public void add(Page page)
61
+ {
62
+ throw new RuntimeException("Input plugin must be a FileInputPlugin to guess parser configuration"); // TODO exception class
63
+ }
64
+
65
+ public void finish() { }
66
+
67
+ public void close() { }
68
+ });
69
+ throw new NoSampleException("No input files to guess parser configuration");
70
+ }
71
+ });
72
+ throw new AssertionError("SamplingParserPlugin must throw SampledNoticeError");
73
+ } catch (SampledNoticeError error) {
74
+ return error.getSample();
75
+ }
76
+ }
77
+
78
+ private static Buffer getSample(FileInput fileInput, int maxSampleSize)
79
+ {
80
+ Buffer sample = Buffer.allocate(maxSampleSize);
81
+ int sampleSize = 0;
82
+
83
+ while (fileInput.nextFile()) {
84
+ for (Buffer buffer : each(fileInput)) {
85
+ if (sampleSize >= maxSampleSize) {
86
+ // skip remaining all buffers so that FileInputPlugin.runInput doesn't
87
+ // throw exceptions at channel.join()
88
+ } else {
89
+ int size = Math.min(buffer.limit(), sample.capacity() - sampleSize);
90
+ sample.setBytes(sampleSize, buffer, 0, size);
91
+ sampleSize += size;
92
+ }
93
+ buffer.release();
94
+ }
95
+ }
96
+
97
+ sample.limit(sampleSize);
98
+ return sample;
99
+ }
100
+
101
+ public static class SampledNoticeError
102
+ extends Error
103
+ {
104
+ private final Buffer sample;
105
+
106
+ public SampledNoticeError(Buffer sample)
107
+ {
108
+ this.sample = sample;
109
+ }
110
+
111
+ public Buffer getSample()
112
+ {
113
+ return sample;
114
+ }
115
+ }
116
+ }
@@ -0,0 +1,24 @@
1
+ package org.embulk.exec;
2
+
3
+ import com.google.inject.Module;
4
+ import com.google.inject.Binder;
5
+ import org.embulk.config.ConfigSource;
6
+
7
+ public class SystemConfigModule
8
+ implements Module
9
+ {
10
+ private final ConfigSource systemConfig;
11
+
12
+ public SystemConfigModule(ConfigSource systemConfig)
13
+ {
14
+ this.systemConfig = systemConfig;
15
+ }
16
+
17
+ @Override
18
+ public void configure(Binder binder)
19
+ {
20
+ binder.bind(ConfigSource.class)
21
+ .annotatedWith(ForSystemConfig.class)
22
+ .toInstance(systemConfig);
23
+ }
24
+ }
@@ -0,0 +1,69 @@
1
+ package org.embulk.jruby;
2
+
3
+ import com.google.inject.Inject;
4
+ import org.jruby.embed.ScriptingContainer;
5
+ import org.jruby.embed.InvokeFailedException;
6
+ import org.embulk.plugin.PluginType;
7
+ import org.embulk.plugin.PluginSource;
8
+ import org.embulk.plugin.PluginSourceNotMatchException;
9
+ import org.embulk.spi.InputPlugin;
10
+ import org.embulk.spi.OutputPlugin;
11
+ import org.embulk.spi.ParserPlugin;
12
+ import org.embulk.spi.FormatterPlugin;
13
+ import org.embulk.spi.DecoderPlugin;
14
+ import org.embulk.spi.EncoderPlugin;
15
+ //import org.embulk.spi.LineFilterPlugin;
16
+ import org.embulk.spi.GuessPlugin;
17
+
18
+ public class JRubyPluginSource
19
+ implements PluginSource
20
+ {
21
+ private final ScriptingContainer jruby;
22
+ private final Object rubyPluginManager;
23
+
24
+ @Inject
25
+ public JRubyPluginSource(ScriptingContainer jruby)
26
+ {
27
+ this.jruby = jruby;
28
+
29
+ // get Embulk::Plugin
30
+ //this.rubyPluginManager = ((RubyModule) jruby.get("Embulk")).const_get(
31
+ // RubySymbol.newSymbol(
32
+ // jruby.getProvider().getRuntime(), "Plugin"));
33
+ this.rubyPluginManager = jruby.runScriptlet("Embulk::Plugin");
34
+ }
35
+
36
+ public <T> T newPlugin(Class<T> iface, PluginType type) throws PluginSourceNotMatchException
37
+ {
38
+ String name = type.getName();
39
+
40
+ String category;
41
+ if (InputPlugin.class.isAssignableFrom(iface)) {
42
+ category = "input";
43
+ } else if (OutputPlugin.class.isAssignableFrom(iface)) {
44
+ category = "output";
45
+ } else if (ParserPlugin.class.isAssignableFrom(iface)) {
46
+ category = "parser";
47
+ } else if (FormatterPlugin.class.isAssignableFrom(iface)) {
48
+ category = "formatter";
49
+ } else if (DecoderPlugin.class.isAssignableFrom(iface)) {
50
+ category = "decoder";
51
+ } else if (EncoderPlugin.class.isAssignableFrom(iface)) {
52
+ category = "encoder";
53
+ //} else if (LineFilterPlugin.class.isAssignableFrom(iface)) {
54
+ // category = "line_filter";
55
+ } else if (GuessPlugin.class.isAssignableFrom(iface)) {
56
+ category = "guess";
57
+ } else {
58
+ // unsupported plugin category
59
+ throw new PluginSourceNotMatchException("Plugin interface "+iface+" is not supported in JRuby");
60
+ }
61
+
62
+ String methodName = "new_java_" + category;
63
+ try {
64
+ return jruby.callMethod(rubyPluginManager, methodName, name, iface);
65
+ } catch (InvokeFailedException ex) {
66
+ throw new PluginSourceNotMatchException(ex.getCause());
67
+ }
68
+ }
69
+ }