embulk 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (204) hide show
  1. checksums.yaml +15 -0
  2. data/.gitignore +13 -0
  3. data/Gemfile +3 -0
  4. data/Gemfile.lock +33 -0
  5. data/README.md +117 -0
  6. data/Rakefile +58 -0
  7. data/bin/embulk +63 -0
  8. data/build.gradle +149 -0
  9. data/embulk-cli/build.gradle +6 -0
  10. data/embulk-cli/pom.xml +94 -0
  11. data/embulk-cli/src/main/java/org/embulk/cli/Main.java +15 -0
  12. data/embulk-core/build.gradle +6 -0
  13. data/embulk-core/pom.xml +143 -0
  14. data/embulk-core/src/main/java/org/embulk/EmbulkService.java +39 -0
  15. data/embulk-core/src/main/java/org/embulk/command/Runner.java +199 -0
  16. data/embulk-core/src/main/java/org/embulk/command/TablePrinter.java +119 -0
  17. data/embulk-core/src/main/java/org/embulk/config/CommitReport.java +26 -0
  18. data/embulk-core/src/main/java/org/embulk/config/Config.java +15 -0
  19. data/embulk-core/src/main/java/org/embulk/config/ConfigDefault.java +15 -0
  20. data/embulk-core/src/main/java/org/embulk/config/ConfigException.java +20 -0
  21. data/embulk-core/src/main/java/org/embulk/config/ConfigLoader.java +83 -0
  22. data/embulk-core/src/main/java/org/embulk/config/ConfigSource.java +28 -0
  23. data/embulk-core/src/main/java/org/embulk/config/DataSource.java +35 -0
  24. data/embulk-core/src/main/java/org/embulk/config/DataSourceImpl.java +208 -0
  25. data/embulk-core/src/main/java/org/embulk/config/DataSourceSerDe.java +80 -0
  26. data/embulk-core/src/main/java/org/embulk/config/GenericTypeReference.java +20 -0
  27. data/embulk-core/src/main/java/org/embulk/config/ModelManager.java +125 -0
  28. data/embulk-core/src/main/java/org/embulk/config/NextConfig.java +26 -0
  29. data/embulk-core/src/main/java/org/embulk/config/Task.java +10 -0
  30. data/embulk-core/src/main/java/org/embulk/config/TaskInvocationHandler.java +180 -0
  31. data/embulk-core/src/main/java/org/embulk/config/TaskSerDe.java +343 -0
  32. data/embulk-core/src/main/java/org/embulk/config/TaskSource.java +28 -0
  33. data/embulk-core/src/main/java/org/embulk/config/TaskValidationException.java +37 -0
  34. data/embulk-core/src/main/java/org/embulk/config/TaskValidator.java +24 -0
  35. data/embulk-core/src/main/java/org/embulk/exec/ExecModule.java +45 -0
  36. data/embulk-core/src/main/java/org/embulk/exec/ExecuteInterruptedException.java +10 -0
  37. data/embulk-core/src/main/java/org/embulk/exec/ExecuteResult.java +19 -0
  38. data/embulk-core/src/main/java/org/embulk/exec/ExtensionServiceLoaderModule.java +43 -0
  39. data/embulk-core/src/main/java/org/embulk/exec/ForSystemConfig.java +16 -0
  40. data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +307 -0
  41. data/embulk-core/src/main/java/org/embulk/exec/LocalExecutor.java +274 -0
  42. data/embulk-core/src/main/java/org/embulk/exec/LoggerProvider.java +30 -0
  43. data/embulk-core/src/main/java/org/embulk/exec/NoSampleException.java +10 -0
  44. data/embulk-core/src/main/java/org/embulk/exec/PooledBufferAllocator.java +58 -0
  45. data/embulk-core/src/main/java/org/embulk/exec/PreviewExecutor.java +138 -0
  46. data/embulk-core/src/main/java/org/embulk/exec/PreviewResult.java +27 -0
  47. data/embulk-core/src/main/java/org/embulk/exec/PreviewedNoticeError.java +17 -0
  48. data/embulk-core/src/main/java/org/embulk/exec/SamplingParserPlugin.java +116 -0
  49. data/embulk-core/src/main/java/org/embulk/exec/SystemConfigModule.java +24 -0
  50. data/embulk-core/src/main/java/org/embulk/jruby/JRubyPluginSource.java +69 -0
  51. data/embulk-core/src/main/java/org/embulk/jruby/JRubyScriptingModule.java +100 -0
  52. data/embulk-core/src/main/java/org/embulk/plugin/BuiltinPluginSourceModule.java +17 -0
  53. data/embulk-core/src/main/java/org/embulk/plugin/InjectedPluginSource.java +92 -0
  54. data/embulk-core/src/main/java/org/embulk/plugin/PluginManager.java +34 -0
  55. data/embulk-core/src/main/java/org/embulk/plugin/PluginSource.java +6 -0
  56. data/embulk-core/src/main/java/org/embulk/plugin/PluginSourceNotMatchException.java +19 -0
  57. data/embulk-core/src/main/java/org/embulk/plugin/PluginType.java +47 -0
  58. data/embulk-core/src/main/java/org/embulk/plugin/SetThreadContextClassLoader.java +19 -0
  59. data/embulk-core/src/main/java/org/embulk/spi/Buffer.java +113 -0
  60. data/embulk-core/src/main/java/org/embulk/spi/BufferAllocator.java +8 -0
  61. data/embulk-core/src/main/java/org/embulk/spi/Column.java +92 -0
  62. data/embulk-core/src/main/java/org/embulk/spi/ColumnConfig.java +79 -0
  63. data/embulk-core/src/main/java/org/embulk/spi/DecoderPlugin.java +16 -0
  64. data/embulk-core/src/main/java/org/embulk/spi/EncoderPlugin.java +16 -0
  65. data/embulk-core/src/main/java/org/embulk/spi/Exec.java +76 -0
  66. data/embulk-core/src/main/java/org/embulk/spi/ExecAction.java +6 -0
  67. data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +105 -0
  68. data/embulk-core/src/main/java/org/embulk/spi/Extension.java +42 -0
  69. data/embulk-core/src/main/java/org/embulk/spi/FileInput.java +11 -0
  70. data/embulk-core/src/main/java/org/embulk/spi/FileInputPlugin.java +19 -0
  71. data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +113 -0
  72. data/embulk-core/src/main/java/org/embulk/spi/FileOutput.java +13 -0
  73. data/embulk-core/src/main/java/org/embulk/spi/FileOutputPlugin.java +20 -0
  74. data/embulk-core/src/main/java/org/embulk/spi/FileOutputRunner.java +167 -0
  75. data/embulk-core/src/main/java/org/embulk/spi/FormatterPlugin.java +18 -0
  76. data/embulk-core/src/main/java/org/embulk/spi/GuessPlugin.java +9 -0
  77. data/embulk-core/src/main/java/org/embulk/spi/InputPlugin.java +20 -0
  78. data/embulk-core/src/main/java/org/embulk/spi/OutputPlugin.java +21 -0
  79. data/embulk-core/src/main/java/org/embulk/spi/Page.java +45 -0
  80. data/embulk-core/src/main/java/org/embulk/spi/PageBuilder.java +327 -0
  81. data/embulk-core/src/main/java/org/embulk/spi/PageFormat.java +47 -0
  82. data/embulk-core/src/main/java/org/embulk/spi/PageOutput.java +11 -0
  83. data/embulk-core/src/main/java/org/embulk/spi/PageReader.java +227 -0
  84. data/embulk-core/src/main/java/org/embulk/spi/ParserPlugin.java +17 -0
  85. data/embulk-core/src/main/java/org/embulk/spi/Schema.java +101 -0
  86. data/embulk-core/src/main/java/org/embulk/spi/SchemaConfig.java +52 -0
  87. data/embulk-core/src/main/java/org/embulk/spi/SchemaVisitor.java +14 -0
  88. data/embulk-core/src/main/java/org/embulk/spi/Transactional.java +10 -0
  89. data/embulk-core/src/main/java/org/embulk/spi/TransactionalFileInput.java +17 -0
  90. data/embulk-core/src/main/java/org/embulk/spi/TransactionalFileOutput.java +19 -0
  91. data/embulk-core/src/main/java/org/embulk/spi/TransactionalPageOutput.java +17 -0
  92. data/embulk-core/src/main/java/org/embulk/spi/time/DateTimeZoneSerDe.java +57 -0
  93. data/embulk-core/src/main/java/org/embulk/spi/time/JRubyTimeParserHelper.java +8 -0
  94. data/embulk-core/src/main/java/org/embulk/spi/time/JRubyTimeParserHelperFactory.java +6 -0
  95. data/embulk-core/src/main/java/org/embulk/spi/time/Timestamp.java +159 -0
  96. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormat.java +98 -0
  97. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormatter.java +55 -0
  98. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParseException.java +6 -0
  99. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParser.java +60 -0
  100. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampSerDe.java +50 -0
  101. data/embulk-core/src/main/java/org/embulk/spi/type/AbstractType.java +55 -0
  102. data/embulk-core/src/main/java/org/embulk/spi/type/BooleanType.java +12 -0
  103. data/embulk-core/src/main/java/org/embulk/spi/type/DoubleType.java +12 -0
  104. data/embulk-core/src/main/java/org/embulk/spi/type/LongType.java +12 -0
  105. data/embulk-core/src/main/java/org/embulk/spi/type/StringType.java +12 -0
  106. data/embulk-core/src/main/java/org/embulk/spi/type/TimestampType.java +39 -0
  107. data/embulk-core/src/main/java/org/embulk/spi/type/Type.java +15 -0
  108. data/embulk-core/src/main/java/org/embulk/spi/type/TypeDeserializer.java +47 -0
  109. data/embulk-core/src/main/java/org/embulk/spi/type/Types.java +14 -0
  110. data/embulk-core/src/main/java/org/embulk/spi/util/CharsetSerDe.java +55 -0
  111. data/embulk-core/src/main/java/org/embulk/spi/util/Decoders.java +81 -0
  112. data/embulk-core/src/main/java/org/embulk/spi/util/Encoders.java +81 -0
  113. data/embulk-core/src/main/java/org/embulk/spi/util/FileInputInputStream.java +110 -0
  114. data/embulk-core/src/main/java/org/embulk/spi/util/FileOutputOutputStream.java +94 -0
  115. data/embulk-core/src/main/java/org/embulk/spi/util/InputStreamFileInput.java +111 -0
  116. data/embulk-core/src/main/java/org/embulk/spi/util/Inputs.java +74 -0
  117. data/embulk-core/src/main/java/org/embulk/spi/util/LineDecoder.java +118 -0
  118. data/embulk-core/src/main/java/org/embulk/spi/util/LineEncoder.java +109 -0
  119. data/embulk-core/src/main/java/org/embulk/spi/util/ListFileInput.java +52 -0
  120. data/embulk-core/src/main/java/org/embulk/spi/util/Newline.java +38 -0
  121. data/embulk-core/src/main/java/org/embulk/spi/util/PagePrinter.java +102 -0
  122. data/embulk-core/src/main/java/org/embulk/spi/util/Pages.java +139 -0
  123. data/embulk-core/src/test/java/org/embulk/EmbulkTestRuntime.java +110 -0
  124. data/embulk-core/src/test/java/org/embulk/GuiceBinder.java +72 -0
  125. data/embulk-core/src/test/java/org/embulk/RandomManager.java +53 -0
  126. data/embulk-core/src/test/java/org/embulk/TestPluginSourceModule.java +23 -0
  127. data/embulk-core/src/test/java/org/embulk/TestUtilityModule.java +17 -0
  128. data/embulk-core/src/test/java/org/embulk/config/TestConfigSource.java +114 -0
  129. data/embulk-core/src/test/java/org/embulk/config/TestTaskSource.java +70 -0
  130. data/embulk-core/src/test/java/org/embulk/plugin/MockPluginSource.java +57 -0
  131. data/embulk-core/src/test/java/org/embulk/plugin/TestPluginType.java +18 -0
  132. data/embulk-core/src/test/java/org/embulk/spi/MockFileOutput.java +63 -0
  133. data/embulk-core/src/test/java/org/embulk/spi/MockFormatterPlugin.java +101 -0
  134. data/embulk-core/src/test/java/org/embulk/spi/MockParserPlugin.java +73 -0
  135. data/embulk-core/src/test/java/org/embulk/spi/PageTestUtils.java +78 -0
  136. data/embulk-core/src/test/java/org/embulk/spi/TestFileInputInputStream.java +67 -0
  137. data/embulk-core/src/test/java/org/embulk/spi/TestFileInputRunner.java +180 -0
  138. data/embulk-core/src/test/java/org/embulk/spi/TestFileOutputRunner.java +192 -0
  139. data/embulk-core/src/test/java/org/embulk/spi/TestInputStreamFileInput.java +188 -0
  140. data/embulk-core/src/test/java/org/embulk/spi/TestPageBuilderReader.java +301 -0
  141. data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestamp.java +116 -0
  142. data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestampFormatterParser.java +52 -0
  143. data/embulk-core/src/test/java/org/embulk/spi/type/TestTypeSerDe.java +45 -0
  144. data/embulk-core/src/test/java/org/embulk/spi/util/TestLineDecoder.java +132 -0
  145. data/embulk-core/src/test/java/org/embulk/spi/util/TestLineEncoder.java +123 -0
  146. data/embulk-standards/build.gradle +6 -0
  147. data/embulk-standards/pom.xml +68 -0
  148. data/embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java +158 -0
  149. data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +233 -0
  150. data/embulk-standards/src/main/java/org/embulk/standards/CsvTokenizer.java +355 -0
  151. data/embulk-standards/src/main/java/org/embulk/standards/GzipFileDecoderPlugin.java +55 -0
  152. data/embulk-standards/src/main/java/org/embulk/standards/GzipFileEncoderPlugin.java +39 -0
  153. data/embulk-standards/src/main/java/org/embulk/standards/LocalFileInputPlugin.java +138 -0
  154. data/embulk-standards/src/main/java/org/embulk/standards/LocalFileOutputPlugin.java +128 -0
  155. data/embulk-standards/src/main/java/org/embulk/standards/NullOutputPlugin.java +46 -0
  156. data/embulk-standards/src/main/java/org/embulk/standards/S3FileInputPlugin.java +238 -0
  157. data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginExtension.java +16 -0
  158. data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginModule.java +44 -0
  159. data/embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java +71 -0
  160. data/embulk-standards/src/main/resources/META-INF/services/org.embulk.spi.Extension +1 -0
  161. data/embulk-standards/src/test/java/org/embulk/standards/TestCsvParserPlugin.java +69 -0
  162. data/embulk-standards/src/test/java/org/embulk/standards/TestCsvTokenizer.java +291 -0
  163. data/embulk-standards/src/test/java/org/embulk/standards/TestS3FileInputPlugin.java +43 -0
  164. data/embulk.gemspec +27 -0
  165. data/examples/config.yml +34 -0
  166. data/examples/csv/sample.csv.gz +0 -0
  167. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  168. data/gradle/wrapper/gradle-wrapper.properties +6 -0
  169. data/gradlew +164 -0
  170. data/gradlew.bat +90 -0
  171. data/lib/embulk.rb +16 -0
  172. data/lib/embulk/buffer.rb +17 -0
  173. data/lib/embulk/column.rb +47 -0
  174. data/lib/embulk/command/embulk.rb +39 -0
  175. data/lib/embulk/command/embulk_example.rb +32 -0
  176. data/lib/embulk/command/embulk_generate_bin.rb +62 -0
  177. data/lib/embulk/command/embulk_run.rb +243 -0
  178. data/lib/embulk/data/bundle/.bundle/config +3 -0
  179. data/lib/embulk/data/bundle/Gemfile +31 -0
  180. data/lib/embulk/data/bundle/Gemfile.lock +8 -0
  181. data/lib/embulk/data/bundle/embulk/input_example.rb +40 -0
  182. data/lib/embulk/data/bundle/embulk/output_example.rb +51 -0
  183. data/lib/embulk/data_source.rb +66 -0
  184. data/lib/embulk/error.rb +5 -0
  185. data/lib/embulk/guess_charset.rb +26 -0
  186. data/lib/embulk/guess_csv.rb +195 -0
  187. data/lib/embulk/guess_gzip.rb +18 -0
  188. data/lib/embulk/guess_newline.rb +20 -0
  189. data/lib/embulk/guess_plugin.rb +113 -0
  190. data/lib/embulk/input_plugin.rb +53 -0
  191. data/lib/embulk/java/bootstrap.rb +12 -0
  192. data/lib/embulk/java/imports.rb +26 -0
  193. data/lib/embulk/java/time_helper.rb +77 -0
  194. data/lib/embulk/output_plugin.rb +104 -0
  195. data/lib/embulk/page.rb +28 -0
  196. data/lib/embulk/page_builder.rb +22 -0
  197. data/lib/embulk/plugin.rb +152 -0
  198. data/lib/embulk/plugin_registry.rb +70 -0
  199. data/lib/embulk/schema.rb +85 -0
  200. data/lib/embulk/time_format_guess.rb +331 -0
  201. data/lib/embulk/version.rb +3 -0
  202. data/pom.xml +533 -0
  203. data/settings.gradle +5 -0
  204. metadata +370 -0
@@ -0,0 +1,138 @@
1
+ package org.embulk.standards;
2
+
3
+ import java.util.List;
4
+ import java.io.File;
5
+ import java.io.FileInputStream;
6
+ import java.io.InputStream;
7
+ import java.io.IOException;
8
+ import java.nio.file.Path;
9
+ import java.nio.file.Paths;
10
+ import java.nio.file.Files;
11
+ import java.nio.file.SimpleFileVisitor;
12
+ import java.nio.file.FileVisitResult;
13
+ import java.nio.file.attribute.BasicFileAttributes;
14
+ import javax.validation.constraints.NotNull;
15
+ import com.google.common.collect.ImmutableList;
16
+ import com.fasterxml.jackson.annotation.JacksonInject;
17
+ import org.embulk.config.Config;
18
+ import org.embulk.config.Task;
19
+ import org.embulk.config.TaskSource;
20
+ import org.embulk.config.ConfigSource;
21
+ import org.embulk.config.NextConfig;
22
+ import org.embulk.config.CommitReport;
23
+ import org.embulk.spi.BufferAllocator;
24
+ import org.embulk.spi.Exec;
25
+ import org.embulk.spi.FileInputPlugin;
26
+ import org.embulk.spi.TransactionalFileInput;
27
+ import org.embulk.spi.util.InputStreamFileInput;
28
+ import org.slf4j.Logger;
29
+
30
+ import static org.embulk.spi.util.Inputs.formatPath;
31
+
32
+ public class LocalFileInputPlugin
33
+ implements FileInputPlugin
34
+ {
35
+ public interface PluginTask
36
+ extends Task
37
+ {
38
+ @Config("paths")
39
+ @NotNull
40
+ public List<String> getPathPrefixes();
41
+
42
+ public List<String> getFiles();
43
+ public void setFiles(List<String> files);
44
+
45
+ @JacksonInject
46
+ public BufferAllocator getBufferAllocator();
47
+ }
48
+
49
+ private final Logger log = Exec.getLogger(getClass());
50
+
51
+ @Override
52
+ public NextConfig transaction(ConfigSource config, FileInputPlugin.Control control)
53
+ {
54
+ PluginTask task = config.loadConfig(PluginTask.class);
55
+
56
+ // list files recursively
57
+ task.setFiles(listFiles(task));
58
+
59
+ // run with threads. number of processors is same with number of files
60
+ control.run(task.dump(), task.getFiles().size());
61
+
62
+ return Exec.newNextConfig();
63
+ }
64
+
65
+ public List<String> listFiles(PluginTask task)
66
+ {
67
+ final ImmutableList.Builder<String> builder = ImmutableList.builder();
68
+ for (String prefix : task.getPathPrefixes()) {
69
+ String formatted = formatPath(prefix);
70
+ try {
71
+ log.info("Listing local files with prefix '{}'", formatted);
72
+ Files.walkFileTree(Paths.get(formatted), new SimpleFileVisitor<Path>() {
73
+ @Override
74
+ public FileVisitResult visitFile(Path file, BasicFileAttributes aAttrs)
75
+ {
76
+ builder.add(file.toString());
77
+ return FileVisitResult.CONTINUE;
78
+ }
79
+ });
80
+ } catch (IOException ex) {
81
+ throw new RuntimeException(String.format("Failed get a list of local files at '%s'", formatted), ex);
82
+ }
83
+ }
84
+ return builder.build();
85
+ }
86
+
87
+ @Override
88
+ public TransactionalFileInput open(TaskSource taskSource, int processorIndex)
89
+ {
90
+ PluginTask task = taskSource.loadTask(PluginTask.class);
91
+ return new LocalFileInput(task, processorIndex);
92
+ }
93
+
94
+ public static class LocalFileInput
95
+ extends InputStreamFileInput
96
+ implements TransactionalFileInput
97
+ {
98
+ // TODO create single-file InputStreamFileInput utility
99
+ private static class SingleFileProvider
100
+ implements InputStreamFileInput.Provider
101
+ {
102
+ private final File file;
103
+ private boolean opened = false;
104
+
105
+ public SingleFileProvider(File file)
106
+ {
107
+ this.file = file;
108
+ }
109
+
110
+ @Override
111
+ public InputStream openNext() throws IOException
112
+ {
113
+ if (opened) {
114
+ return null;
115
+ }
116
+ opened = true;
117
+ return new FileInputStream(file);
118
+ }
119
+
120
+ @Override
121
+ public void close() { }
122
+ }
123
+
124
+ public LocalFileInput(PluginTask task, int processorIndex)
125
+ {
126
+ super(task.getBufferAllocator(), new SingleFileProvider(new File(task.getFiles().get(processorIndex))));
127
+ }
128
+
129
+ @Override
130
+ public void abort() { }
131
+
132
+ @Override
133
+ public CommitReport commit()
134
+ {
135
+ return Exec.newCommitReport();
136
+ }
137
+ }
138
+ }
@@ -0,0 +1,128 @@
1
+ package org.embulk.standards;
2
+
3
+ import java.io.File;
4
+ import java.io.FileNotFoundException;
5
+ import java.io.FileOutputStream;
6
+ import java.io.IOException;
7
+ import java.io.OutputStream;
8
+ import java.util.ArrayList;
9
+ import java.util.List;
10
+ import org.embulk.config.Config;
11
+ import org.embulk.config.ConfigSource;
12
+ import org.embulk.config.NextConfig;
13
+ import org.embulk.config.CommitReport;
14
+ import org.embulk.config.Task;
15
+ import org.embulk.config.TaskSource;
16
+ import org.embulk.spi.Buffer;
17
+ import org.embulk.spi.FileOutputPlugin;
18
+ import org.embulk.spi.TransactionalFileOutput;
19
+ import org.embulk.spi.Exec;
20
+ import org.slf4j.Logger;
21
+
22
+ public class LocalFileOutputPlugin
23
+ implements FileOutputPlugin
24
+ {
25
+ public interface PluginTask
26
+ extends Task
27
+ {
28
+ @Config("directory")
29
+ public String getDirectory();
30
+
31
+ @Config("file_name")
32
+ public String getFileNameFormat();
33
+
34
+ @Config("file_ext")
35
+ public String getFileNameExtension();
36
+
37
+ // TODO support in FileInputPlugin and FileOutputPlugin
38
+ //@Config("compress_type")
39
+ //public String getCompressType();
40
+ }
41
+
42
+ private final Logger log = Exec.getLogger(getClass());
43
+
44
+ @Override
45
+ public NextConfig transaction(ConfigSource config, int processorCount,
46
+ FileOutputPlugin.Control control)
47
+ {
48
+ PluginTask task = config.loadConfig(PluginTask.class);
49
+
50
+ control.run(task.dump());
51
+
52
+ return Exec.newNextConfig();
53
+ }
54
+
55
+ @Override
56
+ public TransactionalFileOutput open(TaskSource taskSource, final int processorIndex)
57
+ {
58
+ PluginTask task = taskSource.loadTask(PluginTask.class);
59
+
60
+ // TODO format path using timestamp
61
+ final String fileName = task.getFileNameFormat();
62
+
63
+ final String pathPrefix = task.getDirectory() + File.separator + fileName;
64
+ final String pathSuffix = task.getFileNameExtension();
65
+
66
+ final List<String> fileNames = new ArrayList<>();
67
+
68
+ return new TransactionalFileOutput() {
69
+ private int fileIndex = 0;
70
+ private FileOutputStream output = null;
71
+
72
+ public void nextFile()
73
+ {
74
+ closeFile();
75
+ String path = pathPrefix + String.format(".%03d.%02d.", processorIndex, fileIndex) + pathSuffix;
76
+ log.info("Writing local file '{}'", path);
77
+ fileNames.add(path);
78
+ try {
79
+ output = new FileOutputStream(new File(path));
80
+ } catch (FileNotFoundException ex) {
81
+ throw new RuntimeException(ex); // TODO exception class
82
+ }
83
+ fileIndex++;
84
+ }
85
+
86
+ private void closeFile()
87
+ {
88
+ if (output != null) {
89
+ try {
90
+ output.close();
91
+ } catch (IOException ex) {
92
+ throw new RuntimeException(ex);
93
+ }
94
+ }
95
+ }
96
+
97
+ public void add(Buffer buffer)
98
+ {
99
+ try {
100
+ output.write(buffer.array(), buffer.offset(), buffer.limit());
101
+ } catch (IOException ex) {
102
+ throw new RuntimeException(ex);
103
+ }
104
+ }
105
+
106
+ public void finish()
107
+ {
108
+ closeFile();
109
+ }
110
+
111
+ public void close()
112
+ {
113
+ closeFile();
114
+ }
115
+
116
+ public void abort() { }
117
+
118
+ public CommitReport commit()
119
+ {
120
+ CommitReport report = Exec.newCommitReport();
121
+ // TODO better setting for Report
122
+ // report.set("file_names", fileNames);
123
+ // report.set("file_sizes", fileSizes);
124
+ return report;
125
+ }
126
+ };
127
+ }
128
+ }
@@ -0,0 +1,46 @@
1
+ package org.embulk.standards;
2
+
3
+ import org.embulk.config.ConfigSource;
4
+ import org.embulk.config.TaskSource;
5
+ import org.embulk.config.NextConfig;
6
+ import org.embulk.config.CommitReport;
7
+ import org.embulk.spi.Schema;
8
+ import org.embulk.spi.Page;
9
+ import org.embulk.spi.Exec;
10
+ import org.embulk.spi.OutputPlugin;
11
+ import org.embulk.spi.TransactionalPageOutput;
12
+
13
+ public class NullOutputPlugin
14
+ implements OutputPlugin
15
+ {
16
+ @Override
17
+ public NextConfig transaction(ConfigSource config,
18
+ Schema schema, int processorCount,
19
+ OutputPlugin.Control control)
20
+ {
21
+ control.run(Exec.newTaskSource());
22
+ return Exec.newNextConfig();
23
+ }
24
+
25
+ @Override
26
+ public TransactionalPageOutput open(TaskSource taskSource, Schema schema, int processorIndex)
27
+ {
28
+ return new TransactionalPageOutput() {
29
+ public void add(Page page)
30
+ {
31
+ page.release();
32
+ }
33
+
34
+ public void finish() { }
35
+
36
+ public void close() { }
37
+
38
+ public void abort() { }
39
+
40
+ public CommitReport commit()
41
+ {
42
+ return Exec.newCommitReport();
43
+ }
44
+ };
45
+ }
46
+ }
@@ -0,0 +1,238 @@
1
+ package org.embulk.standards;
2
+
3
+ import java.util.List;
4
+ import java.io.IOException;
5
+ import java.io.InputStream;
6
+
7
+ import com.amazonaws.AmazonClientException;
8
+ import com.amazonaws.AmazonServiceException;
9
+ import com.google.common.collect.ImmutableList;
10
+ import com.google.common.base.Optional;
11
+ import com.fasterxml.jackson.annotation.JacksonInject;
12
+ import com.amazonaws.auth.AWSCredentials;
13
+ import com.amazonaws.auth.AWSCredentialsProvider;
14
+ import com.amazonaws.auth.BasicAWSCredentials;
15
+ import com.amazonaws.services.s3.AmazonS3Client;
16
+ import com.amazonaws.services.s3.model.ListObjectsRequest;
17
+ import com.amazonaws.services.s3.model.S3ObjectSummary;
18
+ import com.amazonaws.services.s3.model.ObjectListing;
19
+ import com.amazonaws.services.s3.model.GetObjectRequest;
20
+ import com.amazonaws.services.s3.model.S3Object;
21
+ import com.amazonaws.ClientConfiguration;
22
+ import org.embulk.config.Config;
23
+ import org.embulk.config.Task;
24
+ import org.embulk.config.TaskSource;
25
+ import org.embulk.config.ConfigSource;
26
+ import org.embulk.config.NextConfig;
27
+ import org.embulk.config.CommitReport;
28
+ import org.embulk.spi.BufferAllocator;
29
+ import org.embulk.spi.Exec;
30
+ import org.embulk.spi.FileInputPlugin;
31
+ import org.embulk.spi.TransactionalFileInput;
32
+ import org.embulk.spi.util.InputStreamFileInput;
33
+ import org.slf4j.Logger;
34
+
35
+ import static org.embulk.spi.util.Inputs.formatPath;
36
+
37
+ public class S3FileInputPlugin
38
+ implements FileInputPlugin
39
+ {
40
+ public interface PluginTask
41
+ extends Task
42
+ {
43
+ @Config("bucket")
44
+ public String getBucket();
45
+
46
+ @Config("paths")
47
+ public List<String> getPathPrefixes();
48
+
49
+ @Config("endpoint")
50
+ public Optional<String> getEndpoint();
51
+
52
+ // TODO timeout, ssl, etc
53
+
54
+ @Config("access_key_id")
55
+ public String getAccessKeyId();
56
+
57
+ @Config("secret_access_key")
58
+ public String getSecretAccessKey();
59
+
60
+ // TODO support more options such as STS
61
+
62
+ public List<String> getFiles();
63
+ public void setFiles(List<String> files);
64
+
65
+ @JacksonInject
66
+ public BufferAllocator getBufferAllocator();
67
+ }
68
+
69
+ private final Logger log = Exec.getLogger(getClass());
70
+
71
+ @Override
72
+ public NextConfig transaction(ConfigSource config, FileInputPlugin.Control control)
73
+ {
74
+ PluginTask task = config.loadConfig(PluginTask.class);
75
+
76
+ // list files recursively
77
+ task.setFiles(listFiles(task));
78
+
79
+ // number of processors is same with number of files
80
+
81
+ // run
82
+ control.run(task.dump(), task.getFiles().size());
83
+
84
+ return Exec.newNextConfig();
85
+ }
86
+
87
+ public static AWSCredentialsProvider getCredentialsProvider(PluginTask task)
88
+ {
89
+ final AWSCredentials cred = new BasicAWSCredentials(
90
+ task.getAccessKeyId(), task.getSecretAccessKey());
91
+ return new AWSCredentialsProvider() {
92
+ public AWSCredentials getCredentials()
93
+ {
94
+ return cred;
95
+ }
96
+
97
+ public void refresh()
98
+ {
99
+ }
100
+ };
101
+ }
102
+
103
+ private static AmazonS3Client newS3Client(PluginTask task)
104
+ {
105
+ AWSCredentialsProvider credentials = getCredentialsProvider(task);
106
+ AmazonS3Client client = newS3Client(credentials, task.getEndpoint());
107
+ return client;
108
+ }
109
+
110
+ private static AmazonS3Client newS3Client(AWSCredentialsProvider credentials,
111
+ Optional<String> endpoint)
112
+ {
113
+ // TODO get config from AmazonS3Task
114
+ ClientConfiguration clientConfig = new ClientConfiguration();
115
+ //clientConfig.setProtocol(Protocol.HTTP);
116
+ clientConfig.setMaxConnections(50); // SDK default: 50
117
+ clientConfig.setMaxErrorRetry(3); // SDK default: 3
118
+ clientConfig.setSocketTimeout(8*60*1000); // SDK default: 50*1000
119
+
120
+ AmazonS3Client client = new AmazonS3Client(credentials, clientConfig);
121
+
122
+ if (endpoint.isPresent()) {
123
+ client.setEndpoint(endpoint.get());
124
+ }
125
+
126
+ return client;
127
+ }
128
+
129
+ public List<String> listFiles(PluginTask task)
130
+ {
131
+ AmazonS3Client client = newS3Client(task);
132
+ String bucketName = task.getBucket();
133
+
134
+ ImmutableList.Builder<String> builder = ImmutableList.builder();
135
+ for (String prefix : task.getPathPrefixes()) {
136
+ String formatted = formatPath(prefix);
137
+ try {
138
+ log.info("Listing S3 files with prefix '{}'", formatted);
139
+ builder.addAll(listS3FilesByPrefix(client, bucketName, formatted));
140
+ } catch (RuntimeException e) {
141
+ throw new RuntimeException(String.format("Failed get a list of S3 files at '%s'", formatted), e);
142
+ }
143
+ }
144
+
145
+ return builder.build();
146
+ }
147
+
148
+ /**
149
+ * Lists S3 filenames filtered by prefix.
150
+ *
151
+ * The resulting list does not include the file that's size == 0.
152
+ */
153
+ public static List<String> listS3FilesByPrefix(AmazonS3Client client, String bucketName, String prefix)
154
+ {
155
+ // TODO implement retrying
156
+ ImmutableList.Builder<String> builder = ImmutableList.builder();
157
+
158
+ String lastKey = null;
159
+ do {
160
+ ListObjectsRequest req = new ListObjectsRequest(bucketName, prefix, lastKey, null, 1024);
161
+ ObjectListing ol = client.listObjects(req);
162
+ for(S3ObjectSummary s : ol.getObjectSummaries()) {
163
+ if (s.getSize() > 0) {
164
+ builder.add(s.getKey());
165
+ }
166
+ }
167
+ lastKey = ol.getNextMarker();
168
+ } while(lastKey != null);
169
+
170
+ return builder.build();
171
+ }
172
+
173
+ @Override
174
+ public TransactionalFileInput open(TaskSource taskSource, int processorIndex)
175
+ {
176
+ PluginTask task = taskSource.loadTask(PluginTask.class);
177
+ return new S3FileInput(task, processorIndex);
178
+ }
179
+
180
+ public static class S3FileInput
181
+ extends InputStreamFileInput
182
+ implements TransactionalFileInput
183
+ {
184
+ // TODO create single-file InputStreamFileInput utility
185
+ private static class SingleFileProvider
186
+ implements InputStreamFileInput.Provider
187
+ {
188
+ private AmazonS3Client client;
189
+ private final String bucket;
190
+ private final String key;
191
+ private boolean opened = false;
192
+
193
+ public SingleFileProvider(PluginTask task, int processorIndex)
194
+ {
195
+ this.client = newS3Client(task);
196
+ this.bucket = task.getBucket();
197
+ this.key = task.getFiles().get(processorIndex);
198
+ }
199
+
200
+ @Override
201
+ public InputStream openNext() throws IOException
202
+ {
203
+ if (opened) {
204
+ return null;
205
+ }
206
+ opened = true;
207
+ GetObjectRequest request = new GetObjectRequest(bucket, key);
208
+ //if (pos > 0) {
209
+ // request.setRange(pos, contentLength);
210
+ //}
211
+ S3Object obj = client.getObject(request);
212
+ //if (pos <= 0) {
213
+ // // first call
214
+ // contentLength = obj.getObjectMetadata().getContentLength();
215
+ //}
216
+ return obj.getObjectContent();
217
+ }
218
+
219
+ @Override
220
+ public void close() { }
221
+ }
222
+
223
+ public S3FileInput(PluginTask task, int processorIndex)
224
+ {
225
+ super(task.getBufferAllocator(), new SingleFileProvider(task, processorIndex));
226
+ }
227
+
228
+ public void abort() { }
229
+
230
+ public CommitReport commit()
231
+ {
232
+ return Exec.newCommitReport();
233
+ }
234
+
235
+ @Override
236
+ public void close() { }
237
+ }
238
+ }