embulk 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (204) hide show
  1. checksums.yaml +15 -0
  2. data/.gitignore +13 -0
  3. data/Gemfile +3 -0
  4. data/Gemfile.lock +33 -0
  5. data/README.md +117 -0
  6. data/Rakefile +58 -0
  7. data/bin/embulk +63 -0
  8. data/build.gradle +149 -0
  9. data/embulk-cli/build.gradle +6 -0
  10. data/embulk-cli/pom.xml +94 -0
  11. data/embulk-cli/src/main/java/org/embulk/cli/Main.java +15 -0
  12. data/embulk-core/build.gradle +6 -0
  13. data/embulk-core/pom.xml +143 -0
  14. data/embulk-core/src/main/java/org/embulk/EmbulkService.java +39 -0
  15. data/embulk-core/src/main/java/org/embulk/command/Runner.java +199 -0
  16. data/embulk-core/src/main/java/org/embulk/command/TablePrinter.java +119 -0
  17. data/embulk-core/src/main/java/org/embulk/config/CommitReport.java +26 -0
  18. data/embulk-core/src/main/java/org/embulk/config/Config.java +15 -0
  19. data/embulk-core/src/main/java/org/embulk/config/ConfigDefault.java +15 -0
  20. data/embulk-core/src/main/java/org/embulk/config/ConfigException.java +20 -0
  21. data/embulk-core/src/main/java/org/embulk/config/ConfigLoader.java +83 -0
  22. data/embulk-core/src/main/java/org/embulk/config/ConfigSource.java +28 -0
  23. data/embulk-core/src/main/java/org/embulk/config/DataSource.java +35 -0
  24. data/embulk-core/src/main/java/org/embulk/config/DataSourceImpl.java +208 -0
  25. data/embulk-core/src/main/java/org/embulk/config/DataSourceSerDe.java +80 -0
  26. data/embulk-core/src/main/java/org/embulk/config/GenericTypeReference.java +20 -0
  27. data/embulk-core/src/main/java/org/embulk/config/ModelManager.java +125 -0
  28. data/embulk-core/src/main/java/org/embulk/config/NextConfig.java +26 -0
  29. data/embulk-core/src/main/java/org/embulk/config/Task.java +10 -0
  30. data/embulk-core/src/main/java/org/embulk/config/TaskInvocationHandler.java +180 -0
  31. data/embulk-core/src/main/java/org/embulk/config/TaskSerDe.java +343 -0
  32. data/embulk-core/src/main/java/org/embulk/config/TaskSource.java +28 -0
  33. data/embulk-core/src/main/java/org/embulk/config/TaskValidationException.java +37 -0
  34. data/embulk-core/src/main/java/org/embulk/config/TaskValidator.java +24 -0
  35. data/embulk-core/src/main/java/org/embulk/exec/ExecModule.java +45 -0
  36. data/embulk-core/src/main/java/org/embulk/exec/ExecuteInterruptedException.java +10 -0
  37. data/embulk-core/src/main/java/org/embulk/exec/ExecuteResult.java +19 -0
  38. data/embulk-core/src/main/java/org/embulk/exec/ExtensionServiceLoaderModule.java +43 -0
  39. data/embulk-core/src/main/java/org/embulk/exec/ForSystemConfig.java +16 -0
  40. data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +307 -0
  41. data/embulk-core/src/main/java/org/embulk/exec/LocalExecutor.java +274 -0
  42. data/embulk-core/src/main/java/org/embulk/exec/LoggerProvider.java +30 -0
  43. data/embulk-core/src/main/java/org/embulk/exec/NoSampleException.java +10 -0
  44. data/embulk-core/src/main/java/org/embulk/exec/PooledBufferAllocator.java +58 -0
  45. data/embulk-core/src/main/java/org/embulk/exec/PreviewExecutor.java +138 -0
  46. data/embulk-core/src/main/java/org/embulk/exec/PreviewResult.java +27 -0
  47. data/embulk-core/src/main/java/org/embulk/exec/PreviewedNoticeError.java +17 -0
  48. data/embulk-core/src/main/java/org/embulk/exec/SamplingParserPlugin.java +116 -0
  49. data/embulk-core/src/main/java/org/embulk/exec/SystemConfigModule.java +24 -0
  50. data/embulk-core/src/main/java/org/embulk/jruby/JRubyPluginSource.java +69 -0
  51. data/embulk-core/src/main/java/org/embulk/jruby/JRubyScriptingModule.java +100 -0
  52. data/embulk-core/src/main/java/org/embulk/plugin/BuiltinPluginSourceModule.java +17 -0
  53. data/embulk-core/src/main/java/org/embulk/plugin/InjectedPluginSource.java +92 -0
  54. data/embulk-core/src/main/java/org/embulk/plugin/PluginManager.java +34 -0
  55. data/embulk-core/src/main/java/org/embulk/plugin/PluginSource.java +6 -0
  56. data/embulk-core/src/main/java/org/embulk/plugin/PluginSourceNotMatchException.java +19 -0
  57. data/embulk-core/src/main/java/org/embulk/plugin/PluginType.java +47 -0
  58. data/embulk-core/src/main/java/org/embulk/plugin/SetThreadContextClassLoader.java +19 -0
  59. data/embulk-core/src/main/java/org/embulk/spi/Buffer.java +113 -0
  60. data/embulk-core/src/main/java/org/embulk/spi/BufferAllocator.java +8 -0
  61. data/embulk-core/src/main/java/org/embulk/spi/Column.java +92 -0
  62. data/embulk-core/src/main/java/org/embulk/spi/ColumnConfig.java +79 -0
  63. data/embulk-core/src/main/java/org/embulk/spi/DecoderPlugin.java +16 -0
  64. data/embulk-core/src/main/java/org/embulk/spi/EncoderPlugin.java +16 -0
  65. data/embulk-core/src/main/java/org/embulk/spi/Exec.java +76 -0
  66. data/embulk-core/src/main/java/org/embulk/spi/ExecAction.java +6 -0
  67. data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +105 -0
  68. data/embulk-core/src/main/java/org/embulk/spi/Extension.java +42 -0
  69. data/embulk-core/src/main/java/org/embulk/spi/FileInput.java +11 -0
  70. data/embulk-core/src/main/java/org/embulk/spi/FileInputPlugin.java +19 -0
  71. data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +113 -0
  72. data/embulk-core/src/main/java/org/embulk/spi/FileOutput.java +13 -0
  73. data/embulk-core/src/main/java/org/embulk/spi/FileOutputPlugin.java +20 -0
  74. data/embulk-core/src/main/java/org/embulk/spi/FileOutputRunner.java +167 -0
  75. data/embulk-core/src/main/java/org/embulk/spi/FormatterPlugin.java +18 -0
  76. data/embulk-core/src/main/java/org/embulk/spi/GuessPlugin.java +9 -0
  77. data/embulk-core/src/main/java/org/embulk/spi/InputPlugin.java +20 -0
  78. data/embulk-core/src/main/java/org/embulk/spi/OutputPlugin.java +21 -0
  79. data/embulk-core/src/main/java/org/embulk/spi/Page.java +45 -0
  80. data/embulk-core/src/main/java/org/embulk/spi/PageBuilder.java +327 -0
  81. data/embulk-core/src/main/java/org/embulk/spi/PageFormat.java +47 -0
  82. data/embulk-core/src/main/java/org/embulk/spi/PageOutput.java +11 -0
  83. data/embulk-core/src/main/java/org/embulk/spi/PageReader.java +227 -0
  84. data/embulk-core/src/main/java/org/embulk/spi/ParserPlugin.java +17 -0
  85. data/embulk-core/src/main/java/org/embulk/spi/Schema.java +101 -0
  86. data/embulk-core/src/main/java/org/embulk/spi/SchemaConfig.java +52 -0
  87. data/embulk-core/src/main/java/org/embulk/spi/SchemaVisitor.java +14 -0
  88. data/embulk-core/src/main/java/org/embulk/spi/Transactional.java +10 -0
  89. data/embulk-core/src/main/java/org/embulk/spi/TransactionalFileInput.java +17 -0
  90. data/embulk-core/src/main/java/org/embulk/spi/TransactionalFileOutput.java +19 -0
  91. data/embulk-core/src/main/java/org/embulk/spi/TransactionalPageOutput.java +17 -0
  92. data/embulk-core/src/main/java/org/embulk/spi/time/DateTimeZoneSerDe.java +57 -0
  93. data/embulk-core/src/main/java/org/embulk/spi/time/JRubyTimeParserHelper.java +8 -0
  94. data/embulk-core/src/main/java/org/embulk/spi/time/JRubyTimeParserHelperFactory.java +6 -0
  95. data/embulk-core/src/main/java/org/embulk/spi/time/Timestamp.java +159 -0
  96. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormat.java +98 -0
  97. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormatter.java +55 -0
  98. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParseException.java +6 -0
  99. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParser.java +60 -0
  100. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampSerDe.java +50 -0
  101. data/embulk-core/src/main/java/org/embulk/spi/type/AbstractType.java +55 -0
  102. data/embulk-core/src/main/java/org/embulk/spi/type/BooleanType.java +12 -0
  103. data/embulk-core/src/main/java/org/embulk/spi/type/DoubleType.java +12 -0
  104. data/embulk-core/src/main/java/org/embulk/spi/type/LongType.java +12 -0
  105. data/embulk-core/src/main/java/org/embulk/spi/type/StringType.java +12 -0
  106. data/embulk-core/src/main/java/org/embulk/spi/type/TimestampType.java +39 -0
  107. data/embulk-core/src/main/java/org/embulk/spi/type/Type.java +15 -0
  108. data/embulk-core/src/main/java/org/embulk/spi/type/TypeDeserializer.java +47 -0
  109. data/embulk-core/src/main/java/org/embulk/spi/type/Types.java +14 -0
  110. data/embulk-core/src/main/java/org/embulk/spi/util/CharsetSerDe.java +55 -0
  111. data/embulk-core/src/main/java/org/embulk/spi/util/Decoders.java +81 -0
  112. data/embulk-core/src/main/java/org/embulk/spi/util/Encoders.java +81 -0
  113. data/embulk-core/src/main/java/org/embulk/spi/util/FileInputInputStream.java +110 -0
  114. data/embulk-core/src/main/java/org/embulk/spi/util/FileOutputOutputStream.java +94 -0
  115. data/embulk-core/src/main/java/org/embulk/spi/util/InputStreamFileInput.java +111 -0
  116. data/embulk-core/src/main/java/org/embulk/spi/util/Inputs.java +74 -0
  117. data/embulk-core/src/main/java/org/embulk/spi/util/LineDecoder.java +118 -0
  118. data/embulk-core/src/main/java/org/embulk/spi/util/LineEncoder.java +109 -0
  119. data/embulk-core/src/main/java/org/embulk/spi/util/ListFileInput.java +52 -0
  120. data/embulk-core/src/main/java/org/embulk/spi/util/Newline.java +38 -0
  121. data/embulk-core/src/main/java/org/embulk/spi/util/PagePrinter.java +102 -0
  122. data/embulk-core/src/main/java/org/embulk/spi/util/Pages.java +139 -0
  123. data/embulk-core/src/test/java/org/embulk/EmbulkTestRuntime.java +110 -0
  124. data/embulk-core/src/test/java/org/embulk/GuiceBinder.java +72 -0
  125. data/embulk-core/src/test/java/org/embulk/RandomManager.java +53 -0
  126. data/embulk-core/src/test/java/org/embulk/TestPluginSourceModule.java +23 -0
  127. data/embulk-core/src/test/java/org/embulk/TestUtilityModule.java +17 -0
  128. data/embulk-core/src/test/java/org/embulk/config/TestConfigSource.java +114 -0
  129. data/embulk-core/src/test/java/org/embulk/config/TestTaskSource.java +70 -0
  130. data/embulk-core/src/test/java/org/embulk/plugin/MockPluginSource.java +57 -0
  131. data/embulk-core/src/test/java/org/embulk/plugin/TestPluginType.java +18 -0
  132. data/embulk-core/src/test/java/org/embulk/spi/MockFileOutput.java +63 -0
  133. data/embulk-core/src/test/java/org/embulk/spi/MockFormatterPlugin.java +101 -0
  134. data/embulk-core/src/test/java/org/embulk/spi/MockParserPlugin.java +73 -0
  135. data/embulk-core/src/test/java/org/embulk/spi/PageTestUtils.java +78 -0
  136. data/embulk-core/src/test/java/org/embulk/spi/TestFileInputInputStream.java +67 -0
  137. data/embulk-core/src/test/java/org/embulk/spi/TestFileInputRunner.java +180 -0
  138. data/embulk-core/src/test/java/org/embulk/spi/TestFileOutputRunner.java +192 -0
  139. data/embulk-core/src/test/java/org/embulk/spi/TestInputStreamFileInput.java +188 -0
  140. data/embulk-core/src/test/java/org/embulk/spi/TestPageBuilderReader.java +301 -0
  141. data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestamp.java +116 -0
  142. data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestampFormatterParser.java +52 -0
  143. data/embulk-core/src/test/java/org/embulk/spi/type/TestTypeSerDe.java +45 -0
  144. data/embulk-core/src/test/java/org/embulk/spi/util/TestLineDecoder.java +132 -0
  145. data/embulk-core/src/test/java/org/embulk/spi/util/TestLineEncoder.java +123 -0
  146. data/embulk-standards/build.gradle +6 -0
  147. data/embulk-standards/pom.xml +68 -0
  148. data/embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java +158 -0
  149. data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +233 -0
  150. data/embulk-standards/src/main/java/org/embulk/standards/CsvTokenizer.java +355 -0
  151. data/embulk-standards/src/main/java/org/embulk/standards/GzipFileDecoderPlugin.java +55 -0
  152. data/embulk-standards/src/main/java/org/embulk/standards/GzipFileEncoderPlugin.java +39 -0
  153. data/embulk-standards/src/main/java/org/embulk/standards/LocalFileInputPlugin.java +138 -0
  154. data/embulk-standards/src/main/java/org/embulk/standards/LocalFileOutputPlugin.java +128 -0
  155. data/embulk-standards/src/main/java/org/embulk/standards/NullOutputPlugin.java +46 -0
  156. data/embulk-standards/src/main/java/org/embulk/standards/S3FileInputPlugin.java +238 -0
  157. data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginExtension.java +16 -0
  158. data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginModule.java +44 -0
  159. data/embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java +71 -0
  160. data/embulk-standards/src/main/resources/META-INF/services/org.embulk.spi.Extension +1 -0
  161. data/embulk-standards/src/test/java/org/embulk/standards/TestCsvParserPlugin.java +69 -0
  162. data/embulk-standards/src/test/java/org/embulk/standards/TestCsvTokenizer.java +291 -0
  163. data/embulk-standards/src/test/java/org/embulk/standards/TestS3FileInputPlugin.java +43 -0
  164. data/embulk.gemspec +27 -0
  165. data/examples/config.yml +34 -0
  166. data/examples/csv/sample.csv.gz +0 -0
  167. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  168. data/gradle/wrapper/gradle-wrapper.properties +6 -0
  169. data/gradlew +164 -0
  170. data/gradlew.bat +90 -0
  171. data/lib/embulk.rb +16 -0
  172. data/lib/embulk/buffer.rb +17 -0
  173. data/lib/embulk/column.rb +47 -0
  174. data/lib/embulk/command/embulk.rb +39 -0
  175. data/lib/embulk/command/embulk_example.rb +32 -0
  176. data/lib/embulk/command/embulk_generate_bin.rb +62 -0
  177. data/lib/embulk/command/embulk_run.rb +243 -0
  178. data/lib/embulk/data/bundle/.bundle/config +3 -0
  179. data/lib/embulk/data/bundle/Gemfile +31 -0
  180. data/lib/embulk/data/bundle/Gemfile.lock +8 -0
  181. data/lib/embulk/data/bundle/embulk/input_example.rb +40 -0
  182. data/lib/embulk/data/bundle/embulk/output_example.rb +51 -0
  183. data/lib/embulk/data_source.rb +66 -0
  184. data/lib/embulk/error.rb +5 -0
  185. data/lib/embulk/guess_charset.rb +26 -0
  186. data/lib/embulk/guess_csv.rb +195 -0
  187. data/lib/embulk/guess_gzip.rb +18 -0
  188. data/lib/embulk/guess_newline.rb +20 -0
  189. data/lib/embulk/guess_plugin.rb +113 -0
  190. data/lib/embulk/input_plugin.rb +53 -0
  191. data/lib/embulk/java/bootstrap.rb +12 -0
  192. data/lib/embulk/java/imports.rb +26 -0
  193. data/lib/embulk/java/time_helper.rb +77 -0
  194. data/lib/embulk/output_plugin.rb +104 -0
  195. data/lib/embulk/page.rb +28 -0
  196. data/lib/embulk/page_builder.rb +22 -0
  197. data/lib/embulk/plugin.rb +152 -0
  198. data/lib/embulk/plugin_registry.rb +70 -0
  199. data/lib/embulk/schema.rb +85 -0
  200. data/lib/embulk/time_format_guess.rb +331 -0
  201. data/lib/embulk/version.rb +3 -0
  202. data/pom.xml +533 -0
  203. data/settings.gradle +5 -0
  204. metadata +370 -0
@@ -0,0 +1,138 @@
1
+ package org.embulk.standards;
2
+
3
+ import java.util.List;
4
+ import java.io.File;
5
+ import java.io.FileInputStream;
6
+ import java.io.InputStream;
7
+ import java.io.IOException;
8
+ import java.nio.file.Path;
9
+ import java.nio.file.Paths;
10
+ import java.nio.file.Files;
11
+ import java.nio.file.SimpleFileVisitor;
12
+ import java.nio.file.FileVisitResult;
13
+ import java.nio.file.attribute.BasicFileAttributes;
14
+ import javax.validation.constraints.NotNull;
15
+ import com.google.common.collect.ImmutableList;
16
+ import com.fasterxml.jackson.annotation.JacksonInject;
17
+ import org.embulk.config.Config;
18
+ import org.embulk.config.Task;
19
+ import org.embulk.config.TaskSource;
20
+ import org.embulk.config.ConfigSource;
21
+ import org.embulk.config.NextConfig;
22
+ import org.embulk.config.CommitReport;
23
+ import org.embulk.spi.BufferAllocator;
24
+ import org.embulk.spi.Exec;
25
+ import org.embulk.spi.FileInputPlugin;
26
+ import org.embulk.spi.TransactionalFileInput;
27
+ import org.embulk.spi.util.InputStreamFileInput;
28
+ import org.slf4j.Logger;
29
+
30
+ import static org.embulk.spi.util.Inputs.formatPath;
31
+
32
+ public class LocalFileInputPlugin
33
+ implements FileInputPlugin
34
+ {
35
+ public interface PluginTask
36
+ extends Task
37
+ {
38
+ @Config("paths")
39
+ @NotNull
40
+ public List<String> getPathPrefixes();
41
+
42
+ public List<String> getFiles();
43
+ public void setFiles(List<String> files);
44
+
45
+ @JacksonInject
46
+ public BufferAllocator getBufferAllocator();
47
+ }
48
+
49
+ private final Logger log = Exec.getLogger(getClass());
50
+
51
+ @Override
52
+ public NextConfig transaction(ConfigSource config, FileInputPlugin.Control control)
53
+ {
54
+ PluginTask task = config.loadConfig(PluginTask.class);
55
+
56
+ // list files recursively
57
+ task.setFiles(listFiles(task));
58
+
59
+ // run with threads. number of processors is same with number of files
60
+ control.run(task.dump(), task.getFiles().size());
61
+
62
+ return Exec.newNextConfig();
63
+ }
64
+
65
+ public List<String> listFiles(PluginTask task)
66
+ {
67
+ final ImmutableList.Builder<String> builder = ImmutableList.builder();
68
+ for (String prefix : task.getPathPrefixes()) {
69
+ String formatted = formatPath(prefix);
70
+ try {
71
+ log.info("Listing local files with prefix '{}'", formatted);
72
+ Files.walkFileTree(Paths.get(formatted), new SimpleFileVisitor<Path>() {
73
+ @Override
74
+ public FileVisitResult visitFile(Path file, BasicFileAttributes aAttrs)
75
+ {
76
+ builder.add(file.toString());
77
+ return FileVisitResult.CONTINUE;
78
+ }
79
+ });
80
+ } catch (IOException ex) {
81
+ throw new RuntimeException(String.format("Failed get a list of local files at '%s'", formatted), ex);
82
+ }
83
+ }
84
+ return builder.build();
85
+ }
86
+
87
+ @Override
88
+ public TransactionalFileInput open(TaskSource taskSource, int processorIndex)
89
+ {
90
+ PluginTask task = taskSource.loadTask(PluginTask.class);
91
+ return new LocalFileInput(task, processorIndex);
92
+ }
93
+
94
+ public static class LocalFileInput
95
+ extends InputStreamFileInput
96
+ implements TransactionalFileInput
97
+ {
98
+ // TODO create single-file InputStreamFileInput utility
99
+ private static class SingleFileProvider
100
+ implements InputStreamFileInput.Provider
101
+ {
102
+ private final File file;
103
+ private boolean opened = false;
104
+
105
+ public SingleFileProvider(File file)
106
+ {
107
+ this.file = file;
108
+ }
109
+
110
+ @Override
111
+ public InputStream openNext() throws IOException
112
+ {
113
+ if (opened) {
114
+ return null;
115
+ }
116
+ opened = true;
117
+ return new FileInputStream(file);
118
+ }
119
+
120
+ @Override
121
+ public void close() { }
122
+ }
123
+
124
+ public LocalFileInput(PluginTask task, int processorIndex)
125
+ {
126
+ super(task.getBufferAllocator(), new SingleFileProvider(new File(task.getFiles().get(processorIndex))));
127
+ }
128
+
129
+ @Override
130
+ public void abort() { }
131
+
132
+ @Override
133
+ public CommitReport commit()
134
+ {
135
+ return Exec.newCommitReport();
136
+ }
137
+ }
138
+ }
@@ -0,0 +1,128 @@
1
+ package org.embulk.standards;
2
+
3
+ import java.io.File;
4
+ import java.io.FileNotFoundException;
5
+ import java.io.FileOutputStream;
6
+ import java.io.IOException;
7
+ import java.io.OutputStream;
8
+ import java.util.ArrayList;
9
+ import java.util.List;
10
+ import org.embulk.config.Config;
11
+ import org.embulk.config.ConfigSource;
12
+ import org.embulk.config.NextConfig;
13
+ import org.embulk.config.CommitReport;
14
+ import org.embulk.config.Task;
15
+ import org.embulk.config.TaskSource;
16
+ import org.embulk.spi.Buffer;
17
+ import org.embulk.spi.FileOutputPlugin;
18
+ import org.embulk.spi.TransactionalFileOutput;
19
+ import org.embulk.spi.Exec;
20
+ import org.slf4j.Logger;
21
+
22
+ public class LocalFileOutputPlugin
23
+ implements FileOutputPlugin
24
+ {
25
+ public interface PluginTask
26
+ extends Task
27
+ {
28
+ @Config("directory")
29
+ public String getDirectory();
30
+
31
+ @Config("file_name")
32
+ public String getFileNameFormat();
33
+
34
+ @Config("file_ext")
35
+ public String getFileNameExtension();
36
+
37
+ // TODO support in FileInputPlugin and FileOutputPlugin
38
+ //@Config("compress_type")
39
+ //public String getCompressType();
40
+ }
41
+
42
+ private final Logger log = Exec.getLogger(getClass());
43
+
44
+ @Override
45
+ public NextConfig transaction(ConfigSource config, int processorCount,
46
+ FileOutputPlugin.Control control)
47
+ {
48
+ PluginTask task = config.loadConfig(PluginTask.class);
49
+
50
+ control.run(task.dump());
51
+
52
+ return Exec.newNextConfig();
53
+ }
54
+
55
+ @Override
56
+ public TransactionalFileOutput open(TaskSource taskSource, final int processorIndex)
57
+ {
58
+ PluginTask task = taskSource.loadTask(PluginTask.class);
59
+
60
+ // TODO format path using timestamp
61
+ final String fileName = task.getFileNameFormat();
62
+
63
+ final String pathPrefix = task.getDirectory() + File.separator + fileName;
64
+ final String pathSuffix = task.getFileNameExtension();
65
+
66
+ final List<String> fileNames = new ArrayList<>();
67
+
68
+ return new TransactionalFileOutput() {
69
+ private int fileIndex = 0;
70
+ private FileOutputStream output = null;
71
+
72
+ public void nextFile()
73
+ {
74
+ closeFile();
75
+ String path = pathPrefix + String.format(".%03d.%02d.", processorIndex, fileIndex) + pathSuffix;
76
+ log.info("Writing local file '{}'", path);
77
+ fileNames.add(path);
78
+ try {
79
+ output = new FileOutputStream(new File(path));
80
+ } catch (FileNotFoundException ex) {
81
+ throw new RuntimeException(ex); // TODO exception class
82
+ }
83
+ fileIndex++;
84
+ }
85
+
86
+ private void closeFile()
87
+ {
88
+ if (output != null) {
89
+ try {
90
+ output.close();
91
+ } catch (IOException ex) {
92
+ throw new RuntimeException(ex);
93
+ }
94
+ }
95
+ }
96
+
97
+ public void add(Buffer buffer)
98
+ {
99
+ try {
100
+ output.write(buffer.array(), buffer.offset(), buffer.limit());
101
+ } catch (IOException ex) {
102
+ throw new RuntimeException(ex);
103
+ }
104
+ }
105
+
106
+ public void finish()
107
+ {
108
+ closeFile();
109
+ }
110
+
111
+ public void close()
112
+ {
113
+ closeFile();
114
+ }
115
+
116
+ public void abort() { }
117
+
118
+ public CommitReport commit()
119
+ {
120
+ CommitReport report = Exec.newCommitReport();
121
+ // TODO better setting for Report
122
+ // report.set("file_names", fileNames);
123
+ // report.set("file_sizes", fileSizes);
124
+ return report;
125
+ }
126
+ };
127
+ }
128
+ }
@@ -0,0 +1,46 @@
1
+ package org.embulk.standards;
2
+
3
+ import org.embulk.config.ConfigSource;
4
+ import org.embulk.config.TaskSource;
5
+ import org.embulk.config.NextConfig;
6
+ import org.embulk.config.CommitReport;
7
+ import org.embulk.spi.Schema;
8
+ import org.embulk.spi.Page;
9
+ import org.embulk.spi.Exec;
10
+ import org.embulk.spi.OutputPlugin;
11
+ import org.embulk.spi.TransactionalPageOutput;
12
+
13
+ public class NullOutputPlugin
14
+ implements OutputPlugin
15
+ {
16
+ @Override
17
+ public NextConfig transaction(ConfigSource config,
18
+ Schema schema, int processorCount,
19
+ OutputPlugin.Control control)
20
+ {
21
+ control.run(Exec.newTaskSource());
22
+ return Exec.newNextConfig();
23
+ }
24
+
25
+ @Override
26
+ public TransactionalPageOutput open(TaskSource taskSource, Schema schema, int processorIndex)
27
+ {
28
+ return new TransactionalPageOutput() {
29
+ public void add(Page page)
30
+ {
31
+ page.release();
32
+ }
33
+
34
+ public void finish() { }
35
+
36
+ public void close() { }
37
+
38
+ public void abort() { }
39
+
40
+ public CommitReport commit()
41
+ {
42
+ return Exec.newCommitReport();
43
+ }
44
+ };
45
+ }
46
+ }
@@ -0,0 +1,238 @@
1
+ package org.embulk.standards;
2
+
3
+ import java.util.List;
4
+ import java.io.IOException;
5
+ import java.io.InputStream;
6
+
7
+ import com.amazonaws.AmazonClientException;
8
+ import com.amazonaws.AmazonServiceException;
9
+ import com.google.common.collect.ImmutableList;
10
+ import com.google.common.base.Optional;
11
+ import com.fasterxml.jackson.annotation.JacksonInject;
12
+ import com.amazonaws.auth.AWSCredentials;
13
+ import com.amazonaws.auth.AWSCredentialsProvider;
14
+ import com.amazonaws.auth.BasicAWSCredentials;
15
+ import com.amazonaws.services.s3.AmazonS3Client;
16
+ import com.amazonaws.services.s3.model.ListObjectsRequest;
17
+ import com.amazonaws.services.s3.model.S3ObjectSummary;
18
+ import com.amazonaws.services.s3.model.ObjectListing;
19
+ import com.amazonaws.services.s3.model.GetObjectRequest;
20
+ import com.amazonaws.services.s3.model.S3Object;
21
+ import com.amazonaws.ClientConfiguration;
22
+ import org.embulk.config.Config;
23
+ import org.embulk.config.Task;
24
+ import org.embulk.config.TaskSource;
25
+ import org.embulk.config.ConfigSource;
26
+ import org.embulk.config.NextConfig;
27
+ import org.embulk.config.CommitReport;
28
+ import org.embulk.spi.BufferAllocator;
29
+ import org.embulk.spi.Exec;
30
+ import org.embulk.spi.FileInputPlugin;
31
+ import org.embulk.spi.TransactionalFileInput;
32
+ import org.embulk.spi.util.InputStreamFileInput;
33
+ import org.slf4j.Logger;
34
+
35
+ import static org.embulk.spi.util.Inputs.formatPath;
36
+
37
+ public class S3FileInputPlugin
38
+ implements FileInputPlugin
39
+ {
40
+ public interface PluginTask
41
+ extends Task
42
+ {
43
+ @Config("bucket")
44
+ public String getBucket();
45
+
46
+ @Config("paths")
47
+ public List<String> getPathPrefixes();
48
+
49
+ @Config("endpoint")
50
+ public Optional<String> getEndpoint();
51
+
52
+ // TODO timeout, ssl, etc
53
+
54
+ @Config("access_key_id")
55
+ public String getAccessKeyId();
56
+
57
+ @Config("secret_access_key")
58
+ public String getSecretAccessKey();
59
+
60
+ // TODO support more options such as STS
61
+
62
+ public List<String> getFiles();
63
+ public void setFiles(List<String> files);
64
+
65
+ @JacksonInject
66
+ public BufferAllocator getBufferAllocator();
67
+ }
68
+
69
+ private final Logger log = Exec.getLogger(getClass());
70
+
71
+ @Override
72
+ public NextConfig transaction(ConfigSource config, FileInputPlugin.Control control)
73
+ {
74
+ PluginTask task = config.loadConfig(PluginTask.class);
75
+
76
+ // list files recursively
77
+ task.setFiles(listFiles(task));
78
+
79
+ // number of processors is same with number of files
80
+
81
+ // run
82
+ control.run(task.dump(), task.getFiles().size());
83
+
84
+ return Exec.newNextConfig();
85
+ }
86
+
87
+ public static AWSCredentialsProvider getCredentialsProvider(PluginTask task)
88
+ {
89
+ final AWSCredentials cred = new BasicAWSCredentials(
90
+ task.getAccessKeyId(), task.getSecretAccessKey());
91
+ return new AWSCredentialsProvider() {
92
+ public AWSCredentials getCredentials()
93
+ {
94
+ return cred;
95
+ }
96
+
97
+ public void refresh()
98
+ {
99
+ }
100
+ };
101
+ }
102
+
103
+ private static AmazonS3Client newS3Client(PluginTask task)
104
+ {
105
+ AWSCredentialsProvider credentials = getCredentialsProvider(task);
106
+ AmazonS3Client client = newS3Client(credentials, task.getEndpoint());
107
+ return client;
108
+ }
109
+
110
+ private static AmazonS3Client newS3Client(AWSCredentialsProvider credentials,
111
+ Optional<String> endpoint)
112
+ {
113
+ // TODO get config from AmazonS3Task
114
+ ClientConfiguration clientConfig = new ClientConfiguration();
115
+ //clientConfig.setProtocol(Protocol.HTTP);
116
+ clientConfig.setMaxConnections(50); // SDK default: 50
117
+ clientConfig.setMaxErrorRetry(3); // SDK default: 3
118
+ clientConfig.setSocketTimeout(8*60*1000); // SDK default: 50*1000
119
+
120
+ AmazonS3Client client = new AmazonS3Client(credentials, clientConfig);
121
+
122
+ if (endpoint.isPresent()) {
123
+ client.setEndpoint(endpoint.get());
124
+ }
125
+
126
+ return client;
127
+ }
128
+
129
+ public List<String> listFiles(PluginTask task)
130
+ {
131
+ AmazonS3Client client = newS3Client(task);
132
+ String bucketName = task.getBucket();
133
+
134
+ ImmutableList.Builder<String> builder = ImmutableList.builder();
135
+ for (String prefix : task.getPathPrefixes()) {
136
+ String formatted = formatPath(prefix);
137
+ try {
138
+ log.info("Listing S3 files with prefix '{}'", formatted);
139
+ builder.addAll(listS3FilesByPrefix(client, bucketName, formatted));
140
+ } catch (RuntimeException e) {
141
+ throw new RuntimeException(String.format("Failed get a list of S3 files at '%s'", formatted), e);
142
+ }
143
+ }
144
+
145
+ return builder.build();
146
+ }
147
+
148
+ /**
149
+ * Lists S3 filenames filtered by prefix.
150
+ *
151
+ * The resulting list does not include the file that's size == 0.
152
+ */
153
+ public static List<String> listS3FilesByPrefix(AmazonS3Client client, String bucketName, String prefix)
154
+ {
155
+ // TODO implement retrying
156
+ ImmutableList.Builder<String> builder = ImmutableList.builder();
157
+
158
+ String lastKey = null;
159
+ do {
160
+ ListObjectsRequest req = new ListObjectsRequest(bucketName, prefix, lastKey, null, 1024);
161
+ ObjectListing ol = client.listObjects(req);
162
+ for(S3ObjectSummary s : ol.getObjectSummaries()) {
163
+ if (s.getSize() > 0) {
164
+ builder.add(s.getKey());
165
+ }
166
+ }
167
+ lastKey = ol.getNextMarker();
168
+ } while(lastKey != null);
169
+
170
+ return builder.build();
171
+ }
172
+
173
+ @Override
174
+ public TransactionalFileInput open(TaskSource taskSource, int processorIndex)
175
+ {
176
+ PluginTask task = taskSource.loadTask(PluginTask.class);
177
+ return new S3FileInput(task, processorIndex);
178
+ }
179
+
180
+ public static class S3FileInput
181
+ extends InputStreamFileInput
182
+ implements TransactionalFileInput
183
+ {
184
+ // TODO create single-file InputStreamFileInput utility
185
+ private static class SingleFileProvider
186
+ implements InputStreamFileInput.Provider
187
+ {
188
+ private AmazonS3Client client;
189
+ private final String bucket;
190
+ private final String key;
191
+ private boolean opened = false;
192
+
193
+ public SingleFileProvider(PluginTask task, int processorIndex)
194
+ {
195
+ this.client = newS3Client(task);
196
+ this.bucket = task.getBucket();
197
+ this.key = task.getFiles().get(processorIndex);
198
+ }
199
+
200
+ @Override
201
+ public InputStream openNext() throws IOException
202
+ {
203
+ if (opened) {
204
+ return null;
205
+ }
206
+ opened = true;
207
+ GetObjectRequest request = new GetObjectRequest(bucket, key);
208
+ //if (pos > 0) {
209
+ // request.setRange(pos, contentLength);
210
+ //}
211
+ S3Object obj = client.getObject(request);
212
+ //if (pos <= 0) {
213
+ // // first call
214
+ // contentLength = obj.getObjectMetadata().getContentLength();
215
+ //}
216
+ return obj.getObjectContent();
217
+ }
218
+
219
+ @Override
220
+ public void close() { }
221
+ }
222
+
223
+ public S3FileInput(PluginTask task, int processorIndex)
224
+ {
225
+ super(task.getBufferAllocator(), new SingleFileProvider(task, processorIndex));
226
+ }
227
+
228
+ public void abort() { }
229
+
230
+ public CommitReport commit()
231
+ {
232
+ return Exec.newCommitReport();
233
+ }
234
+
235
+ @Override
236
+ public void close() { }
237
+ }
238
+ }