embulk 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (204) hide show
  1. checksums.yaml +15 -0
  2. data/.gitignore +13 -0
  3. data/Gemfile +3 -0
  4. data/Gemfile.lock +33 -0
  5. data/README.md +117 -0
  6. data/Rakefile +58 -0
  7. data/bin/embulk +63 -0
  8. data/build.gradle +149 -0
  9. data/embulk-cli/build.gradle +6 -0
  10. data/embulk-cli/pom.xml +94 -0
  11. data/embulk-cli/src/main/java/org/embulk/cli/Main.java +15 -0
  12. data/embulk-core/build.gradle +6 -0
  13. data/embulk-core/pom.xml +143 -0
  14. data/embulk-core/src/main/java/org/embulk/EmbulkService.java +39 -0
  15. data/embulk-core/src/main/java/org/embulk/command/Runner.java +199 -0
  16. data/embulk-core/src/main/java/org/embulk/command/TablePrinter.java +119 -0
  17. data/embulk-core/src/main/java/org/embulk/config/CommitReport.java +26 -0
  18. data/embulk-core/src/main/java/org/embulk/config/Config.java +15 -0
  19. data/embulk-core/src/main/java/org/embulk/config/ConfigDefault.java +15 -0
  20. data/embulk-core/src/main/java/org/embulk/config/ConfigException.java +20 -0
  21. data/embulk-core/src/main/java/org/embulk/config/ConfigLoader.java +83 -0
  22. data/embulk-core/src/main/java/org/embulk/config/ConfigSource.java +28 -0
  23. data/embulk-core/src/main/java/org/embulk/config/DataSource.java +35 -0
  24. data/embulk-core/src/main/java/org/embulk/config/DataSourceImpl.java +208 -0
  25. data/embulk-core/src/main/java/org/embulk/config/DataSourceSerDe.java +80 -0
  26. data/embulk-core/src/main/java/org/embulk/config/GenericTypeReference.java +20 -0
  27. data/embulk-core/src/main/java/org/embulk/config/ModelManager.java +125 -0
  28. data/embulk-core/src/main/java/org/embulk/config/NextConfig.java +26 -0
  29. data/embulk-core/src/main/java/org/embulk/config/Task.java +10 -0
  30. data/embulk-core/src/main/java/org/embulk/config/TaskInvocationHandler.java +180 -0
  31. data/embulk-core/src/main/java/org/embulk/config/TaskSerDe.java +343 -0
  32. data/embulk-core/src/main/java/org/embulk/config/TaskSource.java +28 -0
  33. data/embulk-core/src/main/java/org/embulk/config/TaskValidationException.java +37 -0
  34. data/embulk-core/src/main/java/org/embulk/config/TaskValidator.java +24 -0
  35. data/embulk-core/src/main/java/org/embulk/exec/ExecModule.java +45 -0
  36. data/embulk-core/src/main/java/org/embulk/exec/ExecuteInterruptedException.java +10 -0
  37. data/embulk-core/src/main/java/org/embulk/exec/ExecuteResult.java +19 -0
  38. data/embulk-core/src/main/java/org/embulk/exec/ExtensionServiceLoaderModule.java +43 -0
  39. data/embulk-core/src/main/java/org/embulk/exec/ForSystemConfig.java +16 -0
  40. data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +307 -0
  41. data/embulk-core/src/main/java/org/embulk/exec/LocalExecutor.java +274 -0
  42. data/embulk-core/src/main/java/org/embulk/exec/LoggerProvider.java +30 -0
  43. data/embulk-core/src/main/java/org/embulk/exec/NoSampleException.java +10 -0
  44. data/embulk-core/src/main/java/org/embulk/exec/PooledBufferAllocator.java +58 -0
  45. data/embulk-core/src/main/java/org/embulk/exec/PreviewExecutor.java +138 -0
  46. data/embulk-core/src/main/java/org/embulk/exec/PreviewResult.java +27 -0
  47. data/embulk-core/src/main/java/org/embulk/exec/PreviewedNoticeError.java +17 -0
  48. data/embulk-core/src/main/java/org/embulk/exec/SamplingParserPlugin.java +116 -0
  49. data/embulk-core/src/main/java/org/embulk/exec/SystemConfigModule.java +24 -0
  50. data/embulk-core/src/main/java/org/embulk/jruby/JRubyPluginSource.java +69 -0
  51. data/embulk-core/src/main/java/org/embulk/jruby/JRubyScriptingModule.java +100 -0
  52. data/embulk-core/src/main/java/org/embulk/plugin/BuiltinPluginSourceModule.java +17 -0
  53. data/embulk-core/src/main/java/org/embulk/plugin/InjectedPluginSource.java +92 -0
  54. data/embulk-core/src/main/java/org/embulk/plugin/PluginManager.java +34 -0
  55. data/embulk-core/src/main/java/org/embulk/plugin/PluginSource.java +6 -0
  56. data/embulk-core/src/main/java/org/embulk/plugin/PluginSourceNotMatchException.java +19 -0
  57. data/embulk-core/src/main/java/org/embulk/plugin/PluginType.java +47 -0
  58. data/embulk-core/src/main/java/org/embulk/plugin/SetThreadContextClassLoader.java +19 -0
  59. data/embulk-core/src/main/java/org/embulk/spi/Buffer.java +113 -0
  60. data/embulk-core/src/main/java/org/embulk/spi/BufferAllocator.java +8 -0
  61. data/embulk-core/src/main/java/org/embulk/spi/Column.java +92 -0
  62. data/embulk-core/src/main/java/org/embulk/spi/ColumnConfig.java +79 -0
  63. data/embulk-core/src/main/java/org/embulk/spi/DecoderPlugin.java +16 -0
  64. data/embulk-core/src/main/java/org/embulk/spi/EncoderPlugin.java +16 -0
  65. data/embulk-core/src/main/java/org/embulk/spi/Exec.java +76 -0
  66. data/embulk-core/src/main/java/org/embulk/spi/ExecAction.java +6 -0
  67. data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +105 -0
  68. data/embulk-core/src/main/java/org/embulk/spi/Extension.java +42 -0
  69. data/embulk-core/src/main/java/org/embulk/spi/FileInput.java +11 -0
  70. data/embulk-core/src/main/java/org/embulk/spi/FileInputPlugin.java +19 -0
  71. data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +113 -0
  72. data/embulk-core/src/main/java/org/embulk/spi/FileOutput.java +13 -0
  73. data/embulk-core/src/main/java/org/embulk/spi/FileOutputPlugin.java +20 -0
  74. data/embulk-core/src/main/java/org/embulk/spi/FileOutputRunner.java +167 -0
  75. data/embulk-core/src/main/java/org/embulk/spi/FormatterPlugin.java +18 -0
  76. data/embulk-core/src/main/java/org/embulk/spi/GuessPlugin.java +9 -0
  77. data/embulk-core/src/main/java/org/embulk/spi/InputPlugin.java +20 -0
  78. data/embulk-core/src/main/java/org/embulk/spi/OutputPlugin.java +21 -0
  79. data/embulk-core/src/main/java/org/embulk/spi/Page.java +45 -0
  80. data/embulk-core/src/main/java/org/embulk/spi/PageBuilder.java +327 -0
  81. data/embulk-core/src/main/java/org/embulk/spi/PageFormat.java +47 -0
  82. data/embulk-core/src/main/java/org/embulk/spi/PageOutput.java +11 -0
  83. data/embulk-core/src/main/java/org/embulk/spi/PageReader.java +227 -0
  84. data/embulk-core/src/main/java/org/embulk/spi/ParserPlugin.java +17 -0
  85. data/embulk-core/src/main/java/org/embulk/spi/Schema.java +101 -0
  86. data/embulk-core/src/main/java/org/embulk/spi/SchemaConfig.java +52 -0
  87. data/embulk-core/src/main/java/org/embulk/spi/SchemaVisitor.java +14 -0
  88. data/embulk-core/src/main/java/org/embulk/spi/Transactional.java +10 -0
  89. data/embulk-core/src/main/java/org/embulk/spi/TransactionalFileInput.java +17 -0
  90. data/embulk-core/src/main/java/org/embulk/spi/TransactionalFileOutput.java +19 -0
  91. data/embulk-core/src/main/java/org/embulk/spi/TransactionalPageOutput.java +17 -0
  92. data/embulk-core/src/main/java/org/embulk/spi/time/DateTimeZoneSerDe.java +57 -0
  93. data/embulk-core/src/main/java/org/embulk/spi/time/JRubyTimeParserHelper.java +8 -0
  94. data/embulk-core/src/main/java/org/embulk/spi/time/JRubyTimeParserHelperFactory.java +6 -0
  95. data/embulk-core/src/main/java/org/embulk/spi/time/Timestamp.java +159 -0
  96. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormat.java +98 -0
  97. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormatter.java +55 -0
  98. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParseException.java +6 -0
  99. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParser.java +60 -0
  100. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampSerDe.java +50 -0
  101. data/embulk-core/src/main/java/org/embulk/spi/type/AbstractType.java +55 -0
  102. data/embulk-core/src/main/java/org/embulk/spi/type/BooleanType.java +12 -0
  103. data/embulk-core/src/main/java/org/embulk/spi/type/DoubleType.java +12 -0
  104. data/embulk-core/src/main/java/org/embulk/spi/type/LongType.java +12 -0
  105. data/embulk-core/src/main/java/org/embulk/spi/type/StringType.java +12 -0
  106. data/embulk-core/src/main/java/org/embulk/spi/type/TimestampType.java +39 -0
  107. data/embulk-core/src/main/java/org/embulk/spi/type/Type.java +15 -0
  108. data/embulk-core/src/main/java/org/embulk/spi/type/TypeDeserializer.java +47 -0
  109. data/embulk-core/src/main/java/org/embulk/spi/type/Types.java +14 -0
  110. data/embulk-core/src/main/java/org/embulk/spi/util/CharsetSerDe.java +55 -0
  111. data/embulk-core/src/main/java/org/embulk/spi/util/Decoders.java +81 -0
  112. data/embulk-core/src/main/java/org/embulk/spi/util/Encoders.java +81 -0
  113. data/embulk-core/src/main/java/org/embulk/spi/util/FileInputInputStream.java +110 -0
  114. data/embulk-core/src/main/java/org/embulk/spi/util/FileOutputOutputStream.java +94 -0
  115. data/embulk-core/src/main/java/org/embulk/spi/util/InputStreamFileInput.java +111 -0
  116. data/embulk-core/src/main/java/org/embulk/spi/util/Inputs.java +74 -0
  117. data/embulk-core/src/main/java/org/embulk/spi/util/LineDecoder.java +118 -0
  118. data/embulk-core/src/main/java/org/embulk/spi/util/LineEncoder.java +109 -0
  119. data/embulk-core/src/main/java/org/embulk/spi/util/ListFileInput.java +52 -0
  120. data/embulk-core/src/main/java/org/embulk/spi/util/Newline.java +38 -0
  121. data/embulk-core/src/main/java/org/embulk/spi/util/PagePrinter.java +102 -0
  122. data/embulk-core/src/main/java/org/embulk/spi/util/Pages.java +139 -0
  123. data/embulk-core/src/test/java/org/embulk/EmbulkTestRuntime.java +110 -0
  124. data/embulk-core/src/test/java/org/embulk/GuiceBinder.java +72 -0
  125. data/embulk-core/src/test/java/org/embulk/RandomManager.java +53 -0
  126. data/embulk-core/src/test/java/org/embulk/TestPluginSourceModule.java +23 -0
  127. data/embulk-core/src/test/java/org/embulk/TestUtilityModule.java +17 -0
  128. data/embulk-core/src/test/java/org/embulk/config/TestConfigSource.java +114 -0
  129. data/embulk-core/src/test/java/org/embulk/config/TestTaskSource.java +70 -0
  130. data/embulk-core/src/test/java/org/embulk/plugin/MockPluginSource.java +57 -0
  131. data/embulk-core/src/test/java/org/embulk/plugin/TestPluginType.java +18 -0
  132. data/embulk-core/src/test/java/org/embulk/spi/MockFileOutput.java +63 -0
  133. data/embulk-core/src/test/java/org/embulk/spi/MockFormatterPlugin.java +101 -0
  134. data/embulk-core/src/test/java/org/embulk/spi/MockParserPlugin.java +73 -0
  135. data/embulk-core/src/test/java/org/embulk/spi/PageTestUtils.java +78 -0
  136. data/embulk-core/src/test/java/org/embulk/spi/TestFileInputInputStream.java +67 -0
  137. data/embulk-core/src/test/java/org/embulk/spi/TestFileInputRunner.java +180 -0
  138. data/embulk-core/src/test/java/org/embulk/spi/TestFileOutputRunner.java +192 -0
  139. data/embulk-core/src/test/java/org/embulk/spi/TestInputStreamFileInput.java +188 -0
  140. data/embulk-core/src/test/java/org/embulk/spi/TestPageBuilderReader.java +301 -0
  141. data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestamp.java +116 -0
  142. data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestampFormatterParser.java +52 -0
  143. data/embulk-core/src/test/java/org/embulk/spi/type/TestTypeSerDe.java +45 -0
  144. data/embulk-core/src/test/java/org/embulk/spi/util/TestLineDecoder.java +132 -0
  145. data/embulk-core/src/test/java/org/embulk/spi/util/TestLineEncoder.java +123 -0
  146. data/embulk-standards/build.gradle +6 -0
  147. data/embulk-standards/pom.xml +68 -0
  148. data/embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java +158 -0
  149. data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +233 -0
  150. data/embulk-standards/src/main/java/org/embulk/standards/CsvTokenizer.java +355 -0
  151. data/embulk-standards/src/main/java/org/embulk/standards/GzipFileDecoderPlugin.java +55 -0
  152. data/embulk-standards/src/main/java/org/embulk/standards/GzipFileEncoderPlugin.java +39 -0
  153. data/embulk-standards/src/main/java/org/embulk/standards/LocalFileInputPlugin.java +138 -0
  154. data/embulk-standards/src/main/java/org/embulk/standards/LocalFileOutputPlugin.java +128 -0
  155. data/embulk-standards/src/main/java/org/embulk/standards/NullOutputPlugin.java +46 -0
  156. data/embulk-standards/src/main/java/org/embulk/standards/S3FileInputPlugin.java +238 -0
  157. data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginExtension.java +16 -0
  158. data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginModule.java +44 -0
  159. data/embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java +71 -0
  160. data/embulk-standards/src/main/resources/META-INF/services/org.embulk.spi.Extension +1 -0
  161. data/embulk-standards/src/test/java/org/embulk/standards/TestCsvParserPlugin.java +69 -0
  162. data/embulk-standards/src/test/java/org/embulk/standards/TestCsvTokenizer.java +291 -0
  163. data/embulk-standards/src/test/java/org/embulk/standards/TestS3FileInputPlugin.java +43 -0
  164. data/embulk.gemspec +27 -0
  165. data/examples/config.yml +34 -0
  166. data/examples/csv/sample.csv.gz +0 -0
  167. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  168. data/gradle/wrapper/gradle-wrapper.properties +6 -0
  169. data/gradlew +164 -0
  170. data/gradlew.bat +90 -0
  171. data/lib/embulk.rb +16 -0
  172. data/lib/embulk/buffer.rb +17 -0
  173. data/lib/embulk/column.rb +47 -0
  174. data/lib/embulk/command/embulk.rb +39 -0
  175. data/lib/embulk/command/embulk_example.rb +32 -0
  176. data/lib/embulk/command/embulk_generate_bin.rb +62 -0
  177. data/lib/embulk/command/embulk_run.rb +243 -0
  178. data/lib/embulk/data/bundle/.bundle/config +3 -0
  179. data/lib/embulk/data/bundle/Gemfile +31 -0
  180. data/lib/embulk/data/bundle/Gemfile.lock +8 -0
  181. data/lib/embulk/data/bundle/embulk/input_example.rb +40 -0
  182. data/lib/embulk/data/bundle/embulk/output_example.rb +51 -0
  183. data/lib/embulk/data_source.rb +66 -0
  184. data/lib/embulk/error.rb +5 -0
  185. data/lib/embulk/guess_charset.rb +26 -0
  186. data/lib/embulk/guess_csv.rb +195 -0
  187. data/lib/embulk/guess_gzip.rb +18 -0
  188. data/lib/embulk/guess_newline.rb +20 -0
  189. data/lib/embulk/guess_plugin.rb +113 -0
  190. data/lib/embulk/input_plugin.rb +53 -0
  191. data/lib/embulk/java/bootstrap.rb +12 -0
  192. data/lib/embulk/java/imports.rb +26 -0
  193. data/lib/embulk/java/time_helper.rb +77 -0
  194. data/lib/embulk/output_plugin.rb +104 -0
  195. data/lib/embulk/page.rb +28 -0
  196. data/lib/embulk/page_builder.rb +22 -0
  197. data/lib/embulk/plugin.rb +152 -0
  198. data/lib/embulk/plugin_registry.rb +70 -0
  199. data/lib/embulk/schema.rb +85 -0
  200. data/lib/embulk/time_format_guess.rb +331 -0
  201. data/lib/embulk/version.rb +3 -0
  202. data/pom.xml +533 -0
  203. data/settings.gradle +5 -0
  204. metadata +370 -0
@@ -0,0 +1,111 @@
1
+ package org.embulk.spi.util;
2
+
3
+ import java.util.Arrays;
4
+ import java.util.Iterator;
5
+ import java.io.InputStream;
6
+ import java.io.Closeable;
7
+ import java.io.IOException;
8
+ import org.embulk.spi.Buffer;
9
+ import org.embulk.spi.FileInput;
10
+ import org.embulk.spi.BufferAllocator;
11
+
12
+ public class InputStreamFileInput
13
+ implements FileInput
14
+ {
15
+ public interface Provider extends Closeable
16
+ {
17
+ public InputStream openNext() throws IOException;
18
+
19
+ public void close() throws IOException;
20
+ }
21
+
22
+ public static class IteratorProvider implements Provider {
23
+ private Iterator<InputStream> iterator;
24
+
25
+ public IteratorProvider(Iterable<InputStream> iterable)
26
+ {
27
+ this.iterator = iterable.iterator();
28
+ }
29
+
30
+ public IteratorProvider(Iterator<InputStream> iterator)
31
+ {
32
+ this.iterator = iterator;
33
+ }
34
+
35
+ public InputStream openNext() throws IOException
36
+ {
37
+ if (!iterator.hasNext()) {
38
+ return null;
39
+ }
40
+ return iterator.next();
41
+ }
42
+
43
+ @Override
44
+ public void close() throws IOException
45
+ {
46
+ while (iterator.hasNext()) {
47
+ iterator.next().close();
48
+ }
49
+ }
50
+ }
51
+
52
+ private final BufferAllocator allocator;
53
+ private final Provider provider;
54
+ private InputStream current;
55
+
56
+ public InputStreamFileInput(BufferAllocator allocator, Provider provider)
57
+ {
58
+ this.allocator = allocator;
59
+ this.provider = provider;
60
+ this.current = null;
61
+ }
62
+
63
+ public Buffer poll()
64
+ {
65
+ // TODO check current != null and throw Illegal State - file is not opened
66
+ if (current == null) {
67
+ throw new IllegalStateException("openNext must be called before poll()");
68
+ }
69
+ Buffer buffer = allocator.allocate();
70
+ try {
71
+ int n = current.read(buffer.array(), buffer.offset(), buffer.capacity());
72
+ if (n < 0) {
73
+ return null;
74
+ }
75
+ buffer.limit(n);
76
+ Buffer b = buffer;
77
+ buffer = null;
78
+ return b;
79
+ } catch (IOException ex) {
80
+ throw new RuntimeException(ex);
81
+ } finally {
82
+ if (buffer != null) {
83
+ buffer.release();
84
+ buffer = null;
85
+ }
86
+ }
87
+ }
88
+
89
+ public boolean nextFile()
90
+ {
91
+ try {
92
+ if (current != null) {
93
+ current.close();
94
+ current = null;
95
+ }
96
+ current = provider.openNext();
97
+ return current != null;
98
+ } catch (IOException ex) {
99
+ throw new RuntimeException(ex);
100
+ }
101
+ }
102
+
103
+ public void close()
104
+ {
105
+ try {
106
+ provider.close();
107
+ } catch (IOException ex) {
108
+ throw new RuntimeException(ex);
109
+ }
110
+ }
111
+ }
@@ -0,0 +1,74 @@
1
+ package org.embulk.spi.util;
2
+
3
+ import java.util.Iterator;
4
+ import java.util.NoSuchElementException;
5
+
6
+ import org.embulk.spi.Exec;
7
+ import org.embulk.spi.FileInput;
8
+ import org.embulk.spi.Buffer;
9
+ import org.embulk.spi.time.Timestamp;
10
+ import org.embulk.spi.time.TimestampFormatter;
11
+ import org.joda.time.DateTimeZone;
12
+
13
+ public class Inputs
14
+ {
15
+ private static abstract class AbstractPollIterator <E>
16
+ implements Iterator<E>
17
+ {
18
+ private E next;
19
+
20
+ protected abstract E poll();
21
+
22
+ @Override
23
+ public boolean hasNext()
24
+ {
25
+ if (next != null) {
26
+ return true;
27
+ } else {
28
+ next = poll();
29
+ return next != null;
30
+ }
31
+ }
32
+
33
+ @Override
34
+ public E next()
35
+ {
36
+ if (!hasNext()) {
37
+ throw new NoSuchElementException();
38
+ }
39
+ E l = next;
40
+ next = null;
41
+ return l;
42
+ }
43
+
44
+ @Override
45
+ public void remove()
46
+ {
47
+ throw new UnsupportedOperationException();
48
+ }
49
+ }
50
+
51
+ public static Iterable<Buffer> each(final FileInput input)
52
+ {
53
+ return new Iterable<Buffer>() {
54
+ public Iterator<Buffer> iterator()
55
+ {
56
+ return new AbstractPollIterator<Buffer>() {
57
+ public Buffer poll()
58
+ {
59
+ return input.poll();
60
+ }
61
+ };
62
+ }
63
+ };
64
+ }
65
+
66
+ public static String formatPath(String pathFormat)
67
+ {
68
+ Timestamp timestamp = Exec.session().getTransactionTime();
69
+ DateTimeZone timezone = Exec.session().getTransactionTimeZone();
70
+ // newTimestampFormatter (eventually calls org.jruby.util.RubyDateFormat.<init> doesn't throw exceptions
71
+ TimestampFormatter formatter = Exec.session().newTimestampFormatter(pathFormat, timezone);
72
+ return formatter.format(timestamp); // TimestampFormatter.format doesn't throw exceptions
73
+ }
74
+ }
@@ -0,0 +1,118 @@
1
+ package org.embulk.spi.util;
2
+
3
+ import java.util.Iterator;
4
+ import java.util.NoSuchElementException;
5
+ import java.io.Reader;
6
+ import java.io.BufferedReader;
7
+ import java.io.InputStreamReader;
8
+ import java.io.IOException;
9
+ import java.nio.charset.Charset;
10
+ import java.nio.charset.CharsetDecoder;
11
+ import java.nio.charset.CodingErrorAction;
12
+ import org.embulk.config.Task;
13
+ import org.embulk.config.Config;
14
+ import org.embulk.config.ConfigDefault;
15
+ import org.embulk.spi.FileInput;
16
+
17
+ public class LineDecoder
18
+ implements AutoCloseable, Iterable<String>
19
+ {
20
+ // TODO optimize
21
+
22
+ public static interface DecoderTask
23
+ extends Task
24
+ {
25
+ @Config("charset")
26
+ @ConfigDefault("\"utf-8\"")
27
+ public Charset getCharset();
28
+
29
+ @Config("newline")
30
+ @ConfigDefault("\"CRLF\"")
31
+ public Newline getNewline();
32
+ }
33
+
34
+ private final FileInputInputStream inputStream;
35
+ private final BufferedReader reader;
36
+
37
+ public LineDecoder(FileInput in, DecoderTask task)
38
+ {
39
+ CharsetDecoder decoder = task.getCharset()
40
+ .newDecoder()
41
+ .onMalformedInput(CodingErrorAction.REPLACE) // TODO configurable?
42
+ .onUnmappableCharacter(CodingErrorAction.REPLACE); // TODO configurable?
43
+ this.inputStream = new FileInputInputStream(in);
44
+ this.reader = new BufferedReader(new InputStreamReader(inputStream, decoder));
45
+ }
46
+
47
+ public boolean nextFile()
48
+ {
49
+ return inputStream.nextFile();
50
+ }
51
+
52
+ public String poll()
53
+ {
54
+ try {
55
+ return reader.readLine();
56
+ } catch (IOException ex) {
57
+ // unexpected
58
+ throw new RuntimeException(ex);
59
+ }
60
+ }
61
+
62
+ public void close()
63
+ {
64
+ try {
65
+ reader.close();
66
+ } catch (IOException ex) {
67
+ // unexpected
68
+ throw new RuntimeException(ex);
69
+ }
70
+ }
71
+
72
+ public Iterator<String> iterator()
73
+ {
74
+ return new Ite(this);
75
+ }
76
+
77
+ private String nextLine;
78
+
79
+ private static class Ite
80
+ implements Iterator<String>
81
+ {
82
+ private LineDecoder self;
83
+
84
+ public Ite(LineDecoder self)
85
+ {
86
+ // TODO non-static inner class causes a problem with JRuby
87
+ this.self = self;
88
+ }
89
+
90
+ @Override
91
+ public boolean hasNext()
92
+ {
93
+ if (self.nextLine != null) {
94
+ return true;
95
+ } else {
96
+ self.nextLine = self.poll();
97
+ return self.nextLine != null;
98
+ }
99
+ }
100
+
101
+ @Override
102
+ public String next()
103
+ {
104
+ if (!hasNext()) {
105
+ throw new NoSuchElementException();
106
+ }
107
+ String l = self.nextLine;
108
+ self.nextLine = null;
109
+ return l;
110
+ }
111
+
112
+ @Override
113
+ public void remove()
114
+ {
115
+ throw new UnsupportedOperationException();
116
+ }
117
+ }
118
+ }
@@ -0,0 +1,109 @@
1
+ package org.embulk.spi.util;
2
+
3
+ import java.io.Writer;
4
+ import java.io.OutputStreamWriter;
5
+ import java.io.IOException;
6
+ import java.nio.charset.Charset;
7
+ import java.nio.charset.CharsetEncoder;
8
+ import java.nio.charset.CodingErrorAction;
9
+ import com.fasterxml.jackson.annotation.JacksonInject;
10
+ import org.embulk.config.Task;
11
+ import org.embulk.config.Config;
12
+ import org.embulk.config.ConfigDefault;
13
+ import org.embulk.spi.FileOutput;
14
+ import org.embulk.spi.BufferAllocator;
15
+
16
+ public class LineEncoder
17
+ implements AutoCloseable
18
+ {
19
+ // TODO optimize
20
+
21
+ public interface EncoderTask
22
+ extends Task
23
+ {
24
+ @Config("charset")
25
+ @ConfigDefault("\"utf-8\"")
26
+ public Charset getCharset();
27
+
28
+ @Config("newline")
29
+ @ConfigDefault("\"CRLF\"")
30
+ public Newline getNewline();
31
+
32
+ @JacksonInject
33
+ public BufferAllocator getBufferAllocator();
34
+ }
35
+
36
+ private final String newline;
37
+ private final FileOutputOutputStream outputStream;
38
+ private final Writer writer;
39
+
40
+ public LineEncoder(FileOutput out, EncoderTask task)
41
+ {
42
+ CharsetEncoder encoder = task.getCharset()
43
+ .newEncoder()
44
+ .onMalformedInput(CodingErrorAction.REPLACE) // TODO configurable?
45
+ .onUnmappableCharacter(CodingErrorAction.REPLACE); // TODO configurable?
46
+ this.newline = task.getNewline().getString();
47
+ this.outputStream = new FileOutputOutputStream(out, task.getBufferAllocator());
48
+ this.writer = new OutputStreamWriter(outputStream, encoder);
49
+ }
50
+
51
+ public void addNewLine()
52
+ {
53
+ try {
54
+ writer.append(newline);
55
+ } catch (IOException ex) {
56
+ // unexpected
57
+ throw new RuntimeException(ex);
58
+ }
59
+ }
60
+
61
+ public void addLine(String line)
62
+ {
63
+ try {
64
+ writer.append(line);
65
+ } catch (IOException ex) {
66
+ // unexpected
67
+ throw new RuntimeException(ex);
68
+ }
69
+ addNewLine();
70
+ }
71
+
72
+ public void addText(String text)
73
+ {
74
+ try {
75
+ writer.append(text);
76
+ } catch (IOException ex) {
77
+ // unexpected
78
+ throw new RuntimeException(ex);
79
+ }
80
+ }
81
+
82
+ public void nextFile()
83
+ {
84
+ try {
85
+ writer.flush();
86
+ } catch (IOException ex) {
87
+ // unexpected
88
+ throw new RuntimeException(ex);
89
+ }
90
+ outputStream.nextFile();
91
+ }
92
+
93
+ public void finish()
94
+ {
95
+ close(); // flush all remaining buffer in writer
96
+ outputStream.finish();
97
+ }
98
+
99
+ @Override
100
+ public void close()
101
+ {
102
+ try {
103
+ writer.close();
104
+ } catch (IOException ex) {
105
+ // unexpected
106
+ throw new RuntimeException(ex);
107
+ }
108
+ }
109
+ }
@@ -0,0 +1,52 @@
1
+ package org.embulk.spi.util;
2
+
3
+ import java.util.List;
4
+ import java.util.ArrayList;
5
+ import java.util.Iterator;
6
+ import org.embulk.spi.Buffer;
7
+ import org.embulk.spi.FileInput;
8
+
9
+ public class ListFileInput
10
+ implements FileInput
11
+ {
12
+ private Iterator<? extends Iterable<Buffer>> files;
13
+ private Iterator<Buffer> currentBuffers;
14
+
15
+ public ListFileInput(Iterable<? extends Iterable<Buffer>> files)
16
+ {
17
+ this.files = files.iterator();
18
+ }
19
+
20
+ public boolean nextFile()
21
+ {
22
+ if (!files.hasNext()) {
23
+ return false;
24
+ }
25
+ currentBuffers = files.next().iterator();
26
+ return true;
27
+ }
28
+
29
+ public Buffer poll()
30
+ {
31
+ if (currentBuffers == null) {
32
+ throw new IllegalStateException("FileInput.nextFile is not called");
33
+ }
34
+ if (!currentBuffers.hasNext()) {
35
+ return null;
36
+ }
37
+ return currentBuffers.next();
38
+ }
39
+
40
+ public void close()
41
+ {
42
+ do {
43
+ while (true) {
44
+ Buffer b = poll();
45
+ if (b == null) {
46
+ break;
47
+ }
48
+ b.release();
49
+ }
50
+ } while (nextFile());
51
+ }
52
+ }