embulk 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (204) hide show
  1. checksums.yaml +15 -0
  2. data/.gitignore +13 -0
  3. data/Gemfile +3 -0
  4. data/Gemfile.lock +33 -0
  5. data/README.md +117 -0
  6. data/Rakefile +58 -0
  7. data/bin/embulk +63 -0
  8. data/build.gradle +149 -0
  9. data/embulk-cli/build.gradle +6 -0
  10. data/embulk-cli/pom.xml +94 -0
  11. data/embulk-cli/src/main/java/org/embulk/cli/Main.java +15 -0
  12. data/embulk-core/build.gradle +6 -0
  13. data/embulk-core/pom.xml +143 -0
  14. data/embulk-core/src/main/java/org/embulk/EmbulkService.java +39 -0
  15. data/embulk-core/src/main/java/org/embulk/command/Runner.java +199 -0
  16. data/embulk-core/src/main/java/org/embulk/command/TablePrinter.java +119 -0
  17. data/embulk-core/src/main/java/org/embulk/config/CommitReport.java +26 -0
  18. data/embulk-core/src/main/java/org/embulk/config/Config.java +15 -0
  19. data/embulk-core/src/main/java/org/embulk/config/ConfigDefault.java +15 -0
  20. data/embulk-core/src/main/java/org/embulk/config/ConfigException.java +20 -0
  21. data/embulk-core/src/main/java/org/embulk/config/ConfigLoader.java +83 -0
  22. data/embulk-core/src/main/java/org/embulk/config/ConfigSource.java +28 -0
  23. data/embulk-core/src/main/java/org/embulk/config/DataSource.java +35 -0
  24. data/embulk-core/src/main/java/org/embulk/config/DataSourceImpl.java +208 -0
  25. data/embulk-core/src/main/java/org/embulk/config/DataSourceSerDe.java +80 -0
  26. data/embulk-core/src/main/java/org/embulk/config/GenericTypeReference.java +20 -0
  27. data/embulk-core/src/main/java/org/embulk/config/ModelManager.java +125 -0
  28. data/embulk-core/src/main/java/org/embulk/config/NextConfig.java +26 -0
  29. data/embulk-core/src/main/java/org/embulk/config/Task.java +10 -0
  30. data/embulk-core/src/main/java/org/embulk/config/TaskInvocationHandler.java +180 -0
  31. data/embulk-core/src/main/java/org/embulk/config/TaskSerDe.java +343 -0
  32. data/embulk-core/src/main/java/org/embulk/config/TaskSource.java +28 -0
  33. data/embulk-core/src/main/java/org/embulk/config/TaskValidationException.java +37 -0
  34. data/embulk-core/src/main/java/org/embulk/config/TaskValidator.java +24 -0
  35. data/embulk-core/src/main/java/org/embulk/exec/ExecModule.java +45 -0
  36. data/embulk-core/src/main/java/org/embulk/exec/ExecuteInterruptedException.java +10 -0
  37. data/embulk-core/src/main/java/org/embulk/exec/ExecuteResult.java +19 -0
  38. data/embulk-core/src/main/java/org/embulk/exec/ExtensionServiceLoaderModule.java +43 -0
  39. data/embulk-core/src/main/java/org/embulk/exec/ForSystemConfig.java +16 -0
  40. data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +307 -0
  41. data/embulk-core/src/main/java/org/embulk/exec/LocalExecutor.java +274 -0
  42. data/embulk-core/src/main/java/org/embulk/exec/LoggerProvider.java +30 -0
  43. data/embulk-core/src/main/java/org/embulk/exec/NoSampleException.java +10 -0
  44. data/embulk-core/src/main/java/org/embulk/exec/PooledBufferAllocator.java +58 -0
  45. data/embulk-core/src/main/java/org/embulk/exec/PreviewExecutor.java +138 -0
  46. data/embulk-core/src/main/java/org/embulk/exec/PreviewResult.java +27 -0
  47. data/embulk-core/src/main/java/org/embulk/exec/PreviewedNoticeError.java +17 -0
  48. data/embulk-core/src/main/java/org/embulk/exec/SamplingParserPlugin.java +116 -0
  49. data/embulk-core/src/main/java/org/embulk/exec/SystemConfigModule.java +24 -0
  50. data/embulk-core/src/main/java/org/embulk/jruby/JRubyPluginSource.java +69 -0
  51. data/embulk-core/src/main/java/org/embulk/jruby/JRubyScriptingModule.java +100 -0
  52. data/embulk-core/src/main/java/org/embulk/plugin/BuiltinPluginSourceModule.java +17 -0
  53. data/embulk-core/src/main/java/org/embulk/plugin/InjectedPluginSource.java +92 -0
  54. data/embulk-core/src/main/java/org/embulk/plugin/PluginManager.java +34 -0
  55. data/embulk-core/src/main/java/org/embulk/plugin/PluginSource.java +6 -0
  56. data/embulk-core/src/main/java/org/embulk/plugin/PluginSourceNotMatchException.java +19 -0
  57. data/embulk-core/src/main/java/org/embulk/plugin/PluginType.java +47 -0
  58. data/embulk-core/src/main/java/org/embulk/plugin/SetThreadContextClassLoader.java +19 -0
  59. data/embulk-core/src/main/java/org/embulk/spi/Buffer.java +113 -0
  60. data/embulk-core/src/main/java/org/embulk/spi/BufferAllocator.java +8 -0
  61. data/embulk-core/src/main/java/org/embulk/spi/Column.java +92 -0
  62. data/embulk-core/src/main/java/org/embulk/spi/ColumnConfig.java +79 -0
  63. data/embulk-core/src/main/java/org/embulk/spi/DecoderPlugin.java +16 -0
  64. data/embulk-core/src/main/java/org/embulk/spi/EncoderPlugin.java +16 -0
  65. data/embulk-core/src/main/java/org/embulk/spi/Exec.java +76 -0
  66. data/embulk-core/src/main/java/org/embulk/spi/ExecAction.java +6 -0
  67. data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +105 -0
  68. data/embulk-core/src/main/java/org/embulk/spi/Extension.java +42 -0
  69. data/embulk-core/src/main/java/org/embulk/spi/FileInput.java +11 -0
  70. data/embulk-core/src/main/java/org/embulk/spi/FileInputPlugin.java +19 -0
  71. data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +113 -0
  72. data/embulk-core/src/main/java/org/embulk/spi/FileOutput.java +13 -0
  73. data/embulk-core/src/main/java/org/embulk/spi/FileOutputPlugin.java +20 -0
  74. data/embulk-core/src/main/java/org/embulk/spi/FileOutputRunner.java +167 -0
  75. data/embulk-core/src/main/java/org/embulk/spi/FormatterPlugin.java +18 -0
  76. data/embulk-core/src/main/java/org/embulk/spi/GuessPlugin.java +9 -0
  77. data/embulk-core/src/main/java/org/embulk/spi/InputPlugin.java +20 -0
  78. data/embulk-core/src/main/java/org/embulk/spi/OutputPlugin.java +21 -0
  79. data/embulk-core/src/main/java/org/embulk/spi/Page.java +45 -0
  80. data/embulk-core/src/main/java/org/embulk/spi/PageBuilder.java +327 -0
  81. data/embulk-core/src/main/java/org/embulk/spi/PageFormat.java +47 -0
  82. data/embulk-core/src/main/java/org/embulk/spi/PageOutput.java +11 -0
  83. data/embulk-core/src/main/java/org/embulk/spi/PageReader.java +227 -0
  84. data/embulk-core/src/main/java/org/embulk/spi/ParserPlugin.java +17 -0
  85. data/embulk-core/src/main/java/org/embulk/spi/Schema.java +101 -0
  86. data/embulk-core/src/main/java/org/embulk/spi/SchemaConfig.java +52 -0
  87. data/embulk-core/src/main/java/org/embulk/spi/SchemaVisitor.java +14 -0
  88. data/embulk-core/src/main/java/org/embulk/spi/Transactional.java +10 -0
  89. data/embulk-core/src/main/java/org/embulk/spi/TransactionalFileInput.java +17 -0
  90. data/embulk-core/src/main/java/org/embulk/spi/TransactionalFileOutput.java +19 -0
  91. data/embulk-core/src/main/java/org/embulk/spi/TransactionalPageOutput.java +17 -0
  92. data/embulk-core/src/main/java/org/embulk/spi/time/DateTimeZoneSerDe.java +57 -0
  93. data/embulk-core/src/main/java/org/embulk/spi/time/JRubyTimeParserHelper.java +8 -0
  94. data/embulk-core/src/main/java/org/embulk/spi/time/JRubyTimeParserHelperFactory.java +6 -0
  95. data/embulk-core/src/main/java/org/embulk/spi/time/Timestamp.java +159 -0
  96. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormat.java +98 -0
  97. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormatter.java +55 -0
  98. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParseException.java +6 -0
  99. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParser.java +60 -0
  100. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampSerDe.java +50 -0
  101. data/embulk-core/src/main/java/org/embulk/spi/type/AbstractType.java +55 -0
  102. data/embulk-core/src/main/java/org/embulk/spi/type/BooleanType.java +12 -0
  103. data/embulk-core/src/main/java/org/embulk/spi/type/DoubleType.java +12 -0
  104. data/embulk-core/src/main/java/org/embulk/spi/type/LongType.java +12 -0
  105. data/embulk-core/src/main/java/org/embulk/spi/type/StringType.java +12 -0
  106. data/embulk-core/src/main/java/org/embulk/spi/type/TimestampType.java +39 -0
  107. data/embulk-core/src/main/java/org/embulk/spi/type/Type.java +15 -0
  108. data/embulk-core/src/main/java/org/embulk/spi/type/TypeDeserializer.java +47 -0
  109. data/embulk-core/src/main/java/org/embulk/spi/type/Types.java +14 -0
  110. data/embulk-core/src/main/java/org/embulk/spi/util/CharsetSerDe.java +55 -0
  111. data/embulk-core/src/main/java/org/embulk/spi/util/Decoders.java +81 -0
  112. data/embulk-core/src/main/java/org/embulk/spi/util/Encoders.java +81 -0
  113. data/embulk-core/src/main/java/org/embulk/spi/util/FileInputInputStream.java +110 -0
  114. data/embulk-core/src/main/java/org/embulk/spi/util/FileOutputOutputStream.java +94 -0
  115. data/embulk-core/src/main/java/org/embulk/spi/util/InputStreamFileInput.java +111 -0
  116. data/embulk-core/src/main/java/org/embulk/spi/util/Inputs.java +74 -0
  117. data/embulk-core/src/main/java/org/embulk/spi/util/LineDecoder.java +118 -0
  118. data/embulk-core/src/main/java/org/embulk/spi/util/LineEncoder.java +109 -0
  119. data/embulk-core/src/main/java/org/embulk/spi/util/ListFileInput.java +52 -0
  120. data/embulk-core/src/main/java/org/embulk/spi/util/Newline.java +38 -0
  121. data/embulk-core/src/main/java/org/embulk/spi/util/PagePrinter.java +102 -0
  122. data/embulk-core/src/main/java/org/embulk/spi/util/Pages.java +139 -0
  123. data/embulk-core/src/test/java/org/embulk/EmbulkTestRuntime.java +110 -0
  124. data/embulk-core/src/test/java/org/embulk/GuiceBinder.java +72 -0
  125. data/embulk-core/src/test/java/org/embulk/RandomManager.java +53 -0
  126. data/embulk-core/src/test/java/org/embulk/TestPluginSourceModule.java +23 -0
  127. data/embulk-core/src/test/java/org/embulk/TestUtilityModule.java +17 -0
  128. data/embulk-core/src/test/java/org/embulk/config/TestConfigSource.java +114 -0
  129. data/embulk-core/src/test/java/org/embulk/config/TestTaskSource.java +70 -0
  130. data/embulk-core/src/test/java/org/embulk/plugin/MockPluginSource.java +57 -0
  131. data/embulk-core/src/test/java/org/embulk/plugin/TestPluginType.java +18 -0
  132. data/embulk-core/src/test/java/org/embulk/spi/MockFileOutput.java +63 -0
  133. data/embulk-core/src/test/java/org/embulk/spi/MockFormatterPlugin.java +101 -0
  134. data/embulk-core/src/test/java/org/embulk/spi/MockParserPlugin.java +73 -0
  135. data/embulk-core/src/test/java/org/embulk/spi/PageTestUtils.java +78 -0
  136. data/embulk-core/src/test/java/org/embulk/spi/TestFileInputInputStream.java +67 -0
  137. data/embulk-core/src/test/java/org/embulk/spi/TestFileInputRunner.java +180 -0
  138. data/embulk-core/src/test/java/org/embulk/spi/TestFileOutputRunner.java +192 -0
  139. data/embulk-core/src/test/java/org/embulk/spi/TestInputStreamFileInput.java +188 -0
  140. data/embulk-core/src/test/java/org/embulk/spi/TestPageBuilderReader.java +301 -0
  141. data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestamp.java +116 -0
  142. data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestampFormatterParser.java +52 -0
  143. data/embulk-core/src/test/java/org/embulk/spi/type/TestTypeSerDe.java +45 -0
  144. data/embulk-core/src/test/java/org/embulk/spi/util/TestLineDecoder.java +132 -0
  145. data/embulk-core/src/test/java/org/embulk/spi/util/TestLineEncoder.java +123 -0
  146. data/embulk-standards/build.gradle +6 -0
  147. data/embulk-standards/pom.xml +68 -0
  148. data/embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java +158 -0
  149. data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +233 -0
  150. data/embulk-standards/src/main/java/org/embulk/standards/CsvTokenizer.java +355 -0
  151. data/embulk-standards/src/main/java/org/embulk/standards/GzipFileDecoderPlugin.java +55 -0
  152. data/embulk-standards/src/main/java/org/embulk/standards/GzipFileEncoderPlugin.java +39 -0
  153. data/embulk-standards/src/main/java/org/embulk/standards/LocalFileInputPlugin.java +138 -0
  154. data/embulk-standards/src/main/java/org/embulk/standards/LocalFileOutputPlugin.java +128 -0
  155. data/embulk-standards/src/main/java/org/embulk/standards/NullOutputPlugin.java +46 -0
  156. data/embulk-standards/src/main/java/org/embulk/standards/S3FileInputPlugin.java +238 -0
  157. data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginExtension.java +16 -0
  158. data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginModule.java +44 -0
  159. data/embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java +71 -0
  160. data/embulk-standards/src/main/resources/META-INF/services/org.embulk.spi.Extension +1 -0
  161. data/embulk-standards/src/test/java/org/embulk/standards/TestCsvParserPlugin.java +69 -0
  162. data/embulk-standards/src/test/java/org/embulk/standards/TestCsvTokenizer.java +291 -0
  163. data/embulk-standards/src/test/java/org/embulk/standards/TestS3FileInputPlugin.java +43 -0
  164. data/embulk.gemspec +27 -0
  165. data/examples/config.yml +34 -0
  166. data/examples/csv/sample.csv.gz +0 -0
  167. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  168. data/gradle/wrapper/gradle-wrapper.properties +6 -0
  169. data/gradlew +164 -0
  170. data/gradlew.bat +90 -0
  171. data/lib/embulk.rb +16 -0
  172. data/lib/embulk/buffer.rb +17 -0
  173. data/lib/embulk/column.rb +47 -0
  174. data/lib/embulk/command/embulk.rb +39 -0
  175. data/lib/embulk/command/embulk_example.rb +32 -0
  176. data/lib/embulk/command/embulk_generate_bin.rb +62 -0
  177. data/lib/embulk/command/embulk_run.rb +243 -0
  178. data/lib/embulk/data/bundle/.bundle/config +3 -0
  179. data/lib/embulk/data/bundle/Gemfile +31 -0
  180. data/lib/embulk/data/bundle/Gemfile.lock +8 -0
  181. data/lib/embulk/data/bundle/embulk/input_example.rb +40 -0
  182. data/lib/embulk/data/bundle/embulk/output_example.rb +51 -0
  183. data/lib/embulk/data_source.rb +66 -0
  184. data/lib/embulk/error.rb +5 -0
  185. data/lib/embulk/guess_charset.rb +26 -0
  186. data/lib/embulk/guess_csv.rb +195 -0
  187. data/lib/embulk/guess_gzip.rb +18 -0
  188. data/lib/embulk/guess_newline.rb +20 -0
  189. data/lib/embulk/guess_plugin.rb +113 -0
  190. data/lib/embulk/input_plugin.rb +53 -0
  191. data/lib/embulk/java/bootstrap.rb +12 -0
  192. data/lib/embulk/java/imports.rb +26 -0
  193. data/lib/embulk/java/time_helper.rb +77 -0
  194. data/lib/embulk/output_plugin.rb +104 -0
  195. data/lib/embulk/page.rb +28 -0
  196. data/lib/embulk/page_builder.rb +22 -0
  197. data/lib/embulk/plugin.rb +152 -0
  198. data/lib/embulk/plugin_registry.rb +70 -0
  199. data/lib/embulk/schema.rb +85 -0
  200. data/lib/embulk/time_format_guess.rb +331 -0
  201. data/lib/embulk/version.rb +3 -0
  202. data/pom.xml +533 -0
  203. data/settings.gradle +5 -0
  204. metadata +370 -0
@@ -0,0 +1,111 @@
1
+ package org.embulk.spi.util;
2
+
3
+ import java.util.Arrays;
4
+ import java.util.Iterator;
5
+ import java.io.InputStream;
6
+ import java.io.Closeable;
7
+ import java.io.IOException;
8
+ import org.embulk.spi.Buffer;
9
+ import org.embulk.spi.FileInput;
10
+ import org.embulk.spi.BufferAllocator;
11
+
12
+ public class InputStreamFileInput
13
+ implements FileInput
14
+ {
15
+ public interface Provider extends Closeable
16
+ {
17
+ public InputStream openNext() throws IOException;
18
+
19
+ public void close() throws IOException;
20
+ }
21
+
22
+ public static class IteratorProvider implements Provider {
23
+ private Iterator<InputStream> iterator;
24
+
25
+ public IteratorProvider(Iterable<InputStream> iterable)
26
+ {
27
+ this.iterator = iterable.iterator();
28
+ }
29
+
30
+ public IteratorProvider(Iterator<InputStream> iterator)
31
+ {
32
+ this.iterator = iterator;
33
+ }
34
+
35
+ public InputStream openNext() throws IOException
36
+ {
37
+ if (!iterator.hasNext()) {
38
+ return null;
39
+ }
40
+ return iterator.next();
41
+ }
42
+
43
+ @Override
44
+ public void close() throws IOException
45
+ {
46
+ while (iterator.hasNext()) {
47
+ iterator.next().close();
48
+ }
49
+ }
50
+ }
51
+
52
+ private final BufferAllocator allocator;
53
+ private final Provider provider;
54
+ private InputStream current;
55
+
56
+ public InputStreamFileInput(BufferAllocator allocator, Provider provider)
57
+ {
58
+ this.allocator = allocator;
59
+ this.provider = provider;
60
+ this.current = null;
61
+ }
62
+
63
+ public Buffer poll()
64
+ {
65
+ // TODO check current != null and throw Illegal State - file is not opened
66
+ if (current == null) {
67
+ throw new IllegalStateException("openNext must be called before poll()");
68
+ }
69
+ Buffer buffer = allocator.allocate();
70
+ try {
71
+ int n = current.read(buffer.array(), buffer.offset(), buffer.capacity());
72
+ if (n < 0) {
73
+ return null;
74
+ }
75
+ buffer.limit(n);
76
+ Buffer b = buffer;
77
+ buffer = null;
78
+ return b;
79
+ } catch (IOException ex) {
80
+ throw new RuntimeException(ex);
81
+ } finally {
82
+ if (buffer != null) {
83
+ buffer.release();
84
+ buffer = null;
85
+ }
86
+ }
87
+ }
88
+
89
+ public boolean nextFile()
90
+ {
91
+ try {
92
+ if (current != null) {
93
+ current.close();
94
+ current = null;
95
+ }
96
+ current = provider.openNext();
97
+ return current != null;
98
+ } catch (IOException ex) {
99
+ throw new RuntimeException(ex);
100
+ }
101
+ }
102
+
103
+ public void close()
104
+ {
105
+ try {
106
+ provider.close();
107
+ } catch (IOException ex) {
108
+ throw new RuntimeException(ex);
109
+ }
110
+ }
111
+ }
@@ -0,0 +1,74 @@
1
+ package org.embulk.spi.util;
2
+
3
+ import java.util.Iterator;
4
+ import java.util.NoSuchElementException;
5
+
6
+ import org.embulk.spi.Exec;
7
+ import org.embulk.spi.FileInput;
8
+ import org.embulk.spi.Buffer;
9
+ import org.embulk.spi.time.Timestamp;
10
+ import org.embulk.spi.time.TimestampFormatter;
11
+ import org.joda.time.DateTimeZone;
12
+
13
+ public class Inputs
14
+ {
15
+ private static abstract class AbstractPollIterator <E>
16
+ implements Iterator<E>
17
+ {
18
+ private E next;
19
+
20
+ protected abstract E poll();
21
+
22
+ @Override
23
+ public boolean hasNext()
24
+ {
25
+ if (next != null) {
26
+ return true;
27
+ } else {
28
+ next = poll();
29
+ return next != null;
30
+ }
31
+ }
32
+
33
+ @Override
34
+ public E next()
35
+ {
36
+ if (!hasNext()) {
37
+ throw new NoSuchElementException();
38
+ }
39
+ E l = next;
40
+ next = null;
41
+ return l;
42
+ }
43
+
44
+ @Override
45
+ public void remove()
46
+ {
47
+ throw new UnsupportedOperationException();
48
+ }
49
+ }
50
+
51
+ public static Iterable<Buffer> each(final FileInput input)
52
+ {
53
+ return new Iterable<Buffer>() {
54
+ public Iterator<Buffer> iterator()
55
+ {
56
+ return new AbstractPollIterator<Buffer>() {
57
+ public Buffer poll()
58
+ {
59
+ return input.poll();
60
+ }
61
+ };
62
+ }
63
+ };
64
+ }
65
+
66
+ public static String formatPath(String pathFormat)
67
+ {
68
+ Timestamp timestamp = Exec.session().getTransactionTime();
69
+ DateTimeZone timezone = Exec.session().getTransactionTimeZone();
70
+ // newTimestampFormatter (eventually calls org.jruby.util.RubyDateFormat.<init> doesn't throw exceptions
71
+ TimestampFormatter formatter = Exec.session().newTimestampFormatter(pathFormat, timezone);
72
+ return formatter.format(timestamp); // TimestampFormatter.format doesn't throw exceptions
73
+ }
74
+ }
@@ -0,0 +1,118 @@
1
+ package org.embulk.spi.util;
2
+
3
+ import java.util.Iterator;
4
+ import java.util.NoSuchElementException;
5
+ import java.io.Reader;
6
+ import java.io.BufferedReader;
7
+ import java.io.InputStreamReader;
8
+ import java.io.IOException;
9
+ import java.nio.charset.Charset;
10
+ import java.nio.charset.CharsetDecoder;
11
+ import java.nio.charset.CodingErrorAction;
12
+ import org.embulk.config.Task;
13
+ import org.embulk.config.Config;
14
+ import org.embulk.config.ConfigDefault;
15
+ import org.embulk.spi.FileInput;
16
+
17
+ public class LineDecoder
18
+ implements AutoCloseable, Iterable<String>
19
+ {
20
+ // TODO optimize
21
+
22
+ public static interface DecoderTask
23
+ extends Task
24
+ {
25
+ @Config("charset")
26
+ @ConfigDefault("\"utf-8\"")
27
+ public Charset getCharset();
28
+
29
+ @Config("newline")
30
+ @ConfigDefault("\"CRLF\"")
31
+ public Newline getNewline();
32
+ }
33
+
34
+ private final FileInputInputStream inputStream;
35
+ private final BufferedReader reader;
36
+
37
+ public LineDecoder(FileInput in, DecoderTask task)
38
+ {
39
+ CharsetDecoder decoder = task.getCharset()
40
+ .newDecoder()
41
+ .onMalformedInput(CodingErrorAction.REPLACE) // TODO configurable?
42
+ .onUnmappableCharacter(CodingErrorAction.REPLACE); // TODO configurable?
43
+ this.inputStream = new FileInputInputStream(in);
44
+ this.reader = new BufferedReader(new InputStreamReader(inputStream, decoder));
45
+ }
46
+
47
+ public boolean nextFile()
48
+ {
49
+ return inputStream.nextFile();
50
+ }
51
+
52
+ public String poll()
53
+ {
54
+ try {
55
+ return reader.readLine();
56
+ } catch (IOException ex) {
57
+ // unexpected
58
+ throw new RuntimeException(ex);
59
+ }
60
+ }
61
+
62
+ public void close()
63
+ {
64
+ try {
65
+ reader.close();
66
+ } catch (IOException ex) {
67
+ // unexpected
68
+ throw new RuntimeException(ex);
69
+ }
70
+ }
71
+
72
+ public Iterator<String> iterator()
73
+ {
74
+ return new Ite(this);
75
+ }
76
+
77
+ private String nextLine;
78
+
79
+ private static class Ite
80
+ implements Iterator<String>
81
+ {
82
+ private LineDecoder self;
83
+
84
+ public Ite(LineDecoder self)
85
+ {
86
+ // TODO non-static inner class causes a problem with JRuby
87
+ this.self = self;
88
+ }
89
+
90
+ @Override
91
+ public boolean hasNext()
92
+ {
93
+ if (self.nextLine != null) {
94
+ return true;
95
+ } else {
96
+ self.nextLine = self.poll();
97
+ return self.nextLine != null;
98
+ }
99
+ }
100
+
101
+ @Override
102
+ public String next()
103
+ {
104
+ if (!hasNext()) {
105
+ throw new NoSuchElementException();
106
+ }
107
+ String l = self.nextLine;
108
+ self.nextLine = null;
109
+ return l;
110
+ }
111
+
112
+ @Override
113
+ public void remove()
114
+ {
115
+ throw new UnsupportedOperationException();
116
+ }
117
+ }
118
+ }
@@ -0,0 +1,109 @@
1
+ package org.embulk.spi.util;
2
+
3
+ import java.io.Writer;
4
+ import java.io.OutputStreamWriter;
5
+ import java.io.IOException;
6
+ import java.nio.charset.Charset;
7
+ import java.nio.charset.CharsetEncoder;
8
+ import java.nio.charset.CodingErrorAction;
9
+ import com.fasterxml.jackson.annotation.JacksonInject;
10
+ import org.embulk.config.Task;
11
+ import org.embulk.config.Config;
12
+ import org.embulk.config.ConfigDefault;
13
+ import org.embulk.spi.FileOutput;
14
+ import org.embulk.spi.BufferAllocator;
15
+
16
+ public class LineEncoder
17
+ implements AutoCloseable
18
+ {
19
+ // TODO optimize
20
+
21
+ public interface EncoderTask
22
+ extends Task
23
+ {
24
+ @Config("charset")
25
+ @ConfigDefault("\"utf-8\"")
26
+ public Charset getCharset();
27
+
28
+ @Config("newline")
29
+ @ConfigDefault("\"CRLF\"")
30
+ public Newline getNewline();
31
+
32
+ @JacksonInject
33
+ public BufferAllocator getBufferAllocator();
34
+ }
35
+
36
+ private final String newline;
37
+ private final FileOutputOutputStream outputStream;
38
+ private final Writer writer;
39
+
40
+ public LineEncoder(FileOutput out, EncoderTask task)
41
+ {
42
+ CharsetEncoder encoder = task.getCharset()
43
+ .newEncoder()
44
+ .onMalformedInput(CodingErrorAction.REPLACE) // TODO configurable?
45
+ .onUnmappableCharacter(CodingErrorAction.REPLACE); // TODO configurable?
46
+ this.newline = task.getNewline().getString();
47
+ this.outputStream = new FileOutputOutputStream(out, task.getBufferAllocator());
48
+ this.writer = new OutputStreamWriter(outputStream, encoder);
49
+ }
50
+
51
+ public void addNewLine()
52
+ {
53
+ try {
54
+ writer.append(newline);
55
+ } catch (IOException ex) {
56
+ // unexpected
57
+ throw new RuntimeException(ex);
58
+ }
59
+ }
60
+
61
+ public void addLine(String line)
62
+ {
63
+ try {
64
+ writer.append(line);
65
+ } catch (IOException ex) {
66
+ // unexpected
67
+ throw new RuntimeException(ex);
68
+ }
69
+ addNewLine();
70
+ }
71
+
72
+ public void addText(String text)
73
+ {
74
+ try {
75
+ writer.append(text);
76
+ } catch (IOException ex) {
77
+ // unexpected
78
+ throw new RuntimeException(ex);
79
+ }
80
+ }
81
+
82
+ public void nextFile()
83
+ {
84
+ try {
85
+ writer.flush();
86
+ } catch (IOException ex) {
87
+ // unexpected
88
+ throw new RuntimeException(ex);
89
+ }
90
+ outputStream.nextFile();
91
+ }
92
+
93
+ public void finish()
94
+ {
95
+ close(); // flush all remaining buffer in writer
96
+ outputStream.finish();
97
+ }
98
+
99
+ @Override
100
+ public void close()
101
+ {
102
+ try {
103
+ writer.close();
104
+ } catch (IOException ex) {
105
+ // unexpected
106
+ throw new RuntimeException(ex);
107
+ }
108
+ }
109
+ }
@@ -0,0 +1,52 @@
1
+ package org.embulk.spi.util;
2
+
3
+ import java.util.List;
4
+ import java.util.ArrayList;
5
+ import java.util.Iterator;
6
+ import org.embulk.spi.Buffer;
7
+ import org.embulk.spi.FileInput;
8
+
9
+ public class ListFileInput
10
+ implements FileInput
11
+ {
12
+ private Iterator<? extends Iterable<Buffer>> files;
13
+ private Iterator<Buffer> currentBuffers;
14
+
15
+ public ListFileInput(Iterable<? extends Iterable<Buffer>> files)
16
+ {
17
+ this.files = files.iterator();
18
+ }
19
+
20
+ public boolean nextFile()
21
+ {
22
+ if (!files.hasNext()) {
23
+ return false;
24
+ }
25
+ currentBuffers = files.next().iterator();
26
+ return true;
27
+ }
28
+
29
+ public Buffer poll()
30
+ {
31
+ if (currentBuffers == null) {
32
+ throw new IllegalStateException("FileInput.nextFile is not called");
33
+ }
34
+ if (!currentBuffers.hasNext()) {
35
+ return null;
36
+ }
37
+ return currentBuffers.next();
38
+ }
39
+
40
+ public void close()
41
+ {
42
+ do {
43
+ while (true) {
44
+ Buffer b = poll();
45
+ if (b == null) {
46
+ break;
47
+ }
48
+ b.release();
49
+ }
50
+ } while (nextFile());
51
+ }
52
+ }