embulk 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (204) hide show
  1. checksums.yaml +15 -0
  2. data/.gitignore +13 -0
  3. data/Gemfile +3 -0
  4. data/Gemfile.lock +33 -0
  5. data/README.md +117 -0
  6. data/Rakefile +58 -0
  7. data/bin/embulk +63 -0
  8. data/build.gradle +149 -0
  9. data/embulk-cli/build.gradle +6 -0
  10. data/embulk-cli/pom.xml +94 -0
  11. data/embulk-cli/src/main/java/org/embulk/cli/Main.java +15 -0
  12. data/embulk-core/build.gradle +6 -0
  13. data/embulk-core/pom.xml +143 -0
  14. data/embulk-core/src/main/java/org/embulk/EmbulkService.java +39 -0
  15. data/embulk-core/src/main/java/org/embulk/command/Runner.java +199 -0
  16. data/embulk-core/src/main/java/org/embulk/command/TablePrinter.java +119 -0
  17. data/embulk-core/src/main/java/org/embulk/config/CommitReport.java +26 -0
  18. data/embulk-core/src/main/java/org/embulk/config/Config.java +15 -0
  19. data/embulk-core/src/main/java/org/embulk/config/ConfigDefault.java +15 -0
  20. data/embulk-core/src/main/java/org/embulk/config/ConfigException.java +20 -0
  21. data/embulk-core/src/main/java/org/embulk/config/ConfigLoader.java +83 -0
  22. data/embulk-core/src/main/java/org/embulk/config/ConfigSource.java +28 -0
  23. data/embulk-core/src/main/java/org/embulk/config/DataSource.java +35 -0
  24. data/embulk-core/src/main/java/org/embulk/config/DataSourceImpl.java +208 -0
  25. data/embulk-core/src/main/java/org/embulk/config/DataSourceSerDe.java +80 -0
  26. data/embulk-core/src/main/java/org/embulk/config/GenericTypeReference.java +20 -0
  27. data/embulk-core/src/main/java/org/embulk/config/ModelManager.java +125 -0
  28. data/embulk-core/src/main/java/org/embulk/config/NextConfig.java +26 -0
  29. data/embulk-core/src/main/java/org/embulk/config/Task.java +10 -0
  30. data/embulk-core/src/main/java/org/embulk/config/TaskInvocationHandler.java +180 -0
  31. data/embulk-core/src/main/java/org/embulk/config/TaskSerDe.java +343 -0
  32. data/embulk-core/src/main/java/org/embulk/config/TaskSource.java +28 -0
  33. data/embulk-core/src/main/java/org/embulk/config/TaskValidationException.java +37 -0
  34. data/embulk-core/src/main/java/org/embulk/config/TaskValidator.java +24 -0
  35. data/embulk-core/src/main/java/org/embulk/exec/ExecModule.java +45 -0
  36. data/embulk-core/src/main/java/org/embulk/exec/ExecuteInterruptedException.java +10 -0
  37. data/embulk-core/src/main/java/org/embulk/exec/ExecuteResult.java +19 -0
  38. data/embulk-core/src/main/java/org/embulk/exec/ExtensionServiceLoaderModule.java +43 -0
  39. data/embulk-core/src/main/java/org/embulk/exec/ForSystemConfig.java +16 -0
  40. data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +307 -0
  41. data/embulk-core/src/main/java/org/embulk/exec/LocalExecutor.java +274 -0
  42. data/embulk-core/src/main/java/org/embulk/exec/LoggerProvider.java +30 -0
  43. data/embulk-core/src/main/java/org/embulk/exec/NoSampleException.java +10 -0
  44. data/embulk-core/src/main/java/org/embulk/exec/PooledBufferAllocator.java +58 -0
  45. data/embulk-core/src/main/java/org/embulk/exec/PreviewExecutor.java +138 -0
  46. data/embulk-core/src/main/java/org/embulk/exec/PreviewResult.java +27 -0
  47. data/embulk-core/src/main/java/org/embulk/exec/PreviewedNoticeError.java +17 -0
  48. data/embulk-core/src/main/java/org/embulk/exec/SamplingParserPlugin.java +116 -0
  49. data/embulk-core/src/main/java/org/embulk/exec/SystemConfigModule.java +24 -0
  50. data/embulk-core/src/main/java/org/embulk/jruby/JRubyPluginSource.java +69 -0
  51. data/embulk-core/src/main/java/org/embulk/jruby/JRubyScriptingModule.java +100 -0
  52. data/embulk-core/src/main/java/org/embulk/plugin/BuiltinPluginSourceModule.java +17 -0
  53. data/embulk-core/src/main/java/org/embulk/plugin/InjectedPluginSource.java +92 -0
  54. data/embulk-core/src/main/java/org/embulk/plugin/PluginManager.java +34 -0
  55. data/embulk-core/src/main/java/org/embulk/plugin/PluginSource.java +6 -0
  56. data/embulk-core/src/main/java/org/embulk/plugin/PluginSourceNotMatchException.java +19 -0
  57. data/embulk-core/src/main/java/org/embulk/plugin/PluginType.java +47 -0
  58. data/embulk-core/src/main/java/org/embulk/plugin/SetThreadContextClassLoader.java +19 -0
  59. data/embulk-core/src/main/java/org/embulk/spi/Buffer.java +113 -0
  60. data/embulk-core/src/main/java/org/embulk/spi/BufferAllocator.java +8 -0
  61. data/embulk-core/src/main/java/org/embulk/spi/Column.java +92 -0
  62. data/embulk-core/src/main/java/org/embulk/spi/ColumnConfig.java +79 -0
  63. data/embulk-core/src/main/java/org/embulk/spi/DecoderPlugin.java +16 -0
  64. data/embulk-core/src/main/java/org/embulk/spi/EncoderPlugin.java +16 -0
  65. data/embulk-core/src/main/java/org/embulk/spi/Exec.java +76 -0
  66. data/embulk-core/src/main/java/org/embulk/spi/ExecAction.java +6 -0
  67. data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +105 -0
  68. data/embulk-core/src/main/java/org/embulk/spi/Extension.java +42 -0
  69. data/embulk-core/src/main/java/org/embulk/spi/FileInput.java +11 -0
  70. data/embulk-core/src/main/java/org/embulk/spi/FileInputPlugin.java +19 -0
  71. data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +113 -0
  72. data/embulk-core/src/main/java/org/embulk/spi/FileOutput.java +13 -0
  73. data/embulk-core/src/main/java/org/embulk/spi/FileOutputPlugin.java +20 -0
  74. data/embulk-core/src/main/java/org/embulk/spi/FileOutputRunner.java +167 -0
  75. data/embulk-core/src/main/java/org/embulk/spi/FormatterPlugin.java +18 -0
  76. data/embulk-core/src/main/java/org/embulk/spi/GuessPlugin.java +9 -0
  77. data/embulk-core/src/main/java/org/embulk/spi/InputPlugin.java +20 -0
  78. data/embulk-core/src/main/java/org/embulk/spi/OutputPlugin.java +21 -0
  79. data/embulk-core/src/main/java/org/embulk/spi/Page.java +45 -0
  80. data/embulk-core/src/main/java/org/embulk/spi/PageBuilder.java +327 -0
  81. data/embulk-core/src/main/java/org/embulk/spi/PageFormat.java +47 -0
  82. data/embulk-core/src/main/java/org/embulk/spi/PageOutput.java +11 -0
  83. data/embulk-core/src/main/java/org/embulk/spi/PageReader.java +227 -0
  84. data/embulk-core/src/main/java/org/embulk/spi/ParserPlugin.java +17 -0
  85. data/embulk-core/src/main/java/org/embulk/spi/Schema.java +101 -0
  86. data/embulk-core/src/main/java/org/embulk/spi/SchemaConfig.java +52 -0
  87. data/embulk-core/src/main/java/org/embulk/spi/SchemaVisitor.java +14 -0
  88. data/embulk-core/src/main/java/org/embulk/spi/Transactional.java +10 -0
  89. data/embulk-core/src/main/java/org/embulk/spi/TransactionalFileInput.java +17 -0
  90. data/embulk-core/src/main/java/org/embulk/spi/TransactionalFileOutput.java +19 -0
  91. data/embulk-core/src/main/java/org/embulk/spi/TransactionalPageOutput.java +17 -0
  92. data/embulk-core/src/main/java/org/embulk/spi/time/DateTimeZoneSerDe.java +57 -0
  93. data/embulk-core/src/main/java/org/embulk/spi/time/JRubyTimeParserHelper.java +8 -0
  94. data/embulk-core/src/main/java/org/embulk/spi/time/JRubyTimeParserHelperFactory.java +6 -0
  95. data/embulk-core/src/main/java/org/embulk/spi/time/Timestamp.java +159 -0
  96. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormat.java +98 -0
  97. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormatter.java +55 -0
  98. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParseException.java +6 -0
  99. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParser.java +60 -0
  100. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampSerDe.java +50 -0
  101. data/embulk-core/src/main/java/org/embulk/spi/type/AbstractType.java +55 -0
  102. data/embulk-core/src/main/java/org/embulk/spi/type/BooleanType.java +12 -0
  103. data/embulk-core/src/main/java/org/embulk/spi/type/DoubleType.java +12 -0
  104. data/embulk-core/src/main/java/org/embulk/spi/type/LongType.java +12 -0
  105. data/embulk-core/src/main/java/org/embulk/spi/type/StringType.java +12 -0
  106. data/embulk-core/src/main/java/org/embulk/spi/type/TimestampType.java +39 -0
  107. data/embulk-core/src/main/java/org/embulk/spi/type/Type.java +15 -0
  108. data/embulk-core/src/main/java/org/embulk/spi/type/TypeDeserializer.java +47 -0
  109. data/embulk-core/src/main/java/org/embulk/spi/type/Types.java +14 -0
  110. data/embulk-core/src/main/java/org/embulk/spi/util/CharsetSerDe.java +55 -0
  111. data/embulk-core/src/main/java/org/embulk/spi/util/Decoders.java +81 -0
  112. data/embulk-core/src/main/java/org/embulk/spi/util/Encoders.java +81 -0
  113. data/embulk-core/src/main/java/org/embulk/spi/util/FileInputInputStream.java +110 -0
  114. data/embulk-core/src/main/java/org/embulk/spi/util/FileOutputOutputStream.java +94 -0
  115. data/embulk-core/src/main/java/org/embulk/spi/util/InputStreamFileInput.java +111 -0
  116. data/embulk-core/src/main/java/org/embulk/spi/util/Inputs.java +74 -0
  117. data/embulk-core/src/main/java/org/embulk/spi/util/LineDecoder.java +118 -0
  118. data/embulk-core/src/main/java/org/embulk/spi/util/LineEncoder.java +109 -0
  119. data/embulk-core/src/main/java/org/embulk/spi/util/ListFileInput.java +52 -0
  120. data/embulk-core/src/main/java/org/embulk/spi/util/Newline.java +38 -0
  121. data/embulk-core/src/main/java/org/embulk/spi/util/PagePrinter.java +102 -0
  122. data/embulk-core/src/main/java/org/embulk/spi/util/Pages.java +139 -0
  123. data/embulk-core/src/test/java/org/embulk/EmbulkTestRuntime.java +110 -0
  124. data/embulk-core/src/test/java/org/embulk/GuiceBinder.java +72 -0
  125. data/embulk-core/src/test/java/org/embulk/RandomManager.java +53 -0
  126. data/embulk-core/src/test/java/org/embulk/TestPluginSourceModule.java +23 -0
  127. data/embulk-core/src/test/java/org/embulk/TestUtilityModule.java +17 -0
  128. data/embulk-core/src/test/java/org/embulk/config/TestConfigSource.java +114 -0
  129. data/embulk-core/src/test/java/org/embulk/config/TestTaskSource.java +70 -0
  130. data/embulk-core/src/test/java/org/embulk/plugin/MockPluginSource.java +57 -0
  131. data/embulk-core/src/test/java/org/embulk/plugin/TestPluginType.java +18 -0
  132. data/embulk-core/src/test/java/org/embulk/spi/MockFileOutput.java +63 -0
  133. data/embulk-core/src/test/java/org/embulk/spi/MockFormatterPlugin.java +101 -0
  134. data/embulk-core/src/test/java/org/embulk/spi/MockParserPlugin.java +73 -0
  135. data/embulk-core/src/test/java/org/embulk/spi/PageTestUtils.java +78 -0
  136. data/embulk-core/src/test/java/org/embulk/spi/TestFileInputInputStream.java +67 -0
  137. data/embulk-core/src/test/java/org/embulk/spi/TestFileInputRunner.java +180 -0
  138. data/embulk-core/src/test/java/org/embulk/spi/TestFileOutputRunner.java +192 -0
  139. data/embulk-core/src/test/java/org/embulk/spi/TestInputStreamFileInput.java +188 -0
  140. data/embulk-core/src/test/java/org/embulk/spi/TestPageBuilderReader.java +301 -0
  141. data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestamp.java +116 -0
  142. data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestampFormatterParser.java +52 -0
  143. data/embulk-core/src/test/java/org/embulk/spi/type/TestTypeSerDe.java +45 -0
  144. data/embulk-core/src/test/java/org/embulk/spi/util/TestLineDecoder.java +132 -0
  145. data/embulk-core/src/test/java/org/embulk/spi/util/TestLineEncoder.java +123 -0
  146. data/embulk-standards/build.gradle +6 -0
  147. data/embulk-standards/pom.xml +68 -0
  148. data/embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java +158 -0
  149. data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +233 -0
  150. data/embulk-standards/src/main/java/org/embulk/standards/CsvTokenizer.java +355 -0
  151. data/embulk-standards/src/main/java/org/embulk/standards/GzipFileDecoderPlugin.java +55 -0
  152. data/embulk-standards/src/main/java/org/embulk/standards/GzipFileEncoderPlugin.java +39 -0
  153. data/embulk-standards/src/main/java/org/embulk/standards/LocalFileInputPlugin.java +138 -0
  154. data/embulk-standards/src/main/java/org/embulk/standards/LocalFileOutputPlugin.java +128 -0
  155. data/embulk-standards/src/main/java/org/embulk/standards/NullOutputPlugin.java +46 -0
  156. data/embulk-standards/src/main/java/org/embulk/standards/S3FileInputPlugin.java +238 -0
  157. data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginExtension.java +16 -0
  158. data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginModule.java +44 -0
  159. data/embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java +71 -0
  160. data/embulk-standards/src/main/resources/META-INF/services/org.embulk.spi.Extension +1 -0
  161. data/embulk-standards/src/test/java/org/embulk/standards/TestCsvParserPlugin.java +69 -0
  162. data/embulk-standards/src/test/java/org/embulk/standards/TestCsvTokenizer.java +291 -0
  163. data/embulk-standards/src/test/java/org/embulk/standards/TestS3FileInputPlugin.java +43 -0
  164. data/embulk.gemspec +27 -0
  165. data/examples/config.yml +34 -0
  166. data/examples/csv/sample.csv.gz +0 -0
  167. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  168. data/gradle/wrapper/gradle-wrapper.properties +6 -0
  169. data/gradlew +164 -0
  170. data/gradlew.bat +90 -0
  171. data/lib/embulk.rb +16 -0
  172. data/lib/embulk/buffer.rb +17 -0
  173. data/lib/embulk/column.rb +47 -0
  174. data/lib/embulk/command/embulk.rb +39 -0
  175. data/lib/embulk/command/embulk_example.rb +32 -0
  176. data/lib/embulk/command/embulk_generate_bin.rb +62 -0
  177. data/lib/embulk/command/embulk_run.rb +243 -0
  178. data/lib/embulk/data/bundle/.bundle/config +3 -0
  179. data/lib/embulk/data/bundle/Gemfile +31 -0
  180. data/lib/embulk/data/bundle/Gemfile.lock +8 -0
  181. data/lib/embulk/data/bundle/embulk/input_example.rb +40 -0
  182. data/lib/embulk/data/bundle/embulk/output_example.rb +51 -0
  183. data/lib/embulk/data_source.rb +66 -0
  184. data/lib/embulk/error.rb +5 -0
  185. data/lib/embulk/guess_charset.rb +26 -0
  186. data/lib/embulk/guess_csv.rb +195 -0
  187. data/lib/embulk/guess_gzip.rb +18 -0
  188. data/lib/embulk/guess_newline.rb +20 -0
  189. data/lib/embulk/guess_plugin.rb +113 -0
  190. data/lib/embulk/input_plugin.rb +53 -0
  191. data/lib/embulk/java/bootstrap.rb +12 -0
  192. data/lib/embulk/java/imports.rb +26 -0
  193. data/lib/embulk/java/time_helper.rb +77 -0
  194. data/lib/embulk/output_plugin.rb +104 -0
  195. data/lib/embulk/page.rb +28 -0
  196. data/lib/embulk/page_builder.rb +22 -0
  197. data/lib/embulk/plugin.rb +152 -0
  198. data/lib/embulk/plugin_registry.rb +70 -0
  199. data/lib/embulk/schema.rb +85 -0
  200. data/lib/embulk/time_format_guess.rb +331 -0
  201. data/lib/embulk/version.rb +3 -0
  202. data/pom.xml +533 -0
  203. data/settings.gradle +5 -0
  204. metadata +370 -0
@@ -0,0 +1,6 @@
1
+ dependencies {
2
+ compile project(':embulk-core')
3
+ compile 'com.amazonaws:aws-java-sdk:1.5.2'
4
+
5
+ testCompile project(':embulk-core').sourceSets.test.output
6
+ }
@@ -0,0 +1,68 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
3
+ <modelVersion>4.0.0</modelVersion>
4
+
5
+ <parent>
6
+ <groupId>org.embulk</groupId>
7
+ <artifactId>embulk-parent</artifactId>
8
+ <version>0.1.0-SNAPSHOT</version>
9
+ </parent>
10
+
11
+ <artifactId>embulk-standards</artifactId>
12
+ <name>embulk-standards</name>
13
+
14
+ <dependencies>
15
+ <dependency>
16
+ <groupId>org.embulk</groupId>
17
+ <artifactId>embulk-core</artifactId>
18
+ </dependency>
19
+
20
+ <dependency>
21
+ <groupId>org.embulk</groupId>
22
+ <artifactId>embulk-core</artifactId>
23
+ <type>test-jar</type>
24
+ <scope>test</scope>
25
+ </dependency>
26
+
27
+ <dependency>
28
+ <groupId>com.google.guava</groupId>
29
+ <artifactId>guava</artifactId>
30
+ </dependency>
31
+
32
+ <dependency>
33
+ <groupId>com.google.inject</groupId>
34
+ <artifactId>guice</artifactId>
35
+ </dependency>
36
+
37
+ <dependency>
38
+ <groupId>javax.validation</groupId>
39
+ <artifactId>validation-api</artifactId>
40
+ </dependency>
41
+
42
+ <dependency>
43
+ <groupId>com.fasterxml.jackson.core</groupId>
44
+ <artifactId>jackson-databind</artifactId>
45
+ </dependency>
46
+
47
+ <dependency>
48
+ <groupId>org.slf4j</groupId>
49
+ <artifactId>slf4j-api</artifactId>
50
+ </dependency>
51
+
52
+ <dependency>
53
+ <groupId>com.amazonaws</groupId>
54
+ <artifactId>aws-java-sdk</artifactId>
55
+ <version>1.5.2</version>
56
+ </dependency>
57
+
58
+ <dependency>
59
+ <groupId>junit</groupId>
60
+ <artifactId>junit</artifactId>
61
+ </dependency>
62
+
63
+ <dependency>
64
+ <groupId>org.mockito</groupId>
65
+ <artifactId>mockito-core</artifactId>
66
+ </dependency>
67
+ </dependencies>
68
+ </project>
@@ -0,0 +1,158 @@
1
+ package org.embulk.standards;
2
+
3
+ import com.google.common.collect.ImmutableBiMap;
4
+ import com.google.common.collect.ImmutableMap;
5
+ import org.embulk.config.Config;
6
+ import org.embulk.config.ConfigDefault;
7
+ import org.embulk.spi.type.TimestampType;
8
+ import org.embulk.spi.time.Timestamp;
9
+ import org.embulk.spi.time.TimestampFormatter;
10
+ import org.embulk.config.TaskSource;
11
+ import org.embulk.config.ConfigSource;
12
+ import org.embulk.spi.Column;
13
+ import org.embulk.spi.Schema;
14
+ import org.embulk.spi.SchemaVisitor;
15
+ import org.embulk.spi.FormatterPlugin;
16
+ import org.embulk.spi.Page;
17
+ import org.embulk.spi.PageOutput;
18
+ import org.embulk.spi.PageReader;
19
+ import org.embulk.spi.Exec;
20
+ import org.embulk.spi.FileOutput;
21
+ import org.embulk.spi.util.LineEncoder;
22
+
23
+ import java.util.Map;
24
+
25
+ public class CsvFormatterPlugin
26
+ implements FormatterPlugin
27
+ {
28
+ public interface PluginTask
29
+ extends LineEncoder.EncoderTask, TimestampFormatter.FormatterTask
30
+ {
31
+ @Config("header_line")
32
+ @ConfigDefault("true")
33
+ public boolean getHeaderLine();
34
+ }
35
+
36
+ @Override
37
+ public void transaction(ConfigSource config, Schema schema,
38
+ FormatterPlugin.Control control)
39
+ {
40
+ PluginTask task = config.loadConfig(PluginTask.class);
41
+ control.run(task.dump());
42
+ }
43
+
44
+ private Map<Integer, TimestampFormatter> newTimestampFormatters(
45
+ TimestampFormatter.FormatterTask task, Schema schema)
46
+ {
47
+ ImmutableMap.Builder<Integer, TimestampFormatter> builder = new ImmutableBiMap.Builder<>();
48
+ for (Column column : schema.getColumns()) {
49
+ if (column.getType() instanceof TimestampType) {
50
+ TimestampType tt = (TimestampType) column.getType();
51
+ builder.put(column.getIndex(), new TimestampFormatter(tt.getFormat(), task));
52
+ }
53
+ }
54
+ return builder.build();
55
+ }
56
+
57
+ @Override
58
+ public PageOutput open(TaskSource taskSource, final Schema schema,
59
+ FileOutput output)
60
+ {
61
+ final PluginTask task = taskSource.loadTask(PluginTask.class);
62
+ final LineEncoder encoder = new LineEncoder(output, task);
63
+ final Map<Integer, TimestampFormatter> timestampFormatters =
64
+ newTimestampFormatters(task, schema);
65
+
66
+ // create a file
67
+ encoder.nextFile();
68
+
69
+ // write header
70
+ if (task.getHeaderLine()) {
71
+ writeHeader(schema, encoder);
72
+ }
73
+
74
+ return new PageOutput() {
75
+ private final PageReader pageReader = new PageReader(schema);
76
+
77
+ public void add(Page page)
78
+ {
79
+ pageReader.setPage(page);
80
+ while (pageReader.nextRecord()) {
81
+ schema.visitColumns(new SchemaVisitor() {
82
+ public void booleanColumn(Column column)
83
+ {
84
+ addDelimiter(column);
85
+ if (!pageReader.isNull(column)) {
86
+ encoder.addText(Boolean.toString(pageReader.getBoolean(column)));
87
+ }
88
+ }
89
+
90
+ public void longColumn(Column column)
91
+ {
92
+ addDelimiter(column);
93
+ if (!pageReader.isNull(column)) {
94
+ encoder.addText(Long.toString(pageReader.getLong(column)));
95
+ }
96
+ }
97
+
98
+ public void doubleColumn(Column column)
99
+ {
100
+ addDelimiter(column);
101
+ if (!pageReader.isNull(column)) {
102
+ encoder.addText(Double.toString(pageReader.getDouble(column)));
103
+ }
104
+ }
105
+
106
+ public void stringColumn(Column column)
107
+ {
108
+ addDelimiter(column);
109
+ if (!pageReader.isNull(column)) {
110
+ // TODO escape and quoting
111
+ encoder.addText(pageReader.getString(column));
112
+ }
113
+ }
114
+
115
+ public void timestampColumn(Column column)
116
+ {
117
+ addDelimiter(column);
118
+ if (!pageReader.isNull(column)) {
119
+ Timestamp value = pageReader.getTimestamp(column);
120
+ encoder.addText(timestampFormatters.get(column.getIndex()).format(value));
121
+ }
122
+ }
123
+
124
+ private void addDelimiter(Column column)
125
+ {
126
+ if (column.getIndex() != 0) {
127
+ encoder.addText(",");
128
+ }
129
+ }
130
+ });
131
+
132
+ encoder.addNewLine();
133
+ }
134
+ }
135
+
136
+ public void finish()
137
+ {
138
+ encoder.finish();
139
+ }
140
+
141
+ public void close()
142
+ {
143
+ encoder.close();
144
+ }
145
+ };
146
+ }
147
+
148
+ private void writeHeader(Schema schema, LineEncoder encoder)
149
+ {
150
+ for (Column column : schema.getColumns()) {
151
+ if (column.getIndex() != 0) {
152
+ encoder.addText(",");
153
+ }
154
+ encoder.addText(column.getName());
155
+ }
156
+ encoder.addNewLine();
157
+ }
158
+ }
@@ -0,0 +1,233 @@
1
+ package org.embulk.standards;
2
+
3
+ import com.google.common.base.Preconditions;
4
+ import com.google.common.collect.ImmutableMap;
5
+ import com.google.common.base.Optional;
6
+ import org.embulk.config.Task;
7
+ import org.embulk.config.Config;
8
+ import org.embulk.config.ConfigDefault;
9
+ import org.embulk.config.ConfigSource;
10
+ import org.embulk.config.TaskSource;
11
+ import org.embulk.spi.type.TimestampType;
12
+ import org.embulk.spi.time.TimestampParser;
13
+ import org.embulk.spi.time.TimestampParseException;
14
+ import org.embulk.spi.Column;
15
+ import org.embulk.spi.Schema;
16
+ import org.embulk.spi.SchemaConfig;
17
+ import org.embulk.spi.SchemaVisitor;
18
+ import org.embulk.spi.PageBuilder;
19
+ import org.embulk.spi.ParserPlugin;
20
+ import org.embulk.spi.Exec;
21
+ import org.embulk.spi.FileInput;
22
+ import org.embulk.spi.PageOutput;
23
+ import org.embulk.spi.BufferAllocator;
24
+ import org.embulk.spi.util.LineDecoder;
25
+ import org.slf4j.Logger;
26
+
27
+ import java.util.Map;
28
+
29
+ public class CsvParserPlugin
30
+ implements ParserPlugin
31
+ {
32
+ public interface PluginTask
33
+ extends Task, LineDecoder.DecoderTask, TimestampParser.ParserTask
34
+ {
35
+ @Config("columns")
36
+ public SchemaConfig getSchemaConfig();
37
+
38
+ @Config("header_line") // how to set default value?? TODO @Default("true")
39
+ @ConfigDefault("false")
40
+ public boolean getHeaderLine();
41
+
42
+ @Config("delimiter")
43
+ @ConfigDefault("\",\"")
44
+ public char getDelimiterChar();
45
+
46
+ @Config("quote")
47
+ @ConfigDefault("\"\\\"\"")
48
+ public char getQuoteChar();
49
+
50
+ @Config("escape")
51
+ @ConfigDefault("\"\\\\\"")
52
+ public char getEscapeChar();
53
+
54
+ // Null value handling: if the CsvParser found 'non-quoted empty string's,
55
+ // it replaces them to string that users specified like "\N", "NULL".
56
+ @Config("null_string")
57
+ @ConfigDefault("null")
58
+ public Optional<String> getNullString();
59
+
60
+ @Config("trim_if_not_quoted")
61
+ @ConfigDefault("false")
62
+ public boolean getTrimIfNotQuoted();
63
+
64
+ @Config("max_quoted_size_limit")
65
+ @ConfigDefault("131072") //128kB
66
+ public long getMaxQuotedSizeLimit();
67
+ }
68
+
69
+ private final Logger log;
70
+
71
+ public CsvParserPlugin()
72
+ {
73
+ log = Exec.getLogger(CsvParserPlugin.class);
74
+ }
75
+
76
+ @Override
77
+ public void transaction(ConfigSource config, ParserPlugin.Control control)
78
+ {
79
+ PluginTask task = config.loadConfig(PluginTask.class);
80
+ control.run(task.dump(), task.getSchemaConfig().toSchema());
81
+ }
82
+
83
+ private Map<Integer, TimestampParser> newTimestampParsers(
84
+ TimestampParser.ParserTask task, Schema schema)
85
+ {
86
+ ImmutableMap.Builder<Integer, TimestampParser> builder = new ImmutableMap.Builder<>();
87
+ for (Column column : schema.getColumns()) {
88
+ if (column.getType() instanceof TimestampType) {
89
+ TimestampType tt = (TimestampType) column.getType();
90
+ builder.put(column.getIndex(), new TimestampParser(tt.getFormat(), task));
91
+ }
92
+ }
93
+ return builder.build();
94
+ }
95
+
96
+ @Override
97
+ public void run(TaskSource taskSource, final Schema schema,
98
+ FileInput input, PageOutput output)
99
+ {
100
+ PluginTask task = taskSource.loadTask(PluginTask.class);
101
+ final Map<Integer, TimestampParser> timestampFormatters = newTimestampParsers(task, schema);
102
+ final CsvTokenizer tokenizer = new CsvTokenizer(new LineDecoder(input, task), task);
103
+ final String nullStringOrNull = task.getNullString().orNull();
104
+ boolean skipHeaderLine = task.getHeaderLine();
105
+
106
+ try (final PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), schema, output)) {
107
+ while (tokenizer.nextFile()) {
108
+ if (skipHeaderLine) {
109
+ // skip the first line
110
+ if (tokenizer.nextRecord()) {
111
+ for (int i=0; i < schema.getColumnCount(); i++) {
112
+ tokenizer.nextColumn(); // TODO check return value?
113
+ }
114
+ }
115
+ }
116
+
117
+ while (true) {
118
+ try {
119
+ if (!tokenizer.nextRecord()) {
120
+ break;
121
+ }
122
+
123
+ schema.visitColumns(new SchemaVisitor() {
124
+ public void booleanColumn(Column column)
125
+ {
126
+ String v = nextColumn(schema, tokenizer, nullStringOrNull);
127
+ if (v == null) {
128
+ pageBuilder.setNull(column);
129
+ } else {
130
+ pageBuilder.setBoolean(column, Boolean.parseBoolean(v));
131
+ }
132
+ }
133
+
134
+ public void longColumn(Column column)
135
+ {
136
+ String v = nextColumn(schema, tokenizer, nullStringOrNull);
137
+ if (v == null) {
138
+ pageBuilder.setNull(column);
139
+ } else {
140
+ try {
141
+ pageBuilder.setLong(column, Long.parseLong(v));
142
+ } catch (NumberFormatException e) {
143
+ // TODO support default value
144
+ throw new CsvRecordValidateException(e);
145
+ }
146
+ }
147
+ }
148
+
149
+ public void doubleColumn(Column column)
150
+ {
151
+ String v = nextColumn(schema, tokenizer, nullStringOrNull);
152
+ if (v == null) {
153
+ pageBuilder.setNull(column);
154
+ } else {
155
+ try {
156
+ pageBuilder.setDouble(column, Double.parseDouble(v));
157
+ } catch (NumberFormatException e) {
158
+ // TODO support default value
159
+ throw new CsvRecordValidateException(e);
160
+ }
161
+ }
162
+ }
163
+
164
+ public void stringColumn(Column column)
165
+ {
166
+ String v = nextColumn(schema, tokenizer, nullStringOrNull);
167
+ if (v == null) {
168
+ pageBuilder.setNull(column);
169
+ } else {
170
+ pageBuilder.setString(column, v);
171
+ }
172
+ }
173
+
174
+ public void timestampColumn(Column column)
175
+ {
176
+ String v = nextColumn(schema, tokenizer, nullStringOrNull);
177
+ if (v == null) {
178
+ pageBuilder.setNull(column);
179
+ } else {
180
+ try {
181
+ pageBuilder.setTimestamp(column, (timestampFormatters.get(column.getIndex()).parse(v)));
182
+ } catch (TimestampParseException e) {
183
+ // TODO support default value
184
+ throw new CsvRecordValidateException(e);
185
+ }
186
+ }
187
+ }
188
+ });
189
+ pageBuilder.addRecord();
190
+
191
+ } catch (Exception e) {
192
+ // TODO logging
193
+ long lineNumber = tokenizer.getCurrentLineNumber();
194
+ String skippedLine = tokenizer.skipCurrentLine();
195
+ log.warn(String.format("Skipped (line %d): %s", lineNumber, skippedLine), e);
196
+ //exec.notice().skippedLine(skippedLine);
197
+ }
198
+ }
199
+ }
200
+
201
+ pageBuilder.finish();
202
+ }
203
+ }
204
+
205
+ private static String nextColumn(Schema schema, CsvTokenizer tokenizer, String nullStringOrNull)
206
+ {
207
+ String v = tokenizer.nextColumn();
208
+ if (v == null) {
209
+ throw new RuntimeException(String.format("Expected %d columns but line %d has fewer number of columns",
210
+ schema.getColumnCount(), tokenizer.getCurrentLineNumber()));
211
+ }
212
+
213
+ if (!v.isEmpty()) {
214
+ if (v.equals(nullStringOrNull)) {
215
+ return null;
216
+ }
217
+ return v;
218
+ } else if (tokenizer.wasQuotedColumn()) {
219
+ return "";
220
+ } else {
221
+ return null;
222
+ }
223
+ }
224
+
225
+ static class CsvRecordValidateException
226
+ extends RuntimeException
227
+ {
228
+ CsvRecordValidateException(Throwable cause)
229
+ {
230
+ super(cause);
231
+ }
232
+ }
233
+ }