embulk 0.8.39-java → 0.10.28-java

Sign up to get free protection for your applications and to get access to all the features.
Files changed (604)
  1. checksums.yaml +5 -5
  2. data/Gemfile +1 -1
  3. data/LICENSE +202 -0
  4. data/embulk.gemspec +34 -33
  5. data/lib/embulk.rb +5 -61
  6. data/lib/embulk/buffer.rb +1 -2
  7. data/lib/embulk/gem_version.rb +3 -0
  8. data/lib/embulk/guess/charset.rb +2 -11
  9. data/lib/embulk/java/bootstrap.rb +0 -1
  10. data/lib/embulk/java_plugin.rb +9 -1
  11. data/lib/embulk/logger.rb +2 -1
  12. data/lib/embulk/page_builder.rb +2 -2
  13. data/lib/embulk/plugin.rb +2 -2
  14. data/lib/embulk/schema.rb +3 -4
  15. data/lib/embulk/version.rb +43 -19
  16. metadata +23 -762
  17. data/.gitignore +0 -15
  18. data/.ruby-version +0 -1
  19. data/.travis.yml +0 -28
  20. data/COPYING +0 -14
  21. data/Gemfile.lock +0 -30
  22. data/README.md +0 -229
  23. data/Rakefile +0 -26
  24. data/appveyor.yml +0 -28
  25. data/bin/embulk +0 -139
  26. data/build.gradle +0 -419
  27. data/embulk-cli/build.gradle +0 -9
  28. data/embulk-cli/src/main/bat/selfrun.bat +0 -107
  29. data/embulk-cli/src/main/java/org/embulk/cli/EmbulkArguments.java +0 -54
  30. data/embulk-cli/src/main/java/org/embulk/cli/EmbulkCommandLine.java +0 -227
  31. data/embulk-cli/src/main/java/org/embulk/cli/EmbulkCommandLineException.java +0 -25
  32. data/embulk-cli/src/main/java/org/embulk/cli/EmbulkExample.java +0 -86
  33. data/embulk-cli/src/main/java/org/embulk/cli/EmbulkMigrate.java +0 -480
  34. data/embulk-cli/src/main/java/org/embulk/cli/EmbulkNew.java +0 -419
  35. data/embulk-cli/src/main/java/org/embulk/cli/EmbulkRun.java +0 -786
  36. data/embulk-cli/src/main/java/org/embulk/cli/EmbulkSelfUpdate.java +0 -235
  37. data/embulk-cli/src/main/java/org/embulk/cli/EmbulkSubcommand.java +0 -47
  38. data/embulk-cli/src/main/java/org/embulk/cli/Main.java +0 -31
  39. data/embulk-cli/src/main/java/org/embulk/cli/parse/AbstractHelpLineDefinition.java +0 -15
  40. data/embulk-cli/src/main/java/org/embulk/cli/parse/CliHelpFormatterWithHelpMessages.java +0 -141
  41. data/embulk-cli/src/main/java/org/embulk/cli/parse/CliOptionsWithHelpMessages.java +0 -45
  42. data/embulk-cli/src/main/java/org/embulk/cli/parse/EmbulkCommandLineHelpRequired.java +0 -10
  43. data/embulk-cli/src/main/java/org/embulk/cli/parse/EmbulkCommandLineParseException.java +0 -25
  44. data/embulk-cli/src/main/java/org/embulk/cli/parse/EmbulkCommandLineParser.java +0 -187
  45. data/embulk-cli/src/main/java/org/embulk/cli/parse/HelpMessageAsCliOption.java +0 -36
  46. data/embulk-cli/src/main/java/org/embulk/cli/parse/HelpMessageLineDefinition.java +0 -20
  47. data/embulk-cli/src/main/java/org/embulk/cli/parse/OptionBehavior.java +0 -39
  48. data/embulk-cli/src/main/java/org/embulk/cli/parse/OptionDefinition.java +0 -120
  49. data/embulk-cli/src/main/sh/selfrun.sh +0 -60
  50. data/embulk-cli/src/test/java/org/embulk/cli/DummyMain.java +0 -23
  51. data/embulk-cli/src/test/java/org/embulk/cli/SelfrunTest.java +0 -296
  52. data/embulk-core/build.gradle +0 -83
  53. data/embulk-core/src/main/java/org/embulk/EmbulkEmbed.java +0 -321
  54. data/embulk-core/src/main/java/org/embulk/EmbulkRunner.java +0 -531
  55. data/embulk-core/src/main/java/org/embulk/EmbulkService.java +0 -78
  56. data/embulk-core/src/main/java/org/embulk/EmbulkSetup.java +0 -49
  57. data/embulk-core/src/main/java/org/embulk/EmbulkVersion.java +0 -109
  58. data/embulk-core/src/main/java/org/embulk/command/PreviewPrinter.java +0 -87
  59. data/embulk-core/src/main/java/org/embulk/command/TablePreviewPrinter.java +0 -107
  60. data/embulk-core/src/main/java/org/embulk/command/VerticalPreviewPrinter.java +0 -47
  61. data/embulk-core/src/main/java/org/embulk/config/CommitReport.java +0 -36
  62. data/embulk-core/src/main/java/org/embulk/config/Config.java +0 -15
  63. data/embulk-core/src/main/java/org/embulk/config/ConfigDefault.java +0 -15
  64. data/embulk-core/src/main/java/org/embulk/config/ConfigDiff.java +0 -32
  65. data/embulk-core/src/main/java/org/embulk/config/ConfigException.java +0 -26
  66. data/embulk-core/src/main/java/org/embulk/config/ConfigInject.java +0 -14
  67. data/embulk-core/src/main/java/org/embulk/config/ConfigLoader.java +0 -149
  68. data/embulk-core/src/main/java/org/embulk/config/ConfigSource.java +0 -34
  69. data/embulk-core/src/main/java/org/embulk/config/DataSource.java +0 -41
  70. data/embulk-core/src/main/java/org/embulk/config/DataSourceImpl.java +0 -243
  71. data/embulk-core/src/main/java/org/embulk/config/DataSourceSerDe.java +0 -84
  72. data/embulk-core/src/main/java/org/embulk/config/GenericTypeReference.java +0 -20
  73. data/embulk-core/src/main/java/org/embulk/config/ModelManager.java +0 -123
  74. data/embulk-core/src/main/java/org/embulk/config/Task.java +0 -10
  75. data/embulk-core/src/main/java/org/embulk/config/TaskInvocationHandler.java +0 -180
  76. data/embulk-core/src/main/java/org/embulk/config/TaskReport.java +0 -32
  77. data/embulk-core/src/main/java/org/embulk/config/TaskSerDe.java +0 -349
  78. data/embulk-core/src/main/java/org/embulk/config/TaskSource.java +0 -34
  79. data/embulk-core/src/main/java/org/embulk/config/TaskValidationException.java +0 -38
  80. data/embulk-core/src/main/java/org/embulk/config/TaskValidator.java +0 -24
  81. data/embulk-core/src/main/java/org/embulk/config/UserDataException.java +0 -4
  82. data/embulk-core/src/main/java/org/embulk/config/UserDataExceptions.java +0 -17
  83. data/embulk-core/src/main/java/org/embulk/config/YamlTagResolver.java +0 -53
  84. data/embulk-core/src/main/java/org/embulk/exec/BufferFileInputPlugin.java +0 -88
  85. data/embulk-core/src/main/java/org/embulk/exec/BulkLoader.java +0 -754
  86. data/embulk-core/src/main/java/org/embulk/exec/ConfigurableGuessInputPlugin.java +0 -9
  87. data/embulk-core/src/main/java/org/embulk/exec/ExecModule.java +0 -53
  88. data/embulk-core/src/main/java/org/embulk/exec/ExecutionInterruptedException.java +0 -10
  89. data/embulk-core/src/main/java/org/embulk/exec/ExecutionResult.java +0 -33
  90. data/embulk-core/src/main/java/org/embulk/exec/ExtensionServiceLoaderModule.java +0 -43
  91. data/embulk-core/src/main/java/org/embulk/exec/ForGuess.java +0 -16
  92. data/embulk-core/src/main/java/org/embulk/exec/ForSystemConfig.java +0 -16
  93. data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +0 -309
  94. data/embulk-core/src/main/java/org/embulk/exec/LocalExecutorPlugin.java +0 -563
  95. data/embulk-core/src/main/java/org/embulk/exec/LoggerProvider.java +0 -68
  96. data/embulk-core/src/main/java/org/embulk/exec/NoSampleException.java +0 -10
  97. data/embulk-core/src/main/java/org/embulk/exec/PartialExecutionException.java +0 -26
  98. data/embulk-core/src/main/java/org/embulk/exec/PooledBufferAllocator.java +0 -77
  99. data/embulk-core/src/main/java/org/embulk/exec/PreviewExecutor.java +0 -222
  100. data/embulk-core/src/main/java/org/embulk/exec/PreviewResult.java +0 -27
  101. data/embulk-core/src/main/java/org/embulk/exec/PreviewedNoticeError.java +0 -17
  102. data/embulk-core/src/main/java/org/embulk/exec/ResumeState.java +0 -100
  103. data/embulk-core/src/main/java/org/embulk/exec/SamplingParserPlugin.java +0 -215
  104. data/embulk-core/src/main/java/org/embulk/exec/SetCurrentThreadName.java +0 -22
  105. data/embulk-core/src/main/java/org/embulk/exec/SkipTransactionException.java +0 -23
  106. data/embulk-core/src/main/java/org/embulk/exec/SystemConfigModule.java +0 -24
  107. data/embulk-core/src/main/java/org/embulk/exec/TempFileAllocator.java +0 -35
  108. data/embulk-core/src/main/java/org/embulk/exec/TransactionStage.java +0 -27
  109. data/embulk-core/src/main/java/org/embulk/jruby/JRubyPluginSource.java +0 -76
  110. data/embulk-core/src/main/java/org/embulk/jruby/JRubyScriptingModule.java +0 -395
  111. data/embulk-core/src/main/java/org/embulk/plugin/BuiltinPluginSourceModule.java +0 -17
  112. data/embulk-core/src/main/java/org/embulk/plugin/DefaultPluginType.java +0 -50
  113. data/embulk-core/src/main/java/org/embulk/plugin/InjectedPluginSource.java +0 -100
  114. data/embulk-core/src/main/java/org/embulk/plugin/MavenPluginType.java +0 -112
  115. data/embulk-core/src/main/java/org/embulk/plugin/PluginClassLoader.java +0 -993
  116. data/embulk-core/src/main/java/org/embulk/plugin/PluginClassLoaderFactory.java +0 -16
  117. data/embulk-core/src/main/java/org/embulk/plugin/PluginClassLoaderModule.java +0 -102
  118. data/embulk-core/src/main/java/org/embulk/plugin/PluginManager.java +0 -78
  119. data/embulk-core/src/main/java/org/embulk/plugin/PluginSource.java +0 -49
  120. data/embulk-core/src/main/java/org/embulk/plugin/PluginSourceNotMatchException.java +0 -25
  121. data/embulk-core/src/main/java/org/embulk/plugin/PluginType.java +0 -122
  122. data/embulk-core/src/main/java/org/embulk/plugin/compat/InputPluginWrapper.java +0 -102
  123. data/embulk-core/src/main/java/org/embulk/plugin/compat/PluginWrappers.java +0 -30
  124. data/embulk-core/src/main/java/org/embulk/plugin/compat/TransactionalFileInputWrapper.java +0 -96
  125. data/embulk-core/src/main/java/org/embulk/plugin/compat/TransactionalFileOutputWrapper.java +0 -102
  126. data/embulk-core/src/main/java/org/embulk/plugin/compat/TransactionalPageOutputWrapper.java +0 -95
  127. data/embulk-core/src/main/java/org/embulk/plugin/jar/InvalidJarPluginException.java +0 -14
  128. data/embulk-core/src/main/java/org/embulk/plugin/jar/JarPluginLoader.java +0 -232
  129. data/embulk-core/src/main/java/org/embulk/plugin/maven/MavenArtifactFinder.java +0 -134
  130. data/embulk-core/src/main/java/org/embulk/plugin/maven/MavenArtifactNotFoundException.java +0 -20
  131. data/embulk-core/src/main/java/org/embulk/plugin/maven/MavenPluginSource.java +0 -187
  132. data/embulk-core/src/main/java/org/embulk/plugin/maven/MavenPluginSourceModule.java +0 -22
  133. data/embulk-core/src/main/java/org/embulk/plugin/maven/MavenRepositoryNotFoundException.java +0 -31
  134. data/embulk-core/src/main/java/org/embulk/spi/AbortTransactionResource.java +0 -36
  135. data/embulk-core/src/main/java/org/embulk/spi/Buffer.java +0 -148
  136. data/embulk-core/src/main/java/org/embulk/spi/BufferAllocator.java +0 -8
  137. data/embulk-core/src/main/java/org/embulk/spi/CloseResource.java +0 -42
  138. data/embulk-core/src/main/java/org/embulk/spi/Column.java +0 -95
  139. data/embulk-core/src/main/java/org/embulk/spi/ColumnConfig.java +0 -112
  140. data/embulk-core/src/main/java/org/embulk/spi/ColumnVisitor.java +0 -16
  141. data/embulk-core/src/main/java/org/embulk/spi/DataException.java +0 -23
  142. data/embulk-core/src/main/java/org/embulk/spi/DecoderPlugin.java +0 -16
  143. data/embulk-core/src/main/java/org/embulk/spi/EncoderPlugin.java +0 -16
  144. data/embulk-core/src/main/java/org/embulk/spi/Exec.java +0 -113
  145. data/embulk-core/src/main/java/org/embulk/spi/ExecAction.java +0 -6
  146. data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +0 -220
  147. data/embulk-core/src/main/java/org/embulk/spi/ExecutorPlugin.java +0 -19
  148. data/embulk-core/src/main/java/org/embulk/spi/Extension.java +0 -44
  149. data/embulk-core/src/main/java/org/embulk/spi/FileInput.java +0 -11
  150. data/embulk-core/src/main/java/org/embulk/spi/FileInputPlugin.java +0 -30
  151. data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +0 -169
  152. data/embulk-core/src/main/java/org/embulk/spi/FileOutput.java +0 -13
  153. data/embulk-core/src/main/java/org/embulk/spi/FileOutputPlugin.java +0 -28
  154. data/embulk-core/src/main/java/org/embulk/spi/FileOutputRunner.java +0 -199
  155. data/embulk-core/src/main/java/org/embulk/spi/FilterPlugin.java +0 -18
  156. data/embulk-core/src/main/java/org/embulk/spi/FormatterPlugin.java +0 -18
  157. data/embulk-core/src/main/java/org/embulk/spi/GuessPlugin.java +0 -9
  158. data/embulk-core/src/main/java/org/embulk/spi/InputPlugin.java +0 -33
  159. data/embulk-core/src/main/java/org/embulk/spi/OutputPlugin.java +0 -29
  160. data/embulk-core/src/main/java/org/embulk/spi/Page.java +0 -86
  161. data/embulk-core/src/main/java/org/embulk/spi/PageBuilder.java +0 -696
  162. data/embulk-core/src/main/java/org/embulk/spi/PageFormat.java +0 -47
  163. data/embulk-core/src/main/java/org/embulk/spi/PageOutput.java +0 -11
  164. data/embulk-core/src/main/java/org/embulk/spi/PageReader.java +0 -248
  165. data/embulk-core/src/main/java/org/embulk/spi/ParserPlugin.java +0 -17
  166. data/embulk-core/src/main/java/org/embulk/spi/ProcessState.java +0 -10
  167. data/embulk-core/src/main/java/org/embulk/spi/ProcessTask.java +0 -117
  168. data/embulk-core/src/main/java/org/embulk/spi/Schema.java +0 -139
  169. data/embulk-core/src/main/java/org/embulk/spi/SchemaConfig.java +0 -93
  170. data/embulk-core/src/main/java/org/embulk/spi/SchemaConfigException.java +0 -22
  171. data/embulk-core/src/main/java/org/embulk/spi/TaskState.java +0 -81
  172. data/embulk-core/src/main/java/org/embulk/spi/TempFileException.java +0 -19
  173. data/embulk-core/src/main/java/org/embulk/spi/TempFileSpace.java +0 -88
  174. data/embulk-core/src/main/java/org/embulk/spi/Transactional.java +0 -10
  175. data/embulk-core/src/main/java/org/embulk/spi/TransactionalFileInput.java +0 -17
  176. data/embulk-core/src/main/java/org/embulk/spi/TransactionalFileOutput.java +0 -19
  177. data/embulk-core/src/main/java/org/embulk/spi/TransactionalPageOutput.java +0 -17
  178. data/embulk-core/src/main/java/org/embulk/spi/json/JsonParseException.java +0 -17
  179. data/embulk-core/src/main/java/org/embulk/spi/json/JsonParser.java +0 -233
  180. data/embulk-core/src/main/java/org/embulk/spi/json/RubyValueApi.java +0 -100
  181. data/embulk-core/src/main/java/org/embulk/spi/time/DateTimeZoneSerDe.java +0 -55
  182. data/embulk-core/src/main/java/org/embulk/spi/time/Timestamp.java +0 -180
  183. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormat.java +0 -158
  184. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormatter.java +0 -125
  185. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParseException.java +0 -12
  186. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParser.java +0 -310
  187. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampSerDe.java +0 -49
  188. data/embulk-core/src/main/java/org/embulk/spi/type/AbstractType.java +0 -58
  189. data/embulk-core/src/main/java/org/embulk/spi/type/BooleanType.java +0 -12
  190. data/embulk-core/src/main/java/org/embulk/spi/type/DoubleType.java +0 -12
  191. data/embulk-core/src/main/java/org/embulk/spi/type/JsonType.java +0 -14
  192. data/embulk-core/src/main/java/org/embulk/spi/type/LongType.java +0 -12
  193. data/embulk-core/src/main/java/org/embulk/spi/type/StringType.java +0 -12
  194. data/embulk-core/src/main/java/org/embulk/spi/type/TimestampType.java +0 -41
  195. data/embulk-core/src/main/java/org/embulk/spi/type/Type.java +0 -15
  196. data/embulk-core/src/main/java/org/embulk/spi/type/TypeDeserializer.java +0 -45
  197. data/embulk-core/src/main/java/org/embulk/spi/type/Types.java +0 -16
  198. data/embulk-core/src/main/java/org/embulk/spi/unit/ByteSize.java +0 -156
  199. data/embulk-core/src/main/java/org/embulk/spi/unit/LocalFile.java +0 -106
  200. data/embulk-core/src/main/java/org/embulk/spi/unit/LocalFileSerDe.java +0 -113
  201. data/embulk-core/src/main/java/org/embulk/spi/unit/ToString.java +0 -54
  202. data/embulk-core/src/main/java/org/embulk/spi/unit/ToStringMap.java +0 -34
  203. data/embulk-core/src/main/java/org/embulk/spi/util/CharsetSerDe.java +0 -55
  204. data/embulk-core/src/main/java/org/embulk/spi/util/Decoders.java +0 -81
  205. data/embulk-core/src/main/java/org/embulk/spi/util/DynamicColumnNotFoundException.java +0 -10
  206. data/embulk-core/src/main/java/org/embulk/spi/util/DynamicColumnSetter.java +0 -21
  207. data/embulk-core/src/main/java/org/embulk/spi/util/DynamicColumnSetterFactory.java +0 -134
  208. data/embulk-core/src/main/java/org/embulk/spi/util/DynamicPageBuilder.java +0 -200
  209. data/embulk-core/src/main/java/org/embulk/spi/util/Encoders.java +0 -81
  210. data/embulk-core/src/main/java/org/embulk/spi/util/Executors.java +0 -93
  211. data/embulk-core/src/main/java/org/embulk/spi/util/FileInputInputStream.java +0 -111
  212. data/embulk-core/src/main/java/org/embulk/spi/util/FileOutputOutputStream.java +0 -119
  213. data/embulk-core/src/main/java/org/embulk/spi/util/Filters.java +0 -100
  214. data/embulk-core/src/main/java/org/embulk/spi/util/InputStreamFileInput.java +0 -189
  215. data/embulk-core/src/main/java/org/embulk/spi/util/InputStreamTransactionalFileInput.java +0 -25
  216. data/embulk-core/src/main/java/org/embulk/spi/util/Inputs.java +0 -65
  217. data/embulk-core/src/main/java/org/embulk/spi/util/LineDecoder.java +0 -157
  218. data/embulk-core/src/main/java/org/embulk/spi/util/LineEncoder.java +0 -123
  219. data/embulk-core/src/main/java/org/embulk/spi/util/ListFileInput.java +0 -52
  220. data/embulk-core/src/main/java/org/embulk/spi/util/Newline.java +0 -38
  221. data/embulk-core/src/main/java/org/embulk/spi/util/OutputStreamFileOutput.java +0 -88
  222. data/embulk-core/src/main/java/org/embulk/spi/util/PagePrinter.java +0 -126
  223. data/embulk-core/src/main/java/org/embulk/spi/util/Pages.java +0 -149
  224. data/embulk-core/src/main/java/org/embulk/spi/util/ResumableInputStream.java +0 -168
  225. data/embulk-core/src/main/java/org/embulk/spi/util/RetryExecutor.java +0 -130
  226. data/embulk-core/src/main/java/org/embulk/spi/util/Timestamps.java +0 -53
  227. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/AbstractDynamicColumnSetter.java +0 -94
  228. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/BooleanColumnSetter.java +0 -71
  229. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/DefaultValueSetter.java +0 -20
  230. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/DoubleColumnSetter.java +0 -68
  231. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/JsonColumnSetter.java +0 -64
  232. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/LongColumnSetter.java +0 -78
  233. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/NullDefaultValueSetter.java +0 -39
  234. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/SkipColumnSetter.java +0 -68
  235. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/StringColumnSetter.java +0 -63
  236. data/embulk-core/src/main/java/org/embulk/spi/util/dynamic/TimestampColumnSetter.java +0 -72
  237. data/embulk-core/src/main/resources/embulk/logback-color.xml +0 -72
  238. data/embulk-core/src/main/resources/embulk/logback-console.xml +0 -14
  239. data/embulk-core/src/main/resources/embulk/logback-file.xml +0 -23
  240. data/embulk-core/src/main/resources/embulk/parent_first_packages.properties +0 -73
  241. data/embulk-core/src/main/resources/embulk/parent_first_resources.properties +0 -29
  242. data/embulk-core/src/test/java/org/embulk/EmbulkTestRuntime.java +0 -122
  243. data/embulk-core/src/test/java/org/embulk/GuiceBinder.java +0 -72
  244. data/embulk-core/src/test/java/org/embulk/RandomManager.java +0 -53
  245. data/embulk-core/src/test/java/org/embulk/TestPluginSourceModule.java +0 -23
  246. data/embulk-core/src/test/java/org/embulk/TestUtilityModule.java +0 -17
  247. data/embulk-core/src/test/java/org/embulk/config/TestConfigLoader.java +0 -66
  248. data/embulk-core/src/test/java/org/embulk/config/TestConfigSource.java +0 -114
  249. data/embulk-core/src/test/java/org/embulk/config/TestTaskSource.java +0 -70
  250. data/embulk-core/src/test/java/org/embulk/plugin/MockPluginSource.java +0 -58
  251. data/embulk-core/src/test/java/org/embulk/plugin/TestPluginType.java +0 -83
  252. data/embulk-core/src/test/java/org/embulk/plugin/TestPluginTypeSerDe.java +0 -67
  253. data/embulk-core/src/test/java/org/embulk/plugin/jar/ExampleJarSpiV0.java +0 -9
  254. data/embulk-core/src/test/java/org/embulk/plugin/jar/JarBuilder.java +0 -101
  255. data/embulk-core/src/test/java/org/embulk/plugin/jar/TestJarPluginLoader.java +0 -60
  256. data/embulk-core/src/test/java/org/embulk/plugin/maven/TestMavenArtifactFinder.java +0 -41
  257. data/embulk-core/src/test/java/org/embulk/spi/MockFileOutput.java +0 -63
  258. data/embulk-core/src/test/java/org/embulk/spi/MockFormatterPlugin.java +0 -108
  259. data/embulk-core/src/test/java/org/embulk/spi/MockParserPlugin.java +0 -80
  260. data/embulk-core/src/test/java/org/embulk/spi/PageTestUtils.java +0 -59
  261. data/embulk-core/src/test/java/org/embulk/spi/TestBuffer.java +0 -24
  262. data/embulk-core/src/test/java/org/embulk/spi/TestFileInputInputStream.java +0 -89
  263. data/embulk-core/src/test/java/org/embulk/spi/TestFileInputRunner.java +0 -199
  264. data/embulk-core/src/test/java/org/embulk/spi/TestFileOutputRunner.java +0 -221
  265. data/embulk-core/src/test/java/org/embulk/spi/TestInputStreamFileInput.java +0 -188
  266. data/embulk-core/src/test/java/org/embulk/spi/TestPageBuilderReader.java +0 -411
  267. data/embulk-core/src/test/java/org/embulk/spi/json/TestJsonParser.java +0 -102
  268. data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestamp.java +0 -116
  269. data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestampFormatterParser.java +0 -75
  270. data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestampFormatterParserDeprecated.java +0 -67
  271. data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestampParser.java +0 -247
  272. data/embulk-core/src/test/java/org/embulk/spi/type/TestTypeSerDe.java +0 -45
  273. data/embulk-core/src/test/java/org/embulk/spi/unit/TestByteSize.java +0 -79
  274. data/embulk-core/src/test/java/org/embulk/spi/util/TestLineDecoder.java +0 -237
  275. data/embulk-core/src/test/java/org/embulk/spi/util/TestLineEncoder.java +0 -123
  276. data/embulk-core/src/test/resources/m2.test/.gitignore +0 -1
  277. data/embulk-core/src/test/resources/m2.test/org/embulk/example/embulk-example-maven-artifact/0.1.2/embulk-example-maven-artifact-0.1.2.jar +0 -0
  278. data/embulk-core/src/test/resources/m2.test/org/embulk/example/embulk-example-maven-artifact/0.1.2/embulk-example-maven-artifact-0.1.2.jar.sha1 +0 -1
  279. data/embulk-core/src/test/resources/m2.test/org/embulk/example/embulk-example-maven-artifact/0.1.2/embulk-example-maven-artifact-0.1.2.pom +0 -9
  280. data/embulk-core/src/test/resources/m2.test/org/embulk/example/embulk-example-maven-artifact/0.1.2/embulk-example-maven-artifact-0.1.2.pom.sha1 +0 -1
  281. data/embulk-docs/Makefile +0 -178
  282. data/embulk-docs/build.gradle +0 -41
  283. data/embulk-docs/make.bat +0 -243
  284. data/embulk-docs/push-gh-pages.sh +0 -49
  285. data/embulk-docs/src/_static/embulk-architecture.png +0 -0
  286. data/embulk-docs/src/_static/embulk-logo-v2/embulk-logo-v2-oneline-tr.png +0 -0
  287. data/embulk-docs/src/_static/embulk-logo-v2/embulk-logo-v2-oneline-wt.png +0 -0
  288. data/embulk-docs/src/_static/embulk-logo-v2/embulk-logo-v2-oneline.ai +0 -396
  289. data/embulk-docs/src/_static/embulk-logo-v2/embulk-logo-v2-sq-tr-small.png +0 -0
  290. data/embulk-docs/src/_static/embulk-logo-v2/embulk-logo-v2-sq-tr.png +0 -0
  291. data/embulk-docs/src/_static/embulk-logo-v2/embulk-logo-v2-sq-wt.png +0 -0
  292. data/embulk-docs/src/_static/embulk-logo-v2/embulk-logo-v2-sq.ai +0 -417
  293. data/embulk-docs/src/_static/embulk-logo-v2/embulk-logo-v2-sq.svg +0 -1
  294. data/embulk-docs/src/_static/embulk-logo-v2/embulk-logo-v2-symbol-tr.png +0 -0
  295. data/embulk-docs/src/_static/embulk-logo-v2/embulk-logo-v2-symbol-wt.png +0 -0
  296. data/embulk-docs/src/_static/embulk-logo-v2/embulk-logo-v2-symbol.ai +0 -394
  297. data/embulk-docs/src/_static/embulk-logo.svg +0 -133
  298. data/embulk-docs/src/built-in.rst +0 -1017
  299. data/embulk-docs/src/conf.py +0 -260
  300. data/embulk-docs/src/customization.rst +0 -184
  301. data/embulk-docs/src/developers/index.rst +0 -45
  302. data/embulk-docs/src/index.rst +0 -102
  303. data/embulk-docs/src/logo.rst +0 -27
  304. data/embulk-docs/src/recipe.rst +0 -8
  305. data/embulk-docs/src/recipe/scheduled-csv-load-to-elasticsearch-kibana5.rst +0 -163
  306. data/embulk-docs/src/release.rst +0 -109
  307. data/embulk-docs/src/release/release-0.1.0.rst +0 -8
  308. data/embulk-docs/src/release/release-0.2.0.rst +0 -16
  309. data/embulk-docs/src/release/release-0.2.1.rst +0 -19
  310. data/embulk-docs/src/release/release-0.3.0.rst +0 -34
  311. data/embulk-docs/src/release/release-0.3.1.rst +0 -11
  312. data/embulk-docs/src/release/release-0.3.2.rst +0 -15
  313. data/embulk-docs/src/release/release-0.4.0.rst +0 -74
  314. data/embulk-docs/src/release/release-0.4.1.rst +0 -18
  315. data/embulk-docs/src/release/release-0.4.10.rst +0 -17
  316. data/embulk-docs/src/release/release-0.4.2.rst +0 -18
  317. data/embulk-docs/src/release/release-0.4.3.rst +0 -34
  318. data/embulk-docs/src/release/release-0.4.4.rst +0 -39
  319. data/embulk-docs/src/release/release-0.4.5.rst +0 -24
  320. data/embulk-docs/src/release/release-0.4.6.rst +0 -30
  321. data/embulk-docs/src/release/release-0.4.7.rst +0 -16
  322. data/embulk-docs/src/release/release-0.4.8.rst +0 -15
  323. data/embulk-docs/src/release/release-0.4.9.rst +0 -23
  324. data/embulk-docs/src/release/release-0.5.0.rst +0 -89
  325. data/embulk-docs/src/release/release-0.5.1.rst +0 -13
  326. data/embulk-docs/src/release/release-0.5.2.rst +0 -30
  327. data/embulk-docs/src/release/release-0.5.3.rst +0 -22
  328. data/embulk-docs/src/release/release-0.5.4.rst +0 -24
  329. data/embulk-docs/src/release/release-0.5.5.rst +0 -18
  330. data/embulk-docs/src/release/release-0.6.0.rst +0 -34
  331. data/embulk-docs/src/release/release-0.6.1.rst +0 -11
  332. data/embulk-docs/src/release/release-0.6.10.rst +0 -15
  333. data/embulk-docs/src/release/release-0.6.11.rst +0 -19
  334. data/embulk-docs/src/release/release-0.6.12.rst +0 -31
  335. data/embulk-docs/src/release/release-0.6.13.rst +0 -23
  336. data/embulk-docs/src/release/release-0.6.14.rst +0 -47
  337. data/embulk-docs/src/release/release-0.6.15.rst +0 -26
  338. data/embulk-docs/src/release/release-0.6.16.rst +0 -26
  339. data/embulk-docs/src/release/release-0.6.17.rst +0 -39
  340. data/embulk-docs/src/release/release-0.6.18.rst +0 -14
  341. data/embulk-docs/src/release/release-0.6.19.rst +0 -18
  342. data/embulk-docs/src/release/release-0.6.2.rst +0 -17
  343. data/embulk-docs/src/release/release-0.6.20.rst +0 -19
  344. data/embulk-docs/src/release/release-0.6.21.rst +0 -20
  345. data/embulk-docs/src/release/release-0.6.22.rst +0 -26
  346. data/embulk-docs/src/release/release-0.6.23.rst +0 -17
  347. data/embulk-docs/src/release/release-0.6.24.rst +0 -13
  348. data/embulk-docs/src/release/release-0.6.25.rst +0 -12
  349. data/embulk-docs/src/release/release-0.6.26.rst +0 -17
  350. data/embulk-docs/src/release/release-0.6.27.rst +0 -11
  351. data/embulk-docs/src/release/release-0.6.3.rst +0 -23
  352. data/embulk-docs/src/release/release-0.6.4.rst +0 -13
  353. data/embulk-docs/src/release/release-0.6.5.rst +0 -17
  354. data/embulk-docs/src/release/release-0.6.6.rst +0 -17
  355. data/embulk-docs/src/release/release-0.6.7.rst +0 -17
  356. data/embulk-docs/src/release/release-0.6.8.rst +0 -24
  357. data/embulk-docs/src/release/release-0.6.9.rst +0 -24
  358. data/embulk-docs/src/release/release-0.7.0.rst +0 -96
  359. data/embulk-docs/src/release/release-0.7.1.rst +0 -22
  360. data/embulk-docs/src/release/release-0.7.10.rst +0 -13
  361. data/embulk-docs/src/release/release-0.7.11.rst +0 -12
  362. data/embulk-docs/src/release/release-0.7.2.rst +0 -25
  363. data/embulk-docs/src/release/release-0.7.3.rst +0 -21
  364. data/embulk-docs/src/release/release-0.7.4.rst +0 -14
  365. data/embulk-docs/src/release/release-0.7.5.rst +0 -22
  366. data/embulk-docs/src/release/release-0.7.6.rst +0 -18
  367. data/embulk-docs/src/release/release-0.7.7.rst +0 -13
  368. data/embulk-docs/src/release/release-0.7.8.rst +0 -14
  369. data/embulk-docs/src/release/release-0.7.9.rst +0 -14
  370. data/embulk-docs/src/release/release-0.8.0.rst +0 -74
  371. data/embulk-docs/src/release/release-0.8.1.rst +0 -18
  372. data/embulk-docs/src/release/release-0.8.10.rst +0 -35
  373. data/embulk-docs/src/release/release-0.8.11.rst +0 -12
  374. data/embulk-docs/src/release/release-0.8.12.rst +0 -12
  375. data/embulk-docs/src/release/release-0.8.13.rst +0 -12
  376. data/embulk-docs/src/release/release-0.8.14.rst +0 -31
  377. data/embulk-docs/src/release/release-0.8.15.rst +0 -17
  378. data/embulk-docs/src/release/release-0.8.16.rst +0 -43
  379. data/embulk-docs/src/release/release-0.8.17.rst +0 -11
  380. data/embulk-docs/src/release/release-0.8.18.rst +0 -27
  381. data/embulk-docs/src/release/release-0.8.19.rst +0 -43
  382. data/embulk-docs/src/release/release-0.8.2.rst +0 -19
  383. data/embulk-docs/src/release/release-0.8.20.rst +0 -11
  384. data/embulk-docs/src/release/release-0.8.21.rst +0 -17
  385. data/embulk-docs/src/release/release-0.8.22.rst +0 -15
  386. data/embulk-docs/src/release/release-0.8.23.rst +0 -14
  387. data/embulk-docs/src/release/release-0.8.24.rst +0 -15
  388. data/embulk-docs/src/release/release-0.8.25.rst +0 -14
  389. data/embulk-docs/src/release/release-0.8.26.rst +0 -16
  390. data/embulk-docs/src/release/release-0.8.27.rst +0 -15
  391. data/embulk-docs/src/release/release-0.8.28.rst +0 -14
  392. data/embulk-docs/src/release/release-0.8.29.rst +0 -14
  393. data/embulk-docs/src/release/release-0.8.3.rst +0 -15
  394. data/embulk-docs/src/release/release-0.8.30.rst +0 -14
  395. data/embulk-docs/src/release/release-0.8.31.rst +0 -12
  396. data/embulk-docs/src/release/release-0.8.32.rst +0 -14
  397. data/embulk-docs/src/release/release-0.8.33.rst +0 -13
  398. data/embulk-docs/src/release/release-0.8.34.rst +0 -12
  399. data/embulk-docs/src/release/release-0.8.35.rst +0 -12
  400. data/embulk-docs/src/release/release-0.8.36.rst +0 -32
  401. data/embulk-docs/src/release/release-0.8.37.rst +0 -20
  402. data/embulk-docs/src/release/release-0.8.38.rst +0 -12
  403. data/embulk-docs/src/release/release-0.8.39.rst +0 -12
  404. data/embulk-docs/src/release/release-0.8.4.rst +0 -18
  405. data/embulk-docs/src/release/release-0.8.5.rst +0 -11
  406. data/embulk-docs/src/release/release-0.8.6.rst +0 -14
  407. data/embulk-docs/src/release/release-0.8.7.rst +0 -18
  408. data/embulk-docs/src/release/release-0.8.8.rst +0 -18
  409. data/embulk-docs/src/release/release-0.8.9.rst +0 -14
  410. data/embulk-jruby-strptime/build.gradle +0 -3
  411. data/embulk-jruby-strptime/src/main/java/org/embulk/spi/time/RubyDateParser.java +0 -121
  412. data/embulk-jruby-strptime/src/main/java/org/embulk/spi/time/StrptimeFormat.java +0 -53
  413. data/embulk-jruby-strptime/src/main/java/org/embulk/spi/time/StrptimeParser.java +0 -884
  414. data/embulk-jruby-strptime/src/main/java/org/embulk/spi/time/StrptimeToken.java +0 -111
  415. data/embulk-jruby-strptime/src/main/java/org/embulk/spi/time/TimeZoneConverter.java +0 -466
  416. data/embulk-jruby-strptime/src/main/java/org/embulk/spi/time/lexer/StrptimeLexer.flex +0 -62
  417. data/embulk-jruby-strptime/src/main/java/org/embulk/spi/time/lexer/StrptimeLexer.java +0 -577
  418. data/embulk-standards/build.gradle +0 -7
  419. data/embulk-standards/src/main/java/org/embulk/standards/Bzip2FileDecoderPlugin.java +0 -55
  420. data/embulk-standards/src/main/java/org/embulk/standards/Bzip2FileEncoderPlugin.java +0 -67
  421. data/embulk-standards/src/main/java/org/embulk/standards/ConfigInputPlugin.java +0 -170
  422. data/embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java +0 -295
  423. data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +0 -416
  424. data/embulk-standards/src/main/java/org/embulk/standards/CsvTokenizer.java +0 -545
  425. data/embulk-standards/src/main/java/org/embulk/standards/GzipFileDecoderPlugin.java +0 -55
  426. data/embulk-standards/src/main/java/org/embulk/standards/GzipFileEncoderPlugin.java +0 -71
  427. data/embulk-standards/src/main/java/org/embulk/standards/JsonParserPlugin.java +0 -235
  428. data/embulk-standards/src/main/java/org/embulk/standards/LocalFileInputPlugin.java +0 -232
  429. data/embulk-standards/src/main/java/org/embulk/standards/LocalFileOutputPlugin.java +0 -148
  430. data/embulk-standards/src/main/java/org/embulk/standards/NullOutputPlugin.java +0 -59
  431. data/embulk-standards/src/main/java/org/embulk/standards/RemoveColumnsFilterPlugin.java +0 -268
  432. data/embulk-standards/src/main/java/org/embulk/standards/RenameFilterPlugin.java +0 -479
  433. data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginExtension.java +0 -16
  434. data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginModule.java +0 -60
  435. data/embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java +0 -91
  436. data/embulk-standards/src/main/resources/META-INF/services/org.embulk.spi.Extension +0 -1
  437. data/embulk-standards/src/test/java/org/embulk/standards/TestCsvFormatterPlugin.java +0 -312
  438. data/embulk-standards/src/test/java/org/embulk/standards/TestCsvParserPlugin.java +0 -75
  439. data/embulk-standards/src/test/java/org/embulk/standards/TestCsvTokenizer.java +0 -457
  440. data/embulk-standards/src/test/java/org/embulk/standards/TestJsonParserPlugin.java +0 -351
  441. data/embulk-standards/src/test/java/org/embulk/standards/TestRemoveColumnsFilterPlugin.java +0 -121
  442. data/embulk-standards/src/test/java/org/embulk/standards/TestRenameFilterPlugin.java +0 -1020
  443. data/embulk-standards/src/test/java/org/embulk/standards/guess/TestCsvAllStringsGuessPlugin.java +0 -38
  444. data/embulk-standards/src/test/java/org/embulk/standards/guess/TestCsvGuessPlugin.java +0 -248
  445. data/embulk-standards/src/test/java/org/embulk/standards/preview/TestFilePreview.java +0 -73
  446. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row.csv +0 -1
  447. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row_and_header.csv +0 -2
  448. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row_and_header_guessed.yml +0 -12
  449. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row_and_header_seed.yml +0 -1
  450. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row_guessed.yml +0 -12
  451. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_int_single_column_row_seed.yml +0 -1
  452. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows.csv +0 -1
  453. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header.csv +0 -2
  454. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header_guessed.yml +0 -16
  455. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header_seed.yml +0 -1
  456. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header_with_trim_needed.csv +0 -2
  457. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header_with_trim_needed_guessed.yml +0 -16
  458. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_and_header_with_trim_needed_seed.yml +0 -1
  459. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_guessed.yml +0 -16
  460. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_seed.yml +0 -1
  461. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_with_trim_needed.csv +0 -1
  462. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_with_trim_needed_guessed.yml +0 -16
  463. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_rows_with_trim_needed_seed.yml +0 -1
  464. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row.csv +0 -1
  465. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row_and_header.csv +0 -2
  466. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row_and_header_guessed.yml +0 -12
  467. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row_and_header_seed.yml +0 -1
  468. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row_guessed.yml +0 -12
  469. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_1_string_single_column_row_seed.yml +0 -1
  470. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_int_single_column_rows.csv +0 -2
  471. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_int_single_column_rows_guessed.yml +0 -12
  472. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_int_single_column_rows_seed.yml +0 -1
  473. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows.csv +0 -2
  474. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows_and_header.csv +0 -3
  475. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows_and_header_guessed.yml +0 -16
  476. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows_and_header_seed.yml +0 -1
  477. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows_guessed.yml +0 -16
  478. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_rows_seed.yml +0 -1
  479. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_string_single_column_rows.csv +0 -2
  480. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_string_single_column_rows_guessed.yml +0 -12
  481. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_2_string_single_column_rows_seed.yml +0 -1
  482. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_backslash_escape.csv +0 -5
  483. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_backslash_escape_guessed.yml +0 -17
  484. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_backslash_escape_seed.yml +0 -1
  485. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_double_single_column.csv +0 -12
  486. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_double_single_column_guessed.yml +0 -12
  487. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_double_single_column_seed.yml +0 -1
  488. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column.csv +0 -4
  489. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column_guessed.yml +0 -12
  490. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column_seed.yml +0 -1
  491. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column_with_header.csv +0 -5
  492. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column_with_header_guessed.yml +0 -12
  493. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_int_single_column_with_header_seed.yml +0 -1
  494. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_semicolon_delimiter.csv +0 -5
  495. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_semicolon_delimiter_guessed.yml +0 -17
  496. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_semicolon_delimiter_seed.yml +0 -1
  497. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_simple.csv +0 -5
  498. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_simple_guessed.yml +0 -17
  499. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_simple_seed.yml +0 -1
  500. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_single_quote.csv +0 -5
  501. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_single_quote_guessed.yml +0 -17
  502. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_single_quote_seed.yml +0 -1
  503. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_skip_suggest_if_empty_sample_records.csv +0 -5
  504. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_skip_suggest_if_empty_sample_records_guessed.yml +0 -2
  505. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_skip_suggest_if_empty_sample_records_seed.yml +0 -1
  506. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column.csv +0 -4
  507. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column_guessed.yml +0 -12
  508. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column_seed.yml +0 -1
  509. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column_with_header.csv +0 -5
  510. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column_with_header_guessed.yml +0 -12
  511. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_string_single_column_with_header_seed.yml +0 -1
  512. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_tab_delimiter.csv +0 -4
  513. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_tab_delimiter_guessed.yml +0 -16
  514. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv/test/test_tab_delimiter_seed.yml +0 -1
  515. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv_all_strings/test/test_simple.csv +0 -5
  516. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv_all_strings/test/test_simple_guessed.yml +0 -17
  517. data/embulk-standards/src/test/resources/org/embulk/standards/guess/csv_all_strings/test/test_simple_seed.yml +0 -1
  518. data/embulk-standards/src/test/resources/org/embulk/standards/preview/file/test/test_sample_buffer_bytes.csv +0 -5
  519. data/embulk-standards/src/test/resources/org/embulk/standards/preview/file/test/test_sample_buffer_bytes_exec.yml +0 -1
  520. data/embulk-standards/src/test/resources/org/embulk/standards/preview/file/test/test_sample_buffer_bytes_load.yml +0 -19
  521. data/embulk-standards/src/test/resources/org/embulk/standards/preview/file/test/test_sample_buffer_bytes_previewed.csv +0 -1
  522. data/embulk-standards/src/test/resources/org/embulk/standards/preview/file/test/test_simple.csv +0 -5
  523. data/embulk-standards/src/test/resources/org/embulk/standards/preview/file/test/test_simple_load.yml +0 -19
  524. data/embulk-standards/src/test/resources/org/embulk/standards/preview/file/test/test_simple_previewed.csv +0 -4
  525. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep.csv +0 -5
  526. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_expected.csv +0 -4
  527. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_filter.yml +0 -2
  528. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_in.yml +0 -18
  529. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_with_duplicated_column_names.csv +0 -5
  530. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_with_duplicated_column_names.yml +0 -2
  531. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_with_duplicated_column_names_expected.csv +0 -4
  532. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_with_duplicated_column_names_in.yml +0 -17
  533. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_with_unmatched_filter.yml +0 -3
  534. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_keep_without_unmatched_filter.yml +0 -2
  535. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove.csv +0 -5
  536. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove_expected.csv +0 -4
  537. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove_filter.yml +0 -2
  538. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove_in.yml +0 -18
  539. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove_with_unmatched_filter.yml +0 -3
  540. data/embulk-standards/src/test/resources/org/embulk/standards/remove_columns/test/test_remove_without_unmatched_filter.yml +0 -2
  541. data/embulk-test/build.gradle +0 -6
  542. data/embulk-test/src/main/java/org/embulk/test/EmbulkTests.java +0 -75
  543. data/embulk-test/src/main/java/org/embulk/test/PreviewResultInputPlugin.java +0 -65
  544. data/embulk-test/src/main/java/org/embulk/test/TestingBulkLoader.java +0 -129
  545. data/embulk-test/src/main/java/org/embulk/test/TestingEmbulk.java +0 -710
  546. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  547. data/gradle/wrapper/gradle-wrapper.properties +0 -5
  548. data/gradlew +0 -172
  549. data/gradlew.bat +0 -84
  550. data/lib/embulk/data/bundle/.bundle/config +0 -3
  551. data/lib/embulk/data/bundle/.ruby-version +0 -1
  552. data/lib/embulk/data/bundle/Gemfile +0 -31
  553. data/lib/embulk/data/bundle/embulk/filter/example.rb +0 -42
  554. data/lib/embulk/data/bundle/embulk/input/example.rb +0 -54
  555. data/lib/embulk/data/bundle/embulk/output/example.rb +0 -58
  556. data/lib/embulk/data/new/LICENSE.txt +0 -21
  557. data/lib/embulk/data/new/README.md.vm +0 -106
  558. data/lib/embulk/data/new/gitignore.vm +0 -17
  559. data/lib/embulk/data/new/java/build.gradle.vm +0 -96
  560. data/lib/embulk/data/new/java/config/checkstyle/checkstyle.xml +0 -128
  561. data/lib/embulk/data/new/java/config/checkstyle/default.xml +0 -108
  562. data/lib/embulk/data/new/java/decoder.java.vm +0 -86
  563. data/lib/embulk/data/new/java/encoder.java.vm +0 -88
  564. data/lib/embulk/data/new/java/file_input.java.vm +0 -145
  565. data/lib/embulk/data/new/java/file_output.java.vm +0 -95
  566. data/lib/embulk/data/new/java/filter.java.vm +0 -57
  567. data/lib/embulk/data/new/java/formatter.java.vm +0 -55
  568. data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.jar +0 -0
  569. data/lib/embulk/data/new/java/gradle/wrapper/gradle-wrapper.properties +0 -5
  570. data/lib/embulk/data/new/java/gradlew +0 -172
  571. data/lib/embulk/data/new/java/gradlew.bat +0 -84
  572. data/lib/embulk/data/new/java/input.java.vm +0 -89
  573. data/lib/embulk/data/new/java/output.java.vm +0 -79
  574. data/lib/embulk/data/new/java/parser.java.vm +0 -61
  575. data/lib/embulk/data/new/java/plugin_loader.rb.vm +0 -3
  576. data/lib/embulk/data/new/java/test.java.vm +0 -5
  577. data/lib/embulk/data/new/ruby/.ruby-version +0 -1
  578. data/lib/embulk/data/new/ruby/Gemfile +0 -2
  579. data/lib/embulk/data/new/ruby/Rakefile +0 -3
  580. data/lib/embulk/data/new/ruby/decoder_guess.rb.vm +0 -25
  581. data/lib/embulk/data/new/ruby/filter.rb.vm +0 -50
  582. data/lib/embulk/data/new/ruby/formatter.rb.vm +0 -49
  583. data/lib/embulk/data/new/ruby/gemspec.vm +0 -20
  584. data/lib/embulk/data/new/ruby/input.rb.vm +0 -59
  585. data/lib/embulk/data/new/ruby/output.rb.vm +0 -61
  586. data/lib/embulk/data/new/ruby/parser.rb.vm +0 -44
  587. data/lib/embulk/data/new/ruby/parser_guess.rb.vm +0 -65
  588. data/lib/embulk/data/package_data.rb +0 -59
  589. data/lib/embulk/guess/bzip2.rb +0 -23
  590. data/lib/embulk/guess/csv.rb +0 -374
  591. data/lib/embulk/guess/csv_all_strings.rb +0 -13
  592. data/lib/embulk/guess/gzip.rb +0 -18
  593. data/lib/embulk/guess/json.rb +0 -50
  594. data/lib/embulk/runner.rb +0 -53
  595. data/settings.gradle +0 -8
  596. data/test/helper.rb +0 -23
  597. data/test/monkey_strptime/mri/date/test_date_strptime.rb +0 -515
  598. data/test/monkey_strptime/run-test.rb +0 -29
  599. data/test/vanilla/guess/test_csv_all_strings.rb +0 -43
  600. data/test/vanilla/guess/test_csv_guess.rb +0 -170
  601. data/test/vanilla/guess/test_schema_guess.rb +0 -47
  602. data/test/vanilla/guess/test_time_format_guess.rb +0 -135
  603. data/test/vanilla/run-test.rb +0 -14
  604. data/test/vanilla/time/test_timestamp_parser.rb +0 -117
@@ -1,59 +0,0 @@
1
- module Embulk
2
-
3
- class PackageData
4
- def initialize(base_name, dest_dir, erb_binding=nil)
5
- require 'fileutils'
6
- @base_name = base_name
7
- @dest_dir = dest_dir
8
- @erb_binding = erb_binding
9
- end
10
-
11
- def path(src)
12
- Embulk.lib_path("embulk/data/#{@base_name}/#{src}")
13
- end
14
-
15
- def content(src)
16
- File.read(path(src))
17
- end
18
-
19
- def bincontent(src)
20
- File.binread(path(src))
21
- end
22
-
23
- def erb(src)
24
- require 'erb'
25
- ERB.new(content(src), nil, '%').result(@erb_binding)
26
- end
27
-
28
- def cp(src, dest_name)
29
- dest = dest_path_message(dest_name)
30
- File.open(dest, "wb") do |dst_io|
31
- File.open(path(src), "rb") do |src_io|
32
- FileUtils.copy_stream src_io, dst_io
33
- end
34
- end
35
- end
36
-
37
- def cp_erb(src, dest_name)
38
- dest = dest_path_message(dest_name)
39
- File.open(dest, "wb") {|f| f.write erb(src) }
40
- end
41
-
42
- def dest_path(dest_name)
43
- File.join(@dest_dir, *dest_name.split('/'))
44
- end
45
-
46
- def dest_path_message(dest_name)
47
- path = dest_path(dest_name)
48
- puts " Creating #{path}"
49
- FileUtils.mkdir_p File.dirname(path)
50
- path
51
- end
52
-
53
- def set_executable(dest_name)
54
- dest = dest_path(dest_name)
55
- File.chmod(File.stat(dest).mode | 0111, dest)
56
- end
57
- end
58
-
59
- end
@@ -1,23 +0,0 @@
1
- module Embulk
2
- module Guess
3
-
4
- class Bzip2GuessPlugin < GuessPlugin
5
- Plugin.register_guess('bzip2', self)
6
-
7
- # magic: BZ
8
- # version: 'h' = bzip2
9
- # blocksize: 1 .. 9
10
- # block magic: 0x314159265359 (6 bytes)
11
- block_magic = [0x31, 0x41, 0x59, 0x26, 0x53, 0x59].pack('C*')
12
- BZIP2_HEADER_PATTERN = /BZh[1-9]#{Regexp.quote(block_magic)}/n
13
-
14
- def guess(config, sample_buffer)
15
- if sample_buffer[0,10] =~ BZIP2_HEADER_PATTERN
16
- return {"decoders" => [{"type" => "bzip2"}]}
17
- end
18
- return {}
19
- end
20
- end
21
-
22
- end
23
- end
@@ -1,374 +0,0 @@
1
- module Embulk
2
- module Guess
3
- require 'embulk/guess/schema_guess'
4
-
5
- class CsvGuessPlugin < LineGuessPlugin
6
- Plugin.register_guess('csv', self)
7
-
8
- DELIMITER_CANDIDATES = [
9
- ",", "\t", "|", ";"
10
- ]
11
-
12
- QUOTE_CANDIDATES = [
13
- "\"", "'"
14
- ]
15
-
16
- ESCAPE_CANDIDATES = [
17
- "\\", '"'
18
- ]
19
-
20
- NULL_STRING_CANDIDATES = [
21
- "null",
22
- "NULL",
23
- "#N/A",
24
- "\\N", # MySQL LOAD, Hive STORED AS TEXTFILE
25
- ]
26
-
27
- COMMENT_LINE_MARKER_CANDIDATES = [
28
- "#",
29
- "//",
30
- ]
31
-
32
- MAX_SKIP_LINES = 10
33
- NO_SKIP_DETECT_LINES = 10
34
-
35
- def guess_lines(config, sample_lines)
36
- return {} unless config.fetch("parser", {}).fetch("type", "csv") == "csv"
37
-
38
- parser_config = config["parser"] || {}
39
- if parser_config["type"] == "csv" && parser_config["delimiter"]
40
- delim = parser_config["delimiter"]
41
- else
42
- delim = guess_delimiter(sample_lines)
43
- unless delim
44
- # assuming single column CSV
45
- delim = DELIMITER_CANDIDATES.first
46
- end
47
- end
48
-
49
- parser_guessed = DataSource.new.merge(parser_config).merge({"type" => "csv", "delimiter" => delim})
50
-
51
- unless parser_guessed.has_key?("quote")
52
- quote = guess_quote(sample_lines, delim)
53
- unless quote
54
- if !guess_force_no_quote(sample_lines, delim, '"')
55
- # assuming CSV follows RFC for quoting
56
- quote = '"'
57
- else
58
- # disable quoting (set null)
59
- end
60
- end
61
- parser_guessed["quote"] = quote
62
- end
63
- parser_guessed["quote"] = '"' if parser_guessed["quote"] == '' # setting '' is not allowed any more. this line converts obsoleted config syntax to explicit syntax.
64
-
65
- unless parser_guessed.has_key?("escape")
66
- if quote = parser_guessed["quote"]
67
- escape = guess_escape(sample_lines, delim, quote)
68
- unless escape
69
- if quote == '"'
70
- # assuming this CSV follows RFC for escaping
71
- escape = '"'
72
- else
73
- # disable escaping (set null)
74
- end
75
- end
76
- parser_guessed["escape"] = escape
77
- else
78
- # escape does nothing if quote is disabled
79
- end
80
- end
81
-
82
- unless parser_guessed.has_key?("null_string")
83
- null_string = guess_null_string(sample_lines, delim)
84
- parser_guessed["null_string"] = null_string if null_string
85
- # don't even set null_string to avoid confusion of null and 'null' in YAML format
86
- end
87
-
88
- # guessing skip_header_lines should be before guessing guess_comment_line_marker
89
- # because lines supplied to CsvTokenizer already don't include skipped header lines.
90
- # skipping empty lines is also disabled here because skipping header lines is done by
91
- # CsvParser which doesn't skip empty lines automatically
92
- sample_records = split_lines(parser_guessed, false, sample_lines, delim, {})
93
- skip_header_lines = guess_skip_header_lines(sample_records)
94
- sample_lines = sample_lines[skip_header_lines..-1]
95
- sample_records = sample_records[skip_header_lines..-1]
96
-
97
- unless parser_guessed.has_key?("comment_line_marker")
98
- comment_line_marker, sample_lines =
99
- guess_comment_line_marker(sample_lines, delim, parser_guessed["quote"], parser_guessed["null_string"])
100
- if comment_line_marker
101
- parser_guessed["comment_line_marker"] = comment_line_marker
102
- end
103
- end
104
-
105
- sample_records = split_lines(parser_guessed, true, sample_lines, delim, {})
106
-
107
- # It should fail if CSV parser cannot parse sample_lines.
108
- if sample_records.nil? || sample_records.empty?
109
- return {}
110
- end
111
-
112
- if sample_lines.size == 1
113
- # The file contains only 1 line. Assume that there are no header line.
114
- header_line = false
115
-
116
- column_types = SchemaGuess.types_from_array_records(sample_records[0, 1])
117
-
118
- unless parser_guessed.has_key?("trim_if_not_quoted")
119
- sample_records_trimmed = split_lines(parser_guessed, true, sample_lines, delim, {"trim_if_not_quoted" => true})
120
- column_types_trimmed = SchemaGuess.types_from_array_records(sample_records_trimmed)
121
- if column_types != column_types_trimmed
122
- parser_guessed["trim_if_not_quoted"] = true
123
- column_types = column_types_trimmed
124
- else
125
- parser_guessed["trim_if_not_quoted"] = false
126
- end
127
- end
128
- else
129
- # The file contains more than 1 line. If guessed first line's column types are all strings or boolean, and the types are
130
- # different from the other lines, assume that the first line is column names.
131
- first_types = SchemaGuess.types_from_array_records(sample_records[0, 1])
132
- other_types = SchemaGuess.types_from_array_records(sample_records[1..-1] || [])
133
-
134
- unless parser_guessed.has_key?("trim_if_not_quoted")
135
- sample_records_trimmed = split_lines(parser_guessed, true, sample_lines, delim, {"trim_if_not_quoted" => true})
136
- other_types_trimmed = SchemaGuess.types_from_array_records(sample_records_trimmed[1..-1] || [])
137
- if other_types != other_types_trimmed
138
- parser_guessed["trim_if_not_quoted"] = true
139
- other_types = other_types_trimmed
140
- else
141
- parser_guessed["trim_if_not_quoted"] = false
142
- end
143
- end
144
-
145
- header_line = (first_types != other_types && first_types.all? {|t| ["string", "boolean"].include?(t) }) || guess_string_header_line(sample_records)
146
- column_types = other_types
147
- end
148
-
149
- if column_types.empty?
150
- # TODO here is making the guessing failed if the file doesn't contain any columns. However,
151
- # this may not be convenient for users.
152
- return {}
153
- end
154
-
155
- if header_line
156
- parser_guessed["skip_header_lines"] = skip_header_lines + 1
157
- else
158
- parser_guessed["skip_header_lines"] = skip_header_lines
159
- end
160
-
161
- parser_guessed["allow_extra_columns"] = false unless parser_guessed.has_key?("allow_extra_columns")
162
- parser_guessed["allow_optional_columns"] = false unless parser_guessed.has_key?("allow_optional_columns")
163
-
164
- if header_line
165
- column_names = sample_records.first.map(&:strip)
166
- else
167
- column_names = (0..column_types.size).to_a.map {|i| "c#{i}" }
168
- end
169
- schema = []
170
- column_names.zip(column_types).each do |name,type|
171
- if name && type
172
- schema << new_column(name, type)
173
- end
174
- end
175
- parser_guessed["columns"] = schema
176
-
177
- return {"parser" => parser_guessed}
178
- end
179
-
180
- def new_column(name, type)
181
- if type.is_a?(SchemaGuess::TimestampTypeMatch)
182
- {"name" => name, "type" => type, "format" => type.format}
183
- else
184
- {"name" => name, "type" => type}
185
- end
186
- end
187
-
188
- private
189
-
190
- def split_lines(parser_config, skip_empty_lines, sample_lines, delim, extra_config)
191
- null_string = parser_config["null_string"]
192
- config = parser_config.merge(extra_config).merge({"charset" => "UTF-8", "columns" => []})
193
- parser_task = config.load_config(org.embulk.standards.CsvParserPlugin::PluginTask)
194
- data = sample_lines.map {|line| line.force_encoding('UTF-8') }.join(parser_task.getNewline.getString.encode('UTF-8'))
195
- sample = Buffer.from_ruby_string(data)
196
- decoder = Java::LineDecoder.new(Java::ListFileInput.new([[sample.to_java]]), parser_task)
197
- tokenizer = org.embulk.standards.CsvTokenizer.new(decoder, parser_task)
198
- rows = []
199
- while tokenizer.nextFile
200
- while tokenizer.nextRecord(skip_empty_lines)
201
- begin
202
- columns = []
203
- while true
204
- begin
205
- column = tokenizer.nextColumn
206
- quoted = tokenizer.wasQuotedColumn
207
- if null_string && !quoted && column == null_string
208
- column = nil
209
- end
210
- columns << column
211
- rescue org.embulk.standards.CsvTokenizer::TooFewColumnsException
212
- rows << columns
213
- break
214
- end
215
- end
216
- rescue org.embulk.standards.CsvTokenizer::InvalidValueException
217
- # TODO warning
218
- tokenizer.skipCurrentLine
219
- end
220
- end
221
- end
222
- return rows
223
- rescue
224
- # TODO warning if fallback to this ad-hoc implementation
225
- sample_lines.map {|line| line.split(delim) }
226
- end
227
-
228
- def guess_delimiter(sample_lines)
229
- delim_weights = DELIMITER_CANDIDATES.map do |d|
230
- counts = sample_lines.map {|line| line.count(d) }
231
- total = array_sum(counts)
232
- if total > 0
233
- stddev = array_standard_deviation(counts)
234
- stddev = 0.000000001 if stddev == 0.0
235
- weight = total / stddev
236
- [d, weight]
237
- else
238
- [nil, 0]
239
- end
240
- end
241
-
242
- delim, weight = *delim_weights.sort_by {|d,weight| weight }.last
243
- if delim != nil && weight > 1
244
- return delim
245
- else
246
- return nil
247
- end
248
- end
249
-
250
- def guess_quote(sample_lines, delim)
251
- delim_regexp = Regexp.escape(delim)
252
- quote_weights = QUOTE_CANDIDATES.map do |q|
253
- weights = sample_lines.map do |line|
254
- q_regexp = Regexp.escape(q)
255
- count = line.count(q)
256
- if count > 0
257
- weight = count
258
- weight += line.scan(/(?:\A|#{delim_regexp})\s*#{q_regexp}(?:(?!#{q_regexp}).)*\s*#{q_regexp}(?:$|#{delim_regexp})/).size * 20
259
- weight += line.scan(/(?:\A|#{delim_regexp})\s*#{q_regexp}(?:(?!#{delim_regexp}).)*\s*#{q_regexp}(?:$|#{delim_regexp})/).size * 40
260
- weight
261
- else
262
- nil
263
- end
264
- end.compact
265
- weights.empty? ? 0 : array_avg(weights)
266
- end
267
- quote, weight = QUOTE_CANDIDATES.zip(quote_weights).sort_by {|q,w| w }.last
268
- if weight >= 10.0
269
- return quote
270
- else
271
- return nil
272
- end
273
- end
274
-
275
- def guess_force_no_quote(sample_lines, delim, quote_candidate)
276
- delim_regexp = Regexp.escape(delim)
277
- q_regexp = Regexp.escape(quote_candidate)
278
- sample_lines.any? do |line|
279
- # quoting character appear at the middle of a non-quoted value
280
- line =~ /(?:\A|#{delim_regexp})\s*[^#{q_regexp}]+#{q_regexp}/
281
- end
282
- end
283
-
284
- def guess_escape(sample_lines, delim, quote)
285
- guessed = ESCAPE_CANDIDATES.map do |str|
286
- regexp = /#{Regexp.quote(str)}(?:#{Regexp.quote(delim)}|#{Regexp.quote(quote)})/
287
- counts = sample_lines.map {|line| line.scan(regexp).count }
288
- count = counts.inject(0) {|r,c| r + c }
289
- [str, count]
290
- end.select {|str,count| count > 0 }.sort_by {|str,count| -count }
291
- found = guessed.first
292
- return found ? found[0] : nil
293
- end
294
-
295
- def guess_null_string(sample_lines, delim)
296
- guessed = NULL_STRING_CANDIDATES.map do |str|
297
- regexp = /(?:^|#{Regexp.quote(delim)})#{Regexp.quote(str)}(?:$|#{Regexp.quote(delim)})/
298
- counts = sample_lines.map {|line| line.scan(regexp).count }
299
- count = counts.inject(0) {|r,c| r + c }
300
- [str, count]
301
- end.select {|str,count| count > 0 }.sort_by {|str,count| -count }
302
- found_str, found_count = guessed.first
303
- return found_str ? found_str : nil
304
- end
305
-
306
- def guess_skip_header_lines(sample_records)
307
- counts = sample_records.map {|records| records.size }
308
- (1..[MAX_SKIP_LINES, counts.length - 1].min).each do |i|
309
- check_row_count = counts[i-1]
310
- if counts[i, NO_SKIP_DETECT_LINES].all? {|c| c <= check_row_count }
311
- return i - 1
312
- end
313
- end
314
- return 0
315
- end
316
-
317
- def guess_comment_line_marker(sample_lines, delim, quote, null_string)
318
- exclude = []
319
- exclude << /^#{Regexp.escape(quote)}/ if quote && !quote.empty?
320
- exclude << /^#{Regexp.escape(null_string)}(?:#{Regexp.escape(delim)}|$)/ if null_string
321
-
322
- guessed = COMMENT_LINE_MARKER_CANDIDATES.map do |str|
323
- regexp = /^#{Regexp.quote(str)}/
324
- unmatch_lines = sample_lines.reject do |line|
325
- exclude.all? {|ex| line !~ ex } && line =~ regexp
326
- end
327
- match_count = sample_lines.size - unmatch_lines.size
328
- [str, match_count, unmatch_lines]
329
- end.select {|str,match_count,unmatch_lines| match_count > 0 }.sort_by {|str,match_count,unmatch_lines| -match_count }
330
-
331
- str, match_count, unmatch_lines = guessed.first
332
- if str
333
- return str, unmatch_lines
334
- else
335
- return nil, sample_lines
336
- end
337
- end
338
-
339
- def guess_string_header_line(sample_records)
340
- first = sample_records.first
341
- first.count.times do |column_index|
342
- lengths = sample_records.map {|row| row[column_index] }.compact.map {|v| v.to_s.size }
343
- if lengths.size > 1
344
- if array_variance(lengths[1..-1]) <= 0.2
345
- avg = array_avg(lengths[1..-1])
346
- if avg == 0.0 ? lengths[0] > 1 : (avg - lengths[0]).abs / avg > 0.7
347
- return true
348
- end
349
- end
350
- end
351
- end
352
- return false
353
- end
354
-
355
- def array_sum(array)
356
- array.inject(0) {|r,i| r += i }
357
- end
358
-
359
- def array_avg(array)
360
- array.inject(0.0) {|r,i| r += i } / array.size
361
- end
362
-
363
- def array_variance(array)
364
- avg = array_avg(array)
365
- array.inject(0.0) {|r,i| r += (i - avg) ** 2 } / array.size
366
- end
367
-
368
- def array_standard_deviation(array)
369
- Math.sqrt(array_variance(array))
370
- end
371
- end
372
-
373
- end
374
- end