embulk 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (204) hide show
  1. checksums.yaml +15 -0
  2. data/.gitignore +13 -0
  3. data/Gemfile +3 -0
  4. data/Gemfile.lock +33 -0
  5. data/README.md +117 -0
  6. data/Rakefile +58 -0
  7. data/bin/embulk +63 -0
  8. data/build.gradle +149 -0
  9. data/embulk-cli/build.gradle +6 -0
  10. data/embulk-cli/pom.xml +94 -0
  11. data/embulk-cli/src/main/java/org/embulk/cli/Main.java +15 -0
  12. data/embulk-core/build.gradle +6 -0
  13. data/embulk-core/pom.xml +143 -0
  14. data/embulk-core/src/main/java/org/embulk/EmbulkService.java +39 -0
  15. data/embulk-core/src/main/java/org/embulk/command/Runner.java +199 -0
  16. data/embulk-core/src/main/java/org/embulk/command/TablePrinter.java +119 -0
  17. data/embulk-core/src/main/java/org/embulk/config/CommitReport.java +26 -0
  18. data/embulk-core/src/main/java/org/embulk/config/Config.java +15 -0
  19. data/embulk-core/src/main/java/org/embulk/config/ConfigDefault.java +15 -0
  20. data/embulk-core/src/main/java/org/embulk/config/ConfigException.java +20 -0
  21. data/embulk-core/src/main/java/org/embulk/config/ConfigLoader.java +83 -0
  22. data/embulk-core/src/main/java/org/embulk/config/ConfigSource.java +28 -0
  23. data/embulk-core/src/main/java/org/embulk/config/DataSource.java +35 -0
  24. data/embulk-core/src/main/java/org/embulk/config/DataSourceImpl.java +208 -0
  25. data/embulk-core/src/main/java/org/embulk/config/DataSourceSerDe.java +80 -0
  26. data/embulk-core/src/main/java/org/embulk/config/GenericTypeReference.java +20 -0
  27. data/embulk-core/src/main/java/org/embulk/config/ModelManager.java +125 -0
  28. data/embulk-core/src/main/java/org/embulk/config/NextConfig.java +26 -0
  29. data/embulk-core/src/main/java/org/embulk/config/Task.java +10 -0
  30. data/embulk-core/src/main/java/org/embulk/config/TaskInvocationHandler.java +180 -0
  31. data/embulk-core/src/main/java/org/embulk/config/TaskSerDe.java +343 -0
  32. data/embulk-core/src/main/java/org/embulk/config/TaskSource.java +28 -0
  33. data/embulk-core/src/main/java/org/embulk/config/TaskValidationException.java +37 -0
  34. data/embulk-core/src/main/java/org/embulk/config/TaskValidator.java +24 -0
  35. data/embulk-core/src/main/java/org/embulk/exec/ExecModule.java +45 -0
  36. data/embulk-core/src/main/java/org/embulk/exec/ExecuteInterruptedException.java +10 -0
  37. data/embulk-core/src/main/java/org/embulk/exec/ExecuteResult.java +19 -0
  38. data/embulk-core/src/main/java/org/embulk/exec/ExtensionServiceLoaderModule.java +43 -0
  39. data/embulk-core/src/main/java/org/embulk/exec/ForSystemConfig.java +16 -0
  40. data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +307 -0
  41. data/embulk-core/src/main/java/org/embulk/exec/LocalExecutor.java +274 -0
  42. data/embulk-core/src/main/java/org/embulk/exec/LoggerProvider.java +30 -0
  43. data/embulk-core/src/main/java/org/embulk/exec/NoSampleException.java +10 -0
  44. data/embulk-core/src/main/java/org/embulk/exec/PooledBufferAllocator.java +58 -0
  45. data/embulk-core/src/main/java/org/embulk/exec/PreviewExecutor.java +138 -0
  46. data/embulk-core/src/main/java/org/embulk/exec/PreviewResult.java +27 -0
  47. data/embulk-core/src/main/java/org/embulk/exec/PreviewedNoticeError.java +17 -0
  48. data/embulk-core/src/main/java/org/embulk/exec/SamplingParserPlugin.java +116 -0
  49. data/embulk-core/src/main/java/org/embulk/exec/SystemConfigModule.java +24 -0
  50. data/embulk-core/src/main/java/org/embulk/jruby/JRubyPluginSource.java +69 -0
  51. data/embulk-core/src/main/java/org/embulk/jruby/JRubyScriptingModule.java +100 -0
  52. data/embulk-core/src/main/java/org/embulk/plugin/BuiltinPluginSourceModule.java +17 -0
  53. data/embulk-core/src/main/java/org/embulk/plugin/InjectedPluginSource.java +92 -0
  54. data/embulk-core/src/main/java/org/embulk/plugin/PluginManager.java +34 -0
  55. data/embulk-core/src/main/java/org/embulk/plugin/PluginSource.java +6 -0
  56. data/embulk-core/src/main/java/org/embulk/plugin/PluginSourceNotMatchException.java +19 -0
  57. data/embulk-core/src/main/java/org/embulk/plugin/PluginType.java +47 -0
  58. data/embulk-core/src/main/java/org/embulk/plugin/SetThreadContextClassLoader.java +19 -0
  59. data/embulk-core/src/main/java/org/embulk/spi/Buffer.java +113 -0
  60. data/embulk-core/src/main/java/org/embulk/spi/BufferAllocator.java +8 -0
  61. data/embulk-core/src/main/java/org/embulk/spi/Column.java +92 -0
  62. data/embulk-core/src/main/java/org/embulk/spi/ColumnConfig.java +79 -0
  63. data/embulk-core/src/main/java/org/embulk/spi/DecoderPlugin.java +16 -0
  64. data/embulk-core/src/main/java/org/embulk/spi/EncoderPlugin.java +16 -0
  65. data/embulk-core/src/main/java/org/embulk/spi/Exec.java +76 -0
  66. data/embulk-core/src/main/java/org/embulk/spi/ExecAction.java +6 -0
  67. data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +105 -0
  68. data/embulk-core/src/main/java/org/embulk/spi/Extension.java +42 -0
  69. data/embulk-core/src/main/java/org/embulk/spi/FileInput.java +11 -0
  70. data/embulk-core/src/main/java/org/embulk/spi/FileInputPlugin.java +19 -0
  71. data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +113 -0
  72. data/embulk-core/src/main/java/org/embulk/spi/FileOutput.java +13 -0
  73. data/embulk-core/src/main/java/org/embulk/spi/FileOutputPlugin.java +20 -0
  74. data/embulk-core/src/main/java/org/embulk/spi/FileOutputRunner.java +167 -0
  75. data/embulk-core/src/main/java/org/embulk/spi/FormatterPlugin.java +18 -0
  76. data/embulk-core/src/main/java/org/embulk/spi/GuessPlugin.java +9 -0
  77. data/embulk-core/src/main/java/org/embulk/spi/InputPlugin.java +20 -0
  78. data/embulk-core/src/main/java/org/embulk/spi/OutputPlugin.java +21 -0
  79. data/embulk-core/src/main/java/org/embulk/spi/Page.java +45 -0
  80. data/embulk-core/src/main/java/org/embulk/spi/PageBuilder.java +327 -0
  81. data/embulk-core/src/main/java/org/embulk/spi/PageFormat.java +47 -0
  82. data/embulk-core/src/main/java/org/embulk/spi/PageOutput.java +11 -0
  83. data/embulk-core/src/main/java/org/embulk/spi/PageReader.java +227 -0
  84. data/embulk-core/src/main/java/org/embulk/spi/ParserPlugin.java +17 -0
  85. data/embulk-core/src/main/java/org/embulk/spi/Schema.java +101 -0
  86. data/embulk-core/src/main/java/org/embulk/spi/SchemaConfig.java +52 -0
  87. data/embulk-core/src/main/java/org/embulk/spi/SchemaVisitor.java +14 -0
  88. data/embulk-core/src/main/java/org/embulk/spi/Transactional.java +10 -0
  89. data/embulk-core/src/main/java/org/embulk/spi/TransactionalFileInput.java +17 -0
  90. data/embulk-core/src/main/java/org/embulk/spi/TransactionalFileOutput.java +19 -0
  91. data/embulk-core/src/main/java/org/embulk/spi/TransactionalPageOutput.java +17 -0
  92. data/embulk-core/src/main/java/org/embulk/spi/time/DateTimeZoneSerDe.java +57 -0
  93. data/embulk-core/src/main/java/org/embulk/spi/time/JRubyTimeParserHelper.java +8 -0
  94. data/embulk-core/src/main/java/org/embulk/spi/time/JRubyTimeParserHelperFactory.java +6 -0
  95. data/embulk-core/src/main/java/org/embulk/spi/time/Timestamp.java +159 -0
  96. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormat.java +98 -0
  97. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormatter.java +55 -0
  98. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParseException.java +6 -0
  99. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParser.java +60 -0
  100. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampSerDe.java +50 -0
  101. data/embulk-core/src/main/java/org/embulk/spi/type/AbstractType.java +55 -0
  102. data/embulk-core/src/main/java/org/embulk/spi/type/BooleanType.java +12 -0
  103. data/embulk-core/src/main/java/org/embulk/spi/type/DoubleType.java +12 -0
  104. data/embulk-core/src/main/java/org/embulk/spi/type/LongType.java +12 -0
  105. data/embulk-core/src/main/java/org/embulk/spi/type/StringType.java +12 -0
  106. data/embulk-core/src/main/java/org/embulk/spi/type/TimestampType.java +39 -0
  107. data/embulk-core/src/main/java/org/embulk/spi/type/Type.java +15 -0
  108. data/embulk-core/src/main/java/org/embulk/spi/type/TypeDeserializer.java +47 -0
  109. data/embulk-core/src/main/java/org/embulk/spi/type/Types.java +14 -0
  110. data/embulk-core/src/main/java/org/embulk/spi/util/CharsetSerDe.java +55 -0
  111. data/embulk-core/src/main/java/org/embulk/spi/util/Decoders.java +81 -0
  112. data/embulk-core/src/main/java/org/embulk/spi/util/Encoders.java +81 -0
  113. data/embulk-core/src/main/java/org/embulk/spi/util/FileInputInputStream.java +110 -0
  114. data/embulk-core/src/main/java/org/embulk/spi/util/FileOutputOutputStream.java +94 -0
  115. data/embulk-core/src/main/java/org/embulk/spi/util/InputStreamFileInput.java +111 -0
  116. data/embulk-core/src/main/java/org/embulk/spi/util/Inputs.java +74 -0
  117. data/embulk-core/src/main/java/org/embulk/spi/util/LineDecoder.java +118 -0
  118. data/embulk-core/src/main/java/org/embulk/spi/util/LineEncoder.java +109 -0
  119. data/embulk-core/src/main/java/org/embulk/spi/util/ListFileInput.java +52 -0
  120. data/embulk-core/src/main/java/org/embulk/spi/util/Newline.java +38 -0
  121. data/embulk-core/src/main/java/org/embulk/spi/util/PagePrinter.java +102 -0
  122. data/embulk-core/src/main/java/org/embulk/spi/util/Pages.java +139 -0
  123. data/embulk-core/src/test/java/org/embulk/EmbulkTestRuntime.java +110 -0
  124. data/embulk-core/src/test/java/org/embulk/GuiceBinder.java +72 -0
  125. data/embulk-core/src/test/java/org/embulk/RandomManager.java +53 -0
  126. data/embulk-core/src/test/java/org/embulk/TestPluginSourceModule.java +23 -0
  127. data/embulk-core/src/test/java/org/embulk/TestUtilityModule.java +17 -0
  128. data/embulk-core/src/test/java/org/embulk/config/TestConfigSource.java +114 -0
  129. data/embulk-core/src/test/java/org/embulk/config/TestTaskSource.java +70 -0
  130. data/embulk-core/src/test/java/org/embulk/plugin/MockPluginSource.java +57 -0
  131. data/embulk-core/src/test/java/org/embulk/plugin/TestPluginType.java +18 -0
  132. data/embulk-core/src/test/java/org/embulk/spi/MockFileOutput.java +63 -0
  133. data/embulk-core/src/test/java/org/embulk/spi/MockFormatterPlugin.java +101 -0
  134. data/embulk-core/src/test/java/org/embulk/spi/MockParserPlugin.java +73 -0
  135. data/embulk-core/src/test/java/org/embulk/spi/PageTestUtils.java +78 -0
  136. data/embulk-core/src/test/java/org/embulk/spi/TestFileInputInputStream.java +67 -0
  137. data/embulk-core/src/test/java/org/embulk/spi/TestFileInputRunner.java +180 -0
  138. data/embulk-core/src/test/java/org/embulk/spi/TestFileOutputRunner.java +192 -0
  139. data/embulk-core/src/test/java/org/embulk/spi/TestInputStreamFileInput.java +188 -0
  140. data/embulk-core/src/test/java/org/embulk/spi/TestPageBuilderReader.java +301 -0
  141. data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestamp.java +116 -0
  142. data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestampFormatterParser.java +52 -0
  143. data/embulk-core/src/test/java/org/embulk/spi/type/TestTypeSerDe.java +45 -0
  144. data/embulk-core/src/test/java/org/embulk/spi/util/TestLineDecoder.java +132 -0
  145. data/embulk-core/src/test/java/org/embulk/spi/util/TestLineEncoder.java +123 -0
  146. data/embulk-standards/build.gradle +6 -0
  147. data/embulk-standards/pom.xml +68 -0
  148. data/embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java +158 -0
  149. data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +233 -0
  150. data/embulk-standards/src/main/java/org/embulk/standards/CsvTokenizer.java +355 -0
  151. data/embulk-standards/src/main/java/org/embulk/standards/GzipFileDecoderPlugin.java +55 -0
  152. data/embulk-standards/src/main/java/org/embulk/standards/GzipFileEncoderPlugin.java +39 -0
  153. data/embulk-standards/src/main/java/org/embulk/standards/LocalFileInputPlugin.java +138 -0
  154. data/embulk-standards/src/main/java/org/embulk/standards/LocalFileOutputPlugin.java +128 -0
  155. data/embulk-standards/src/main/java/org/embulk/standards/NullOutputPlugin.java +46 -0
  156. data/embulk-standards/src/main/java/org/embulk/standards/S3FileInputPlugin.java +238 -0
  157. data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginExtension.java +16 -0
  158. data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginModule.java +44 -0
  159. data/embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java +71 -0
  160. data/embulk-standards/src/main/resources/META-INF/services/org.embulk.spi.Extension +1 -0
  161. data/embulk-standards/src/test/java/org/embulk/standards/TestCsvParserPlugin.java +69 -0
  162. data/embulk-standards/src/test/java/org/embulk/standards/TestCsvTokenizer.java +291 -0
  163. data/embulk-standards/src/test/java/org/embulk/standards/TestS3FileInputPlugin.java +43 -0
  164. data/embulk.gemspec +27 -0
  165. data/examples/config.yml +34 -0
  166. data/examples/csv/sample.csv.gz +0 -0
  167. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  168. data/gradle/wrapper/gradle-wrapper.properties +6 -0
  169. data/gradlew +164 -0
  170. data/gradlew.bat +90 -0
  171. data/lib/embulk.rb +16 -0
  172. data/lib/embulk/buffer.rb +17 -0
  173. data/lib/embulk/column.rb +47 -0
  174. data/lib/embulk/command/embulk.rb +39 -0
  175. data/lib/embulk/command/embulk_example.rb +32 -0
  176. data/lib/embulk/command/embulk_generate_bin.rb +62 -0
  177. data/lib/embulk/command/embulk_run.rb +243 -0
  178. data/lib/embulk/data/bundle/.bundle/config +3 -0
  179. data/lib/embulk/data/bundle/Gemfile +31 -0
  180. data/lib/embulk/data/bundle/Gemfile.lock +8 -0
  181. data/lib/embulk/data/bundle/embulk/input_example.rb +40 -0
  182. data/lib/embulk/data/bundle/embulk/output_example.rb +51 -0
  183. data/lib/embulk/data_source.rb +66 -0
  184. data/lib/embulk/error.rb +5 -0
  185. data/lib/embulk/guess_charset.rb +26 -0
  186. data/lib/embulk/guess_csv.rb +195 -0
  187. data/lib/embulk/guess_gzip.rb +18 -0
  188. data/lib/embulk/guess_newline.rb +20 -0
  189. data/lib/embulk/guess_plugin.rb +113 -0
  190. data/lib/embulk/input_plugin.rb +53 -0
  191. data/lib/embulk/java/bootstrap.rb +12 -0
  192. data/lib/embulk/java/imports.rb +26 -0
  193. data/lib/embulk/java/time_helper.rb +77 -0
  194. data/lib/embulk/output_plugin.rb +104 -0
  195. data/lib/embulk/page.rb +28 -0
  196. data/lib/embulk/page_builder.rb +22 -0
  197. data/lib/embulk/plugin.rb +152 -0
  198. data/lib/embulk/plugin_registry.rb +70 -0
  199. data/lib/embulk/schema.rb +85 -0
  200. data/lib/embulk/time_format_guess.rb +331 -0
  201. data/lib/embulk/version.rb +3 -0
  202. data/pom.xml +533 -0
  203. data/settings.gradle +5 -0
  204. metadata +370 -0
@@ -0,0 +1,70 @@
1
+
2
+ module Embulk
3
+ require 'embulk/error'
4
+
5
# Registry that maps plugin type names (stored as Symbols) to registered
# values for one plugin category. When a type has not been registered yet,
# #lookup tries to load a file named "<search_prefix><type>" which is
# expected to call #register as a side effect of being required.
class PluginRegistry
  # category      - label interpolated into the "Unknown ... plugin" error.
  # search_prefix - string prepended to the type to build the feature name
  #                 that is passed to `require`.
  def initialize(category, search_prefix)
    @category = category
    @search_prefix = search_prefix
    @map = {}
  end

  attr_reader :category

  # Registers +value+ under +type+ (converted with to_sym). Returns +value+.
  def register(type, value)
    @map[type.to_sym] = value
  end

  # Returns the value registered for +type+. If nothing is registered, runs
  # #search once and checks again.
  #
  # Raises ConfigError when the type is still unknown after searching.
  def lookup(type)
    type = type.to_sym
    if value = @map[type]
      return value
    end
    search(type)
    if value = @map[type]
      return value
    end
    raise ConfigError, "Unknown #{@category} plugin '#{type}'."
  end

  # Tries to require the file that should register +type+. Stops at the
  # first require that succeeds. Candidates, in order:
  #
  # 1. the plain feature name,
  # 2. "<name>.rb" files that exist under each $LOAD_PATH entry,
  # 3. the highest-version installed gem that contains the feature.
  #
  # A LoadError from any single candidate is treated as "not here" and the
  # next candidate is tried; this method itself never raises LoadError.
  def search(type)
    name = "#{@search_prefix}#{type}"
    return if try_require(name)

    # search from $LOAD_PATH
    # NOTE(review): the original comment said the sort is there "to prefer
    # newer version", but ascending order tries the lexicographically
    # smallest path first. Order is kept as it was -- confirm the intent.
    candidates = $LOAD_PATH.map do |lp|
      lpath = File.expand_path(File.join(lp, "#{name}.rb"))
      File.exist?(lpath) ? lpath : nil
    end
    candidates.compact.sort.each do |path|
      return if try_require(path)
    end

    # search gems
    if defined?(::Gem::Specification) && ::Gem::Specification.respond_to?(:find_all)
      specs = ::Gem::Specification.find_all do |spec|
        spec.contains_requirable_file? name
      end

      # prefer newer version
      newest = specs.sort_by {|spec| spec.version }.last
      if newest
        # Only one of the gem's require paths needs to contain the file, so
        # a missing file under another require path must not abort the search.
        newest.require_paths.each do |lib|
          return if try_require("#{newest.full_gem_path}/#{lib}/#{name}")
        end
      end
    end

    nil
  end

  private

  # Requires +feature+; returns true when it loaded (or was already loaded)
  # and false when `require` raised LoadError.
  def try_require(feature)
    require feature
    true
  rescue LoadError
    false
  end
end
70
+ end
@@ -0,0 +1,85 @@
1
+ module Embulk
2
+
3
+ require 'embulk/column'
4
+
5
# An Array of columns that also carries two lambdas, generated once per
# schema with eval, for reading a whole record from a reader object and for
# writing a whole record to a builder object. The instance is frozen at the
# end of construction.
class Schema < Array
  # src - the columns; passed straight to Array#initialize.
  #
  # Raises RuntimeError ("Unknown type ...") when a column type is not one
  # of :boolean, :long, :double, :string or :timestamp.
  def initialize(src)
    super

    # One source line per column type for the generated reader lambda.
    # Local names here deliberately avoid "record", "reader" and "builder":
    # the eval'd lambdas close over this method's binding.
    read_templates = {
      :boolean   => lambda {|idx| "record << reader.getBoolean(#{idx})" },
      :long      => lambda {|idx| "record << reader.getLong(#{idx})" },
      :double    => lambda {|idx| "record << reader.getDouble(#{idx})" },
      :string    => lambda {|idx| "record << reader.getString(#{idx})" },
      :timestamp => lambda {|idx| "record << reader.getTimestamp(#{idx}).getRubyTime(JRuby.runtime)" },
    }
    read_lines = map do |col|
      template = read_templates.fetch(col.type) { raise "Unknown type #{col.type.inspect}" }
      template.call(col.index)
    end
    read_source = (["lambda do |reader|", "record = []"] + read_lines + ["record", "end"]).join("\n")
    @record_reader = eval(read_source)

    # One source line per column type for the generated writer lambda.
    write_templates = {
      :boolean   => lambda {|idx| "builder.setBoolean(#{idx}, record[#{idx}])" },
      :long      => lambda {|idx| "builder.setLong(#{idx}, record[#{idx}])" },
      :double    => lambda {|idx| "builder.setDouble(#{idx}, record[#{idx}])" },
      :string    => lambda {|idx| "builder.setString(#{idx}, record[#{idx}])" },
      :timestamp => lambda {|idx| "builder.setTimestamp(#{idx}, java_timestamp_class.fromRubyTime(record[#{idx}]))" },
    }
    write_lines = map do |col|
      template = write_templates.fetch(col.type) { raise "Unknown type #{col.type.inspect}" }
      template.call(col.index)
    end
    write_source = (
      ["lambda do |builder,record|", "java_timestamp_class = ::Embulk::Java::Timestamp"] +
      write_lines +
      ["builder.addRecord", "end"]
    ).join("\n")
    @record_writer = eval(write_source)

    @names = map {|col| col.name }
    @types = map {|col| col.type }

    freeze
  end

  attr_reader :names, :types

  # Calls the generated reader lambda with +page_reader+ and returns the
  # Array of column values it builds.
  def read_record(page_reader)
    @record_reader.call(page_reader)
  end

  # Calls the generated writer lambda, which sets every column of +record+
  # on +page_builder+ and then calls addRecord on it.
  def write_record(page_builder, record)
    @record_writer.call(page_builder, record)
  end

  if Embulk.java?
    # Builds a Schema from the columns returned by java_schema.getColumns.
    def self.from_java_object(java_schema)
      new java_schema.getColumns.map {|java_column| Column.from_java_object(java_column) }
    end

    # Converts every column with #java_object and wraps the result in
    # Java::Schema.
    def java_object
      Java::Schema.new(self.map {|col| col.java_object })
    end
  end
end
84
+
85
+ end
@@ -0,0 +1,331 @@
1
# Declared as nested modules (instead of `module Embulk::TimeFormatGuess`)
# so this file loads even when the Embulk constant has not been defined yet;
# the resulting constant path is the same.
module Embulk
  # Guesses a strftime/strptime-style format string from sample timestamp
  # texts. Entry point: TimeFormatGuess.guess(texts).
  module TimeFormatGuess
    # Regexp fragments for individual timestamp fields. The *_NODELIM
    # variants require two digits because they are used where fields are
    # written back to back with no delimiter.
    module Parts
      YEAR = /[1-4][0-9]{3}/
      MONTH = /10|11|12|[0 ]?[0-9]/
      MONTH_NODELIM = /10|11|12|[0][0-9]/
      DAY = /[1-2][0-9]|[0 ]?[1-9]|30|31/
      DAY_NODELIM = /[1-2][0-9]|[0][1-9]|30|31/
      HOUR = /20|21|22|23|24|1[0-9]|[0 ]?[0-9]/
      HOUR_NODELIM = /20|21|22|23|24|1[0-9]|[0][0-9]/
      MINUTE = SECOND = /60|[1-5][0-9]|[0 ]?[0-9]/
      MINUTE_NODELIM = SECOND_NODELIM = /60|[1-5][0-9]|[0][0-9]/

      MONTH_NAME_SHORT = /Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec/
      MONTH_NAME_FULL = /January|February|March|April|May|June|July|August|September|October|November|December/

      WEEKDAY_NAME_SHORT = /Sun|Mon|Tue|Wed|Thu|Fri|Sat/
      WEEKDAY_NAME_FULL = /Sunday|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday/
    end

    # Result of GuessPattern#match: an ordered list of parts (:year, :month,
    # ...), the delimiter that precedes each part after the first, and one
    # option per part (a heading style Symbol, a fraction digit count, or nil).
    class GuessMatch
      def initialize(delimiters, parts, part_options)
        @delimiters = delimiters
        @parts = parts
        @part_options = part_options
      end

      # Builds the format string: each part's directive, separated by the
      # recorded delimiters.
      #
      # Raises RuntimeError for a part this class does not know.
      def format
        pieces = []
        @parts.each_with_index do |part, i|
          pieces << @delimiters[i-1] if i != 0
          pieces << directive_for(part, @part_options[i])
        end
        pieces.join
      end

      # Matches with equal delimiters and parts can be merged with #merge!.
      def mergeable_group
        [@delimiters, @parts]
      end

      attr_reader :part_options

      # Folds the part options of another match from the same mergeable
      # group into this one. For each part: if one side is nil the other
      # side is used; otherwise the greater of the two (by sort order) wins,
      # so e.g. the largest fraction digit count seen is kept.
      def merge!(another_in_group)
        other_options = another_in_group.part_options
        @part_options.size.times do |i|
          mine = @part_options[i]
          theirs = other_options[i]
          @part_options[i] =
            if mine.nil?
              theirs
            elsif theirs.nil?
              mine
            else
              [mine, theirs].sort.last
            end
        end
      end

      private

      # Returns the format directive for one part given its option.
      def directive_for(part, option)
        case part
        when :year
          '%Y'
        when :month
          # '%_m' (blank padded) and '%-m' (no padding) are not supported,
          # so every heading option maps to '%m'.
          '%m'
        when :day
          # unpadded days are not supported and fall back to '%d'
          option == :blank ? '%e' : '%d'
        when :hour
          # unpadded hours are not supported; '%k' is used for them
          (option == :blank || option == :none) ? '%k' : '%H'
        when :minute
          # heading options are not supported
          '%M'
        when :second
          # heading options are not supported
          '%S'
        when :frac
          # option is the number of fraction digits seen
          option <= 3 ? '%L' : '%N'
        when :zone_off
          '%z'
        when :zone_abb
          '%Z'
        else
          raise "Unknown part: #{part}"
        end
      end
    end

    # Pattern that recognises numeric date / date-time texts and describes
    # them as a GuessMatch.
    class GuessPattern
      include Parts

      date_delims = /[\/\-]/
      # yyyy-MM-dd
      YMD = /(?<year>#{YEAR})(?<date_delim>#{date_delims})(?<month>#{MONTH})\k<date_delim>(?<day>#{DAY})/
      YMD_NODELIM = /(?<year>#{YEAR})(?<month>#{MONTH_NODELIM})(?<day>#{DAY_NODELIM})/
      # dd/MM/yyyy
      DMY = /(?<day>#{DAY})(?<date_delim>#{date_delims})(?<month>#{MONTH})\k<date_delim>(?<year>#{YEAR})/
      DMY_NODELIM = /(?<day>#{DAY_NODELIM})(?<month>#{MONTH_NODELIM})(?<year>#{YEAR})/

      frac = /[0-9]{1,24}/
      time_delims = /[\:\-]/
      frac_delims = /[\.\,]/
      TIME = /(?<hour>#{HOUR})(?<time_delim>#{time_delims})(?<minute>#{MINUTE})(?:\k<time_delim>(?<second>#{SECOND})(?:(?<frac_delim>#{frac_delims})(?<frac>#{frac}))?)?/
      TIME_NODELIM = /(?<hour>#{HOUR_NODELIM})(?<minute>#{MINUTE_NODELIM})((?<second>#{SECOND_NODELIM})(?:(?<frac_delim>#{frac_delims})(?<frac>#{frac}))?)?/

      TZ = /(?<zone_space> )?(?<zone>(?<zone_off>[\-\+]\d\d(?::?\d\d)?)|(?<zone_abb>[A-Z]{3}))|(?<z>Z)/

      # Returns a GuessMatch describing +text+, or nil when +text+ is not a
      # date (optionally followed by a time and a zone) this pattern knows.
      def match(text)
        delimiters = []
        parts = []
        part_options = []

        if dm = (/^#{YMD}(?<rest>.*?)$/.match(text) or /^#{YMD_NODELIM}(?<rest>.*?)$/.match(text))
          date_delim = capture_or_empty(dm, "date_delim")

          parts << :year
          part_options << nil
          delimiters << date_delim

          parts << :month
          part_options << part_heading_option(dm["month"])
          delimiters << date_delim

          parts << :day
          part_options << part_heading_option(dm["day"])

        elsif dm = (/^#{DMY}(?<rest>.*?)$/.match(text) or /^#{DMY_NODELIM}(?<rest>.*?)$/.match(text))
          date_delim = capture_or_empty(dm, "date_delim")

          parts << :day
          part_options << part_heading_option(dm["day"])
          delimiters << date_delim

          parts << :month
          part_options << part_heading_option(dm["month"])
          delimiters << date_delim

          # No delimiter is recorded after the last date part: delimiters[i-1]
          # must stay aligned with parts[i] for the time parts that follow.
          parts << :year
          part_options << nil

        else
          return nil
        end
        rest = dm["rest"]

        date_time_delims = /[ _T]/
        if tm = (
              /^(?<date_time_delim>#{date_time_delims})#{TIME}(?<rest>.*?)?$/.match(rest) or
              /^(?<date_time_delim>#{date_time_delims})#{TIME_NODELIM}(?<rest>.*?)?$/.match(rest) or
              (date_delim == "" && /^#{TIME_NODELIM}(?<rest>.*?)?$/.match(rest))
            )
          date_time_delim = capture_or_empty(tm, "date_time_delim")
          time_delim = capture_or_empty(tm, "time_delim")

          delimiters << date_time_delim
          parts << :hour
          part_options << part_heading_option(tm["hour"])

          delimiters << time_delim
          parts << :minute
          part_options << part_heading_option(tm["minute"])

          if tm["second"]
            delimiters << time_delim
            parts << :second
            part_options << part_heading_option(tm["second"])
          end

          if tm["frac"]
            delimiters << tm["frac_delim"]
            parts << :frac
            part_options << tm["frac"].size
          end

          rest = tm["rest"]
        end

        if zm = /^#{TZ}$/.match(rest)
          # Parenthesised on purpose: `a << b || c` parses as `(a << b) || c`,
          # which would push nil when the zone has no leading space.
          delimiters << (zm["zone_space"] || '')
          if zm["z"]
            # TODO ISO 8601
            parts << :zone_off
          elsif zm["zone_off"]
            parts << :zone_off
          else
            parts << :zone_abb
          end
          part_options << nil

          return GuessMatch.new(delimiters, parts, part_options)

        elsif rest =~ /^\s*$/
          return GuessMatch.new(delimiters, parts, part_options)

        else
          return nil
        end
      end

      # Classifies how a numeric field is padded: :zero for a leading "0",
      # :blank for a leading space, :none for a single character, and nil
      # otherwise.
      def part_heading_option(text)
        if text[0] == '0'
          :zero
        elsif text[0] == ' '
          :blank
        elsif text.size == 1
          :none
        else
          nil
        end
      end

      private

      # Returns the named capture of +md+, or "" when the regexp that
      # produced +md+ has no group of that name or the group did not match.
      def capture_or_empty(md, name)
        md.names.include?(name) ? (md[name] || "") : ""
      end
    end

    # Match object for a RegexpPattern: it just carries a fixed format string.
    class RegexpMatch
      def initialize(format)
        @format = format
      end

      attr_reader :format

      # Matches that carry the same format string belong to the same group.
      def mergeable_group
        @format
      end

      # Nothing to merge: the format is fixed.
      def merge!(another_in_group)
      end
    end

    # Pattern that maps a whole-text regexp to a fixed format string.
    class RegexpPattern
      def initialize(regexp, format)
        @regexp = regexp
        @match = RegexpMatch.new(format)
      end

      # Returns the (shared) RegexpMatch when +text+ matches, nil otherwise.
      def match(text)
        if @regexp =~ text
          return @match
        else
          return nil
        end
      end
    end

    # Whole-text regexps for a few well-known textual timestamp layouts.
    module StandardPatterns
      include Parts

      RFC_822_1123 = /^#{WEEKDAY_NAME_SHORT}, \d\d #{MONTH_NAME_SHORT} \d\d\d\d \d\d:\d\d:\d\d [a-zA-Z]{3}$/
      RFC_850_1035 = /^#{WEEKDAY_NAME_FULL}, \d\d-#{MONTH_NAME_SHORT}-\d\d \d\d:\d\d:\d\d [a-zA-Z]{3}$/
      APACHE_CLF = /^\d\d\/#{MONTH_NAME_SHORT}\/\d\d\d\d \d\d:\d\d:\d\d [\-\+]\d\d(?::?\d\d)?$/
      ANSI_C_ASCTIME = /^#{WEEKDAY_NAME_SHORT} #{MONTH_NAME_SHORT} \d\d? \d\d:\d\d:\d\d \d\d\d\d$/
    end

    # Patterns tried, in order, against every sample text. The zone
    # directive follows what each regexp accepts: '%Z' where it matches a
    # three-letter zone name, '%z' where it matches a numeric offset.
    PATTERNS = [
      GuessPattern.new,
      RegexpPattern.new(StandardPatterns::RFC_822_1123, "%a, %d %b %Y %H:%M:%S %Z"),
      RegexpPattern.new(StandardPatterns::RFC_850_1035, "%A, %d-%b-%y %H:%M:%S %Z"),
      RegexpPattern.new(StandardPatterns::APACHE_CLF, "%d/%b/%Y %H:%M:%S %z"),
      RegexpPattern.new(StandardPatterns::ANSI_C_ASCTIME, "%a %b %e %H:%M:%S %Y"),
    ]

    # Guesses one format string for the given sample text(s).
    #
    # texts - a String or an Array of Strings; empty strings are ignored.
    #
    # Every text is matched against PATTERNS, the matches are grouped by
    # #mergeable_group, and the group with the most matches wins (the group
    # seen first wins a tie). The winning group's matches are merged and the
    # resulting format is returned. Returns nil when nothing matched.
    def self.guess(texts)
      texts = Array(texts).select {|text| text != "" }
      matches = texts.map do |text|
        PATTERNS.map {|pattern| pattern.match(text) }.compact
      end.flatten
      return nil if matches.empty?

      groups = matches.group_by {|match| match.mergeable_group }
      best_match_group = groups.values.max_by {|group| group.size }
      best_match = best_match_group.first
      best_match_group.drop(1).each {|m| best_match.merge!(m) }
      best_match.format
    end
  end
end