embulk 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (204) hide show
  1. checksums.yaml +15 -0
  2. data/.gitignore +13 -0
  3. data/Gemfile +3 -0
  4. data/Gemfile.lock +33 -0
  5. data/README.md +117 -0
  6. data/Rakefile +58 -0
  7. data/bin/embulk +63 -0
  8. data/build.gradle +149 -0
  9. data/embulk-cli/build.gradle +6 -0
  10. data/embulk-cli/pom.xml +94 -0
  11. data/embulk-cli/src/main/java/org/embulk/cli/Main.java +15 -0
  12. data/embulk-core/build.gradle +6 -0
  13. data/embulk-core/pom.xml +143 -0
  14. data/embulk-core/src/main/java/org/embulk/EmbulkService.java +39 -0
  15. data/embulk-core/src/main/java/org/embulk/command/Runner.java +199 -0
  16. data/embulk-core/src/main/java/org/embulk/command/TablePrinter.java +119 -0
  17. data/embulk-core/src/main/java/org/embulk/config/CommitReport.java +26 -0
  18. data/embulk-core/src/main/java/org/embulk/config/Config.java +15 -0
  19. data/embulk-core/src/main/java/org/embulk/config/ConfigDefault.java +15 -0
  20. data/embulk-core/src/main/java/org/embulk/config/ConfigException.java +20 -0
  21. data/embulk-core/src/main/java/org/embulk/config/ConfigLoader.java +83 -0
  22. data/embulk-core/src/main/java/org/embulk/config/ConfigSource.java +28 -0
  23. data/embulk-core/src/main/java/org/embulk/config/DataSource.java +35 -0
  24. data/embulk-core/src/main/java/org/embulk/config/DataSourceImpl.java +208 -0
  25. data/embulk-core/src/main/java/org/embulk/config/DataSourceSerDe.java +80 -0
  26. data/embulk-core/src/main/java/org/embulk/config/GenericTypeReference.java +20 -0
  27. data/embulk-core/src/main/java/org/embulk/config/ModelManager.java +125 -0
  28. data/embulk-core/src/main/java/org/embulk/config/NextConfig.java +26 -0
  29. data/embulk-core/src/main/java/org/embulk/config/Task.java +10 -0
  30. data/embulk-core/src/main/java/org/embulk/config/TaskInvocationHandler.java +180 -0
  31. data/embulk-core/src/main/java/org/embulk/config/TaskSerDe.java +343 -0
  32. data/embulk-core/src/main/java/org/embulk/config/TaskSource.java +28 -0
  33. data/embulk-core/src/main/java/org/embulk/config/TaskValidationException.java +37 -0
  34. data/embulk-core/src/main/java/org/embulk/config/TaskValidator.java +24 -0
  35. data/embulk-core/src/main/java/org/embulk/exec/ExecModule.java +45 -0
  36. data/embulk-core/src/main/java/org/embulk/exec/ExecuteInterruptedException.java +10 -0
  37. data/embulk-core/src/main/java/org/embulk/exec/ExecuteResult.java +19 -0
  38. data/embulk-core/src/main/java/org/embulk/exec/ExtensionServiceLoaderModule.java +43 -0
  39. data/embulk-core/src/main/java/org/embulk/exec/ForSystemConfig.java +16 -0
  40. data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +307 -0
  41. data/embulk-core/src/main/java/org/embulk/exec/LocalExecutor.java +274 -0
  42. data/embulk-core/src/main/java/org/embulk/exec/LoggerProvider.java +30 -0
  43. data/embulk-core/src/main/java/org/embulk/exec/NoSampleException.java +10 -0
  44. data/embulk-core/src/main/java/org/embulk/exec/PooledBufferAllocator.java +58 -0
  45. data/embulk-core/src/main/java/org/embulk/exec/PreviewExecutor.java +138 -0
  46. data/embulk-core/src/main/java/org/embulk/exec/PreviewResult.java +27 -0
  47. data/embulk-core/src/main/java/org/embulk/exec/PreviewedNoticeError.java +17 -0
  48. data/embulk-core/src/main/java/org/embulk/exec/SamplingParserPlugin.java +116 -0
  49. data/embulk-core/src/main/java/org/embulk/exec/SystemConfigModule.java +24 -0
  50. data/embulk-core/src/main/java/org/embulk/jruby/JRubyPluginSource.java +69 -0
  51. data/embulk-core/src/main/java/org/embulk/jruby/JRubyScriptingModule.java +100 -0
  52. data/embulk-core/src/main/java/org/embulk/plugin/BuiltinPluginSourceModule.java +17 -0
  53. data/embulk-core/src/main/java/org/embulk/plugin/InjectedPluginSource.java +92 -0
  54. data/embulk-core/src/main/java/org/embulk/plugin/PluginManager.java +34 -0
  55. data/embulk-core/src/main/java/org/embulk/plugin/PluginSource.java +6 -0
  56. data/embulk-core/src/main/java/org/embulk/plugin/PluginSourceNotMatchException.java +19 -0
  57. data/embulk-core/src/main/java/org/embulk/plugin/PluginType.java +47 -0
  58. data/embulk-core/src/main/java/org/embulk/plugin/SetThreadContextClassLoader.java +19 -0
  59. data/embulk-core/src/main/java/org/embulk/spi/Buffer.java +113 -0
  60. data/embulk-core/src/main/java/org/embulk/spi/BufferAllocator.java +8 -0
  61. data/embulk-core/src/main/java/org/embulk/spi/Column.java +92 -0
  62. data/embulk-core/src/main/java/org/embulk/spi/ColumnConfig.java +79 -0
  63. data/embulk-core/src/main/java/org/embulk/spi/DecoderPlugin.java +16 -0
  64. data/embulk-core/src/main/java/org/embulk/spi/EncoderPlugin.java +16 -0
  65. data/embulk-core/src/main/java/org/embulk/spi/Exec.java +76 -0
  66. data/embulk-core/src/main/java/org/embulk/spi/ExecAction.java +6 -0
  67. data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +105 -0
  68. data/embulk-core/src/main/java/org/embulk/spi/Extension.java +42 -0
  69. data/embulk-core/src/main/java/org/embulk/spi/FileInput.java +11 -0
  70. data/embulk-core/src/main/java/org/embulk/spi/FileInputPlugin.java +19 -0
  71. data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +113 -0
  72. data/embulk-core/src/main/java/org/embulk/spi/FileOutput.java +13 -0
  73. data/embulk-core/src/main/java/org/embulk/spi/FileOutputPlugin.java +20 -0
  74. data/embulk-core/src/main/java/org/embulk/spi/FileOutputRunner.java +167 -0
  75. data/embulk-core/src/main/java/org/embulk/spi/FormatterPlugin.java +18 -0
  76. data/embulk-core/src/main/java/org/embulk/spi/GuessPlugin.java +9 -0
  77. data/embulk-core/src/main/java/org/embulk/spi/InputPlugin.java +20 -0
  78. data/embulk-core/src/main/java/org/embulk/spi/OutputPlugin.java +21 -0
  79. data/embulk-core/src/main/java/org/embulk/spi/Page.java +45 -0
  80. data/embulk-core/src/main/java/org/embulk/spi/PageBuilder.java +327 -0
  81. data/embulk-core/src/main/java/org/embulk/spi/PageFormat.java +47 -0
  82. data/embulk-core/src/main/java/org/embulk/spi/PageOutput.java +11 -0
  83. data/embulk-core/src/main/java/org/embulk/spi/PageReader.java +227 -0
  84. data/embulk-core/src/main/java/org/embulk/spi/ParserPlugin.java +17 -0
  85. data/embulk-core/src/main/java/org/embulk/spi/Schema.java +101 -0
  86. data/embulk-core/src/main/java/org/embulk/spi/SchemaConfig.java +52 -0
  87. data/embulk-core/src/main/java/org/embulk/spi/SchemaVisitor.java +14 -0
  88. data/embulk-core/src/main/java/org/embulk/spi/Transactional.java +10 -0
  89. data/embulk-core/src/main/java/org/embulk/spi/TransactionalFileInput.java +17 -0
  90. data/embulk-core/src/main/java/org/embulk/spi/TransactionalFileOutput.java +19 -0
  91. data/embulk-core/src/main/java/org/embulk/spi/TransactionalPageOutput.java +17 -0
  92. data/embulk-core/src/main/java/org/embulk/spi/time/DateTimeZoneSerDe.java +57 -0
  93. data/embulk-core/src/main/java/org/embulk/spi/time/JRubyTimeParserHelper.java +8 -0
  94. data/embulk-core/src/main/java/org/embulk/spi/time/JRubyTimeParserHelperFactory.java +6 -0
  95. data/embulk-core/src/main/java/org/embulk/spi/time/Timestamp.java +159 -0
  96. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormat.java +98 -0
  97. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormatter.java +55 -0
  98. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParseException.java +6 -0
  99. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParser.java +60 -0
  100. data/embulk-core/src/main/java/org/embulk/spi/time/TimestampSerDe.java +50 -0
  101. data/embulk-core/src/main/java/org/embulk/spi/type/AbstractType.java +55 -0
  102. data/embulk-core/src/main/java/org/embulk/spi/type/BooleanType.java +12 -0
  103. data/embulk-core/src/main/java/org/embulk/spi/type/DoubleType.java +12 -0
  104. data/embulk-core/src/main/java/org/embulk/spi/type/LongType.java +12 -0
  105. data/embulk-core/src/main/java/org/embulk/spi/type/StringType.java +12 -0
  106. data/embulk-core/src/main/java/org/embulk/spi/type/TimestampType.java +39 -0
  107. data/embulk-core/src/main/java/org/embulk/spi/type/Type.java +15 -0
  108. data/embulk-core/src/main/java/org/embulk/spi/type/TypeDeserializer.java +47 -0
  109. data/embulk-core/src/main/java/org/embulk/spi/type/Types.java +14 -0
  110. data/embulk-core/src/main/java/org/embulk/spi/util/CharsetSerDe.java +55 -0
  111. data/embulk-core/src/main/java/org/embulk/spi/util/Decoders.java +81 -0
  112. data/embulk-core/src/main/java/org/embulk/spi/util/Encoders.java +81 -0
  113. data/embulk-core/src/main/java/org/embulk/spi/util/FileInputInputStream.java +110 -0
  114. data/embulk-core/src/main/java/org/embulk/spi/util/FileOutputOutputStream.java +94 -0
  115. data/embulk-core/src/main/java/org/embulk/spi/util/InputStreamFileInput.java +111 -0
  116. data/embulk-core/src/main/java/org/embulk/spi/util/Inputs.java +74 -0
  117. data/embulk-core/src/main/java/org/embulk/spi/util/LineDecoder.java +118 -0
  118. data/embulk-core/src/main/java/org/embulk/spi/util/LineEncoder.java +109 -0
  119. data/embulk-core/src/main/java/org/embulk/spi/util/ListFileInput.java +52 -0
  120. data/embulk-core/src/main/java/org/embulk/spi/util/Newline.java +38 -0
  121. data/embulk-core/src/main/java/org/embulk/spi/util/PagePrinter.java +102 -0
  122. data/embulk-core/src/main/java/org/embulk/spi/util/Pages.java +139 -0
  123. data/embulk-core/src/test/java/org/embulk/EmbulkTestRuntime.java +110 -0
  124. data/embulk-core/src/test/java/org/embulk/GuiceBinder.java +72 -0
  125. data/embulk-core/src/test/java/org/embulk/RandomManager.java +53 -0
  126. data/embulk-core/src/test/java/org/embulk/TestPluginSourceModule.java +23 -0
  127. data/embulk-core/src/test/java/org/embulk/TestUtilityModule.java +17 -0
  128. data/embulk-core/src/test/java/org/embulk/config/TestConfigSource.java +114 -0
  129. data/embulk-core/src/test/java/org/embulk/config/TestTaskSource.java +70 -0
  130. data/embulk-core/src/test/java/org/embulk/plugin/MockPluginSource.java +57 -0
  131. data/embulk-core/src/test/java/org/embulk/plugin/TestPluginType.java +18 -0
  132. data/embulk-core/src/test/java/org/embulk/spi/MockFileOutput.java +63 -0
  133. data/embulk-core/src/test/java/org/embulk/spi/MockFormatterPlugin.java +101 -0
  134. data/embulk-core/src/test/java/org/embulk/spi/MockParserPlugin.java +73 -0
  135. data/embulk-core/src/test/java/org/embulk/spi/PageTestUtils.java +78 -0
  136. data/embulk-core/src/test/java/org/embulk/spi/TestFileInputInputStream.java +67 -0
  137. data/embulk-core/src/test/java/org/embulk/spi/TestFileInputRunner.java +180 -0
  138. data/embulk-core/src/test/java/org/embulk/spi/TestFileOutputRunner.java +192 -0
  139. data/embulk-core/src/test/java/org/embulk/spi/TestInputStreamFileInput.java +188 -0
  140. data/embulk-core/src/test/java/org/embulk/spi/TestPageBuilderReader.java +301 -0
  141. data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestamp.java +116 -0
  142. data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestampFormatterParser.java +52 -0
  143. data/embulk-core/src/test/java/org/embulk/spi/type/TestTypeSerDe.java +45 -0
  144. data/embulk-core/src/test/java/org/embulk/spi/util/TestLineDecoder.java +132 -0
  145. data/embulk-core/src/test/java/org/embulk/spi/util/TestLineEncoder.java +123 -0
  146. data/embulk-standards/build.gradle +6 -0
  147. data/embulk-standards/pom.xml +68 -0
  148. data/embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java +158 -0
  149. data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +233 -0
  150. data/embulk-standards/src/main/java/org/embulk/standards/CsvTokenizer.java +355 -0
  151. data/embulk-standards/src/main/java/org/embulk/standards/GzipFileDecoderPlugin.java +55 -0
  152. data/embulk-standards/src/main/java/org/embulk/standards/GzipFileEncoderPlugin.java +39 -0
  153. data/embulk-standards/src/main/java/org/embulk/standards/LocalFileInputPlugin.java +138 -0
  154. data/embulk-standards/src/main/java/org/embulk/standards/LocalFileOutputPlugin.java +128 -0
  155. data/embulk-standards/src/main/java/org/embulk/standards/NullOutputPlugin.java +46 -0
  156. data/embulk-standards/src/main/java/org/embulk/standards/S3FileInputPlugin.java +238 -0
  157. data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginExtension.java +16 -0
  158. data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginModule.java +44 -0
  159. data/embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java +71 -0
  160. data/embulk-standards/src/main/resources/META-INF/services/org.embulk.spi.Extension +1 -0
  161. data/embulk-standards/src/test/java/org/embulk/standards/TestCsvParserPlugin.java +69 -0
  162. data/embulk-standards/src/test/java/org/embulk/standards/TestCsvTokenizer.java +291 -0
  163. data/embulk-standards/src/test/java/org/embulk/standards/TestS3FileInputPlugin.java +43 -0
  164. data/embulk.gemspec +27 -0
  165. data/examples/config.yml +34 -0
  166. data/examples/csv/sample.csv.gz +0 -0
  167. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  168. data/gradle/wrapper/gradle-wrapper.properties +6 -0
  169. data/gradlew +164 -0
  170. data/gradlew.bat +90 -0
  171. data/lib/embulk.rb +16 -0
  172. data/lib/embulk/buffer.rb +17 -0
  173. data/lib/embulk/column.rb +47 -0
  174. data/lib/embulk/command/embulk.rb +39 -0
  175. data/lib/embulk/command/embulk_example.rb +32 -0
  176. data/lib/embulk/command/embulk_generate_bin.rb +62 -0
  177. data/lib/embulk/command/embulk_run.rb +243 -0
  178. data/lib/embulk/data/bundle/.bundle/config +3 -0
  179. data/lib/embulk/data/bundle/Gemfile +31 -0
  180. data/lib/embulk/data/bundle/Gemfile.lock +8 -0
  181. data/lib/embulk/data/bundle/embulk/input_example.rb +40 -0
  182. data/lib/embulk/data/bundle/embulk/output_example.rb +51 -0
  183. data/lib/embulk/data_source.rb +66 -0
  184. data/lib/embulk/error.rb +5 -0
  185. data/lib/embulk/guess_charset.rb +26 -0
  186. data/lib/embulk/guess_csv.rb +195 -0
  187. data/lib/embulk/guess_gzip.rb +18 -0
  188. data/lib/embulk/guess_newline.rb +20 -0
  189. data/lib/embulk/guess_plugin.rb +113 -0
  190. data/lib/embulk/input_plugin.rb +53 -0
  191. data/lib/embulk/java/bootstrap.rb +12 -0
  192. data/lib/embulk/java/imports.rb +26 -0
  193. data/lib/embulk/java/time_helper.rb +77 -0
  194. data/lib/embulk/output_plugin.rb +104 -0
  195. data/lib/embulk/page.rb +28 -0
  196. data/lib/embulk/page_builder.rb +22 -0
  197. data/lib/embulk/plugin.rb +152 -0
  198. data/lib/embulk/plugin_registry.rb +70 -0
  199. data/lib/embulk/schema.rb +85 -0
  200. data/lib/embulk/time_format_guess.rb +331 -0
  201. data/lib/embulk/version.rb +3 -0
  202. data/pom.xml +533 -0
  203. data/settings.gradle +5 -0
  204. metadata +370 -0
@@ -0,0 +1,70 @@
1
+
2
+ module Embulk
3
+ require 'embulk/error'
4
+
5
# Holds the plugins registered for one plugin category. When a requested type
# is not registered yet, the registry tries to `require` a file named after the
# search prefix and the type, then looks the type up again.
class PluginRegistry
  # category:      label interpolated into the "Unknown ... plugin" error.
  # search_prefix: string prepended to the plugin type to form the name that
  #                is passed to `require`.
  def initialize(category, search_prefix)
    @map = {}
    @category = category
    @search_prefix = search_prefix
  end

  attr_reader :category

  # Stores +value+ under the symbolized +type+ and returns +value+.
  def register(type, value)
    @map[type.to_sym] = value
  end

  # Returns the value registered for +type+. On a miss, runs #search once and
  # retries; raises ConfigError when the type is still not registered.
  def lookup(type)
    key = type.to_sym

    registered = @map[key]
    return registered if registered

    search(key)

    registered = @map[key]
    return registered if registered

    raise ConfigError, "Unknown #{@category} plugin '#{key}'."
  end

  # Tries to load the file for +type+, stopping at the first `require` that
  # does not raise LoadError. Sources are tried in this order: the plain
  # feature name, matching files found under $LOAD_PATH, installed gems.
  def search(type)
    name = "#{@search_prefix}#{type}"

    begin
      require name
      return
    rescue LoadError
      # not directly requirable; keep searching
    end

    # search from $LOAD_PATH
    candidates = $LOAD_PATH.each_with_object([]) do |dir, found|
      file = File.expand_path(File.join(dir, "#{name}.rb"))
      found << file if File.exist?(file)
    end

    # sort to prefer newer version
    ([name] + candidates.sort).each do |path|
      begin
        require path
        return
      rescue LoadError
        # try the next candidate
      end
    end

    # search gems
    return unless defined?(::Gem::Specification) && ::Gem::Specification.respond_to?(:find_all)

    # prefer newer version
    newest = Gem::Specification.find_all { |spec|
      spec.contains_requirable_file? name
    }.sort_by { |spec| spec.version }.last
    return unless newest

    newest.require_paths.each do |lib|
      require "#{newest.full_gem_path}/#{lib}/#{name}"
    end
  end
end
70
+ end
@@ -0,0 +1,85 @@
1
+ module Embulk
2
+
3
+ require 'embulk/column'
4
+
5
# An Array of columns that also carries two generated lambdas: one that reads
# a record (an Array of values) from a reader object and one that writes a
# record through a builder object. Both lambdas are built from source text
# with one statement per column and compiled with `eval`.
class Schema < Array
  # src: the columns; forwarded unchanged to Array#initialize.
  # Raises RuntimeError when a column has a type other than :boolean, :long,
  # :double, :string or :timestamp. The instance is frozen before returning.
  def initialize(src)
    super

    # One "record << reader.getXxx(index)" statement per column.
    reader_body = map do |column|
      idx = column.index
      case column.type
      when :boolean   then "record << reader.getBoolean(#{idx})"
      when :long      then "record << reader.getLong(#{idx})"
      when :double    then "record << reader.getDouble(#{idx})"
      when :string    then "record << reader.getString(#{idx})"
      when :timestamp then "record << reader.getTimestamp(#{idx}).getRubyTime(JRuby.runtime)"
      else
        raise "Unknown type #{column.type.inspect}"
      end
    end
    @record_reader = eval([
      "lambda do |reader|",
      "record = []",
      *reader_body,
      "record",
      "end",
    ].join("\n"))

    # One "builder.setXxx(index, record[index])" statement per column.
    writer_body = map do |column|
      idx = column.index
      case column.type
      when :boolean   then "builder.setBoolean(#{idx}, record[#{idx}])"
      when :long      then "builder.setLong(#{idx}, record[#{idx}])"
      when :double    then "builder.setDouble(#{idx}, record[#{idx}])"
      when :string    then "builder.setString(#{idx}, record[#{idx}])"
      when :timestamp then "builder.setTimestamp(#{idx}, java_timestamp_class.fromRubyTime(record[#{idx}]))"
      else
        raise "Unknown type #{column.type.inspect}"
      end
    end
    @record_writer = eval([
      "lambda do |builder,record|",
      "java_timestamp_class = ::Embulk::Java::Timestamp",
      *writer_body,
      "builder.addRecord",
      "end",
    ].join("\n"))

    @names = map { |col| col.name }
    @types = map { |col| col.type }

    freeze
  end

  attr_reader :names, :types

  # Calls the generated reader lambda with +page_reader+ and returns the
  # record Array it builds.
  def read_record(page_reader)
    @record_reader.call(page_reader)
  end

  # Calls the generated writer lambda, which sets every column of +record+ on
  # +page_builder+ and then calls addRecord on it.
  def write_record(page_builder, record)
    @record_writer.call(page_builder, record)
  end

  if Embulk.java?
    # Builds a Schema from the columns returned by java_schema.getColumns.
    def self.from_java_object(java_schema)
      ruby_columns = java_schema.getColumns.map { |java_column| Column.from_java_object(java_column) }
      new(ruby_columns)
    end

    # Converts every column with #java_object and wraps them in a Java::Schema.
    def java_object
      Java::Schema.new(map { |col| col.java_object })
    end
  end
end
84
+
85
+ end
@@ -0,0 +1,331 @@
1
+ module Embulk::TimeFormatGuess
2
# Regexp fragments for the individual fields of a date/time string. The
# *_NODELIM variants require a leading zero (no blank or missing padding) so
# that fields can be recognised when they are written without delimiters.
module Parts
  YEAR = /[1-4][0-9]{3}/
  MONTH = /10|11|12|[0 ]?[0-9]/
  MONTH_NODELIM = /10|11|12|[0][0-9]/
  # Alternation is tried left to right, so the two-digit alternatives 30 and
  # 31 must come before `[0 ]?[1-9]`; otherwise "30" matches only its leading
  # "3" and the remaining "0" is left over for the caller.
  DAY = /30|31|[1-2][0-9]|[0 ]?[1-9]/
  DAY_NODELIM = /[1-2][0-9]|[0][1-9]|30|31/
  HOUR = /20|21|22|23|24|1[0-9]|[0 ]?[0-9]/
  HOUR_NODELIM = /20|21|22|23|24|1[0-9]|[0][0-9]/
  MINUTE = SECOND = /60|[1-5][0-9]|[0 ]?[0-9]/
  MINUTE_NODELIM = SECOND_NODELIM = /60|[1-5][0-9]|[0][0-9]/

  MONTH_NAME_SHORT = /Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec/
  MONTH_NAME_FULL = /January|February|March|April|May|June|July|August|September|October|November|December/

  WEEKDAY_NAME_SHORT = /Sun|Mon|Tue|Wed|Thu|Fri|Sat/
  WEEKDAY_NAME_FULL = /Sunday|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday/
end
19
+
20
# A guessed timestamp layout: an ordered list of parts (:year, :month, ...),
# the delimiter strings between consecutive parts, and one option per part
# (a padding symbol for date/time fields, the digit count for :frac, or nil).
class GuessMatch
  # delimiters:   delimiters[i - 1] is written before parts[i].
  # parts:        part symbols in the order they appear in the text.
  # part_options: per-part option, same indexing as parts.
  def initialize(delimiters, parts, part_options)
    @delimiters = delimiters
    @parts = parts
    @part_options = part_options
  end

  # Builds the strftime-style format string for this layout.
  # Raises RuntimeError for a part symbol that is not handled below.
  def format
    format = ''.dup
    @parts.each_with_index do |part, i|
      # to_s keeps a missing (nil) delimiter from raising TypeError.
      format << @delimiters[i - 1].to_s if i != 0
      option = @part_options[i]

      case part
      when :year
        format << '%Y'

      when :month
        # '%_m' (blank padded) and '%-m' (no padding) are not supported,
        # so every padding option maps to '%m'.
        format << '%m'

      when :day
        # :none has no supported directive and falls back to '%d'.
        format << (option == :blank ? '%e' : '%d')

      when :hour
        # :none has no supported directive and falls back to '%k'.
        format << ((option == :blank || option == :none) ? '%k' : '%H')

      when :minute
        # heading options are not supported
        format << '%M'

      when :second
        # heading options are not supported
        format << '%S'

      when :frac
        # option is the number of fractional digits seen in the text.
        format << (option <= 3 ? '%L' : '%N')

      when :zone_off
        format << '%z'

      when :zone_abb
        format << '%Z'

      else
        raise "Unknown part: #{part}"
      end
    end

    format
  end

  # Matches with equal delimiters and parts can be merged with #merge!.
  def mergeable_group
    [@delimiters, @parts]
  end

  attr_reader :part_options

  # Folds the options of another match from the same mergeable group into
  # this one. A nil option yields to the other side; when both sides have a
  # value the greater one is kept (e.g. the larger fractional digit count).
  # The original computed this value but never stored it.
  def merge!(another_in_group)
    other_options = another_in_group.part_options
    @part_options.size.times do |i|
      mine = @part_options[i]
      theirs = other_options[i]
      @part_options[i] =
        if mine.nil?
          theirs
        elsif theirs.nil?
          mine
        else
          [mine, theirs].sort.last
        end
    end
  end
end
140
+
141
# Recognises numeric date / date-time strings (year-first or day-first, with
# or without delimiters, optionally followed by a time and a time zone) and
# describes the layout it found as a GuessMatch.
class GuessPattern
  include Parts

  date_delims = /[\/\-]/
  # yyyy-MM-dd
  YMD = /(?<year>#{YEAR})(?<date_delim>#{date_delims})(?<month>#{MONTH})\k<date_delim>(?<day>#{DAY})/
  YMD_NODELIM = /(?<year>#{YEAR})(?<month>#{MONTH_NODELIM})(?<day>#{DAY_NODELIM})/
  # dd/MM/yyyy
  # (these were copies of the year-first patterns, which made the day-first
  # branch of #match unreachable)
  DMY = /(?<day>#{DAY})(?<date_delim>#{date_delims})(?<month>#{MONTH})\k<date_delim>(?<year>#{YEAR})/
  DMY_NODELIM = /(?<day>#{DAY_NODELIM})(?<month>#{MONTH_NODELIM})(?<year>#{YEAR})/

  frac = /[0-9]{1,24}/
  time_delims = /[\:\-]/
  frac_delims = /[\.\,]/
  TIME = /(?<hour>#{HOUR})(?<time_delim>#{time_delims})(?<minute>#{MINUTE})(?:\k<time_delim>(?<second>#{SECOND})(?:(?<frac_delim>#{frac_delims})(?<frac>#{frac}))?)?/
  TIME_NODELIM = /(?<hour>#{HOUR_NODELIM})(?<minute>#{MINUTE_NODELIM})((?<second>#{SECOND_NODELIM})(?:(?<frac_delim>#{frac_delims})(?<frac>#{frac}))?)?/

  TZ = /(?<zone_space> )?(?<zone>(?<zone_off>[\-\+]\d\d(?::?\d\d)?)|(?<zone_abb>[A-Z]{3}))|(?<z>Z)/

  # Returns a GuessMatch describing +text+, or nil when +text+ is not a date
  # (optionally followed by a time and a zone) in one of the known layouts.
  def match(text)
    delimiters = []
    parts = []
    part_options = []

    if dm = (/^#{YMD}(?<rest>.*?)$/.match(text) or /^#{YMD_NODELIM}(?<rest>.*?)$/.match(text))
      date_delim = capture_or_empty(dm, "date_delim")

      parts << :year
      part_options << nil
      delimiters << date_delim

      parts << :month
      part_options << part_heading_option(dm["month"])
      delimiters << date_delim

      parts << :day
      part_options << part_heading_option(dm["day"])

    elsif dm = (/^#{DMY}(?<rest>.*?)$/.match(text) or /^#{DMY_NODELIM}(?<rest>.*?)$/.match(text))
      date_delim = capture_or_empty(dm, "date_delim")

      parts << :day
      part_options << part_heading_option(dm["day"])
      delimiters << date_delim

      parts << :month
      part_options << part_heading_option(dm["month"])
      delimiters << date_delim

      # No delimiter is pushed after the last date part: delimiters[i - 1]
      # must be the text written before parts[i], and an extra entry here
      # would shift every delimiter that follows.
      parts << :year
      part_options << nil

    else
      return nil
    end
    rest = dm["rest"]

    date_time_delims = /[ _T]/
    # A time without any separator from the date is only accepted when the
    # date itself was written without delimiters.
    if tm = (
          /^(?<date_time_delim>#{date_time_delims})#{TIME}(?<rest>.*?)?$/.match(rest) or
          /^(?<date_time_delim>#{date_time_delims})#{TIME_NODELIM}(?<rest>.*?)?$/.match(rest) or
          (date_delim == "" && /^#{TIME_NODELIM}(?<rest>.*?)?$/.match(rest))
        )
      date_time_delim = capture_or_empty(tm, "date_time_delim")
      time_delim = capture_or_empty(tm, "time_delim")

      delimiters << date_time_delim
      parts << :hour
      part_options << part_heading_option(tm["hour"])

      delimiters << time_delim
      parts << :minute
      part_options << part_heading_option(tm["minute"])

      if tm["second"]
        delimiters << time_delim
        parts << :second
        part_options << part_heading_option(tm["second"])
      end

      if tm["frac"]
        delimiters << tm["frac_delim"]
        parts << :frac
        # For :frac the option is the number of digits.
        part_options << tm["frac"].size
      end

      rest = tm["rest"]
    end

    if zm = /^#{TZ}$/.match(rest)
      # Parenthesised on purpose: `delimiters << x || ''` parses as
      # `(delimiters << x) || ''` and would push nil when there is no space.
      delimiters << (zm["zone_space"] || '')
      if zm["z"]
        # TODO ISO 8601
        parts << :zone_off
      elsif zm["zone_off"]
        parts << :zone_off
      else
        parts << :zone_abb
      end
      part_options << nil

      return GuessMatch.new(delimiters, parts, part_options)

    elsif rest =~ /^\s*$/
      return GuessMatch.new(delimiters, parts, part_options)

    else
      return nil
    end
  end

  # Classifies how a numeric field is padded: :zero for a leading "0",
  # :blank for a leading space, :none for a single character, nil otherwise.
  def part_heading_option(text)
    if text[0] == '0'
      :zero
    elsif text[0] == ' '
      :blank
    elsif text.size == 1
      :none
    else
      nil
    end
  end

  # Returns the named capture, or "" when the regexp that produced
  # +match_data+ does not define a group with that name (the *_NODELIM
  # patterns have no delimiter groups).
  def capture_or_empty(match_data, name)
    match_data.names.include?(name) ? match_data[name] : ""
  end
  private :capture_or_empty
end
266
+
267
# Match result for a RegexpPattern: carries a fixed format string and offers
# the same interface as GuessMatch (format, mergeable_group, merge!).
class RegexpMatch
  # format: the format string reported by #format.
  def initialize(format)
    # The original evaluated `@format` without assigning it, so #format and
    # #mergeable_group always returned nil.
    @format = format
  end

  attr_reader :format

  # Matches carrying an equal format string fall into the same group.
  def mergeable_group
    @format
  end

  # Nothing to merge: the format string is fixed.
  def merge!(another_in_group)
  end
end
281
+
282
# Pairs a regexp with a format string. One RegexpMatch is created up front
# and the same object is returned for every text the regexp matches.
class RegexpPattern
  def initialize(regexp, format)
    @match = RegexpMatch.new(format)
    @regexp = regexp
  end

  # Returns the shared RegexpMatch when +text+ matches, nil otherwise.
  def match(text)
    return nil unless @regexp =~ text
    @match
  end
end
296
+
297
# Whole-string regexps for a few fixed timestamp layouts. Each one is paired
# with its format string in PATTERNS below.
module StandardPatterns
  include Parts

  # Weekday, day, month name, 4-digit year, time, three-letter zone.
  RFC_822_1123 = /^#{WEEKDAY_NAME_SHORT}, \d\d #{MONTH_NAME_SHORT} \d\d\d\d \d\d:\d\d:\d\d [a-zA-Z]{3}$/
  # Full weekday name, dd-Mon-yy, time, three-letter zone.
  RFC_850_1035 = /^#{WEEKDAY_NAME_FULL}, \d\d-#{MONTH_NAME_SHORT}-\d\d \d\d:\d\d:\d\d [a-zA-Z]{3}$/
  # dd/Mon/yyyy, time, numeric UTC offset.
  APACHE_CLF = /^\d\d\/#{MONTH_NAME_SHORT}\/\d\d\d\d \d\d:\d\d:\d\d [\-\+]\d\d(?::?\d\d)?$/
  # Weekday, month name, day, time, 4-digit year.
  ANSI_C_ASCTIME = /^#{WEEKDAY_NAME_SHORT} #{MONTH_NAME_SHORT} \d\d? \d\d:\d\d:\d\d \d\d\d\d$/
end

# Patterns tried, in this order, against every sample text.
#
# The format strings follow what each regexp accepts: the RFC layouts end in
# an alphabetic zone, so they use %Z (zone abbreviation); Apache CLF ends in
# a numeric offset, so it uses %z. The asctime format starts with %a; the
# original "$a" was a typo that emitted a literal "$a".
PATTERNS = [
  GuessPattern.new,
  RegexpPattern.new(StandardPatterns::RFC_822_1123, "%a, %d %b %Y %H:%M:%S %Z"),
  RegexpPattern.new(StandardPatterns::RFC_850_1035, "%A, %d-%b-%y %H:%M:%S %Z"),
  RegexpPattern.new(StandardPatterns::APACHE_CLF, "%d/%b/%Y %H:%M:%S %z"),
  RegexpPattern.new(StandardPatterns::ANSI_C_ASCTIME, "%a %b %e %H:%M:%S %Y"),
]
313
+
314
# Guesses a time format string from one sample text or an Array of them.
#
# Every non-empty text is tried against every entry of PATTERNS. The matches
# are grouped by #mergeable_group, the group with the most matches wins, and
# its matches are merged into the first one, whose #format is returned.
# Returns nil when nothing matches.
def self.guess(texts)
  texts = Array(texts).select {|text| text != "" }
  matches = texts.flat_map do |text|
    PATTERNS.map {|pattern| pattern.match(text) }.compact
  end

  return nil if matches.empty?
  return matches[0].format if matches.size == 1

  match_groups = matches.group_by {|match| match.mergeable_group }
  # Each entry is a [group_key, members] pair, so the size to compare is the
  # member count; the size of the pair itself is always 2.
  best_match_group = match_groups.sort_by {|_key, members| members.size }.last[1]
  best_match, *others = best_match_group
  others.each {|m| best_match.merge!(m) }
  best_match.format
end
331
+ end