embulk 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/.gitignore +13 -0
- data/Gemfile +3 -0
- data/Gemfile.lock +33 -0
- data/README.md +117 -0
- data/Rakefile +58 -0
- data/bin/embulk +63 -0
- data/build.gradle +149 -0
- data/embulk-cli/build.gradle +6 -0
- data/embulk-cli/pom.xml +94 -0
- data/embulk-cli/src/main/java/org/embulk/cli/Main.java +15 -0
- data/embulk-core/build.gradle +6 -0
- data/embulk-core/pom.xml +143 -0
- data/embulk-core/src/main/java/org/embulk/EmbulkService.java +39 -0
- data/embulk-core/src/main/java/org/embulk/command/Runner.java +199 -0
- data/embulk-core/src/main/java/org/embulk/command/TablePrinter.java +119 -0
- data/embulk-core/src/main/java/org/embulk/config/CommitReport.java +26 -0
- data/embulk-core/src/main/java/org/embulk/config/Config.java +15 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigDefault.java +15 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigException.java +20 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigLoader.java +83 -0
- data/embulk-core/src/main/java/org/embulk/config/ConfigSource.java +28 -0
- data/embulk-core/src/main/java/org/embulk/config/DataSource.java +35 -0
- data/embulk-core/src/main/java/org/embulk/config/DataSourceImpl.java +208 -0
- data/embulk-core/src/main/java/org/embulk/config/DataSourceSerDe.java +80 -0
- data/embulk-core/src/main/java/org/embulk/config/GenericTypeReference.java +20 -0
- data/embulk-core/src/main/java/org/embulk/config/ModelManager.java +125 -0
- data/embulk-core/src/main/java/org/embulk/config/NextConfig.java +26 -0
- data/embulk-core/src/main/java/org/embulk/config/Task.java +10 -0
- data/embulk-core/src/main/java/org/embulk/config/TaskInvocationHandler.java +180 -0
- data/embulk-core/src/main/java/org/embulk/config/TaskSerDe.java +343 -0
- data/embulk-core/src/main/java/org/embulk/config/TaskSource.java +28 -0
- data/embulk-core/src/main/java/org/embulk/config/TaskValidationException.java +37 -0
- data/embulk-core/src/main/java/org/embulk/config/TaskValidator.java +24 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExecModule.java +45 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExecuteInterruptedException.java +10 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExecuteResult.java +19 -0
- data/embulk-core/src/main/java/org/embulk/exec/ExtensionServiceLoaderModule.java +43 -0
- data/embulk-core/src/main/java/org/embulk/exec/ForSystemConfig.java +16 -0
- data/embulk-core/src/main/java/org/embulk/exec/GuessExecutor.java +307 -0
- data/embulk-core/src/main/java/org/embulk/exec/LocalExecutor.java +274 -0
- data/embulk-core/src/main/java/org/embulk/exec/LoggerProvider.java +30 -0
- data/embulk-core/src/main/java/org/embulk/exec/NoSampleException.java +10 -0
- data/embulk-core/src/main/java/org/embulk/exec/PooledBufferAllocator.java +58 -0
- data/embulk-core/src/main/java/org/embulk/exec/PreviewExecutor.java +138 -0
- data/embulk-core/src/main/java/org/embulk/exec/PreviewResult.java +27 -0
- data/embulk-core/src/main/java/org/embulk/exec/PreviewedNoticeError.java +17 -0
- data/embulk-core/src/main/java/org/embulk/exec/SamplingParserPlugin.java +116 -0
- data/embulk-core/src/main/java/org/embulk/exec/SystemConfigModule.java +24 -0
- data/embulk-core/src/main/java/org/embulk/jruby/JRubyPluginSource.java +69 -0
- data/embulk-core/src/main/java/org/embulk/jruby/JRubyScriptingModule.java +100 -0
- data/embulk-core/src/main/java/org/embulk/plugin/BuiltinPluginSourceModule.java +17 -0
- data/embulk-core/src/main/java/org/embulk/plugin/InjectedPluginSource.java +92 -0
- data/embulk-core/src/main/java/org/embulk/plugin/PluginManager.java +34 -0
- data/embulk-core/src/main/java/org/embulk/plugin/PluginSource.java +6 -0
- data/embulk-core/src/main/java/org/embulk/plugin/PluginSourceNotMatchException.java +19 -0
- data/embulk-core/src/main/java/org/embulk/plugin/PluginType.java +47 -0
- data/embulk-core/src/main/java/org/embulk/plugin/SetThreadContextClassLoader.java +19 -0
- data/embulk-core/src/main/java/org/embulk/spi/Buffer.java +113 -0
- data/embulk-core/src/main/java/org/embulk/spi/BufferAllocator.java +8 -0
- data/embulk-core/src/main/java/org/embulk/spi/Column.java +92 -0
- data/embulk-core/src/main/java/org/embulk/spi/ColumnConfig.java +79 -0
- data/embulk-core/src/main/java/org/embulk/spi/DecoderPlugin.java +16 -0
- data/embulk-core/src/main/java/org/embulk/spi/EncoderPlugin.java +16 -0
- data/embulk-core/src/main/java/org/embulk/spi/Exec.java +76 -0
- data/embulk-core/src/main/java/org/embulk/spi/ExecAction.java +6 -0
- data/embulk-core/src/main/java/org/embulk/spi/ExecSession.java +105 -0
- data/embulk-core/src/main/java/org/embulk/spi/Extension.java +42 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileInput.java +11 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileInputPlugin.java +19 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileInputRunner.java +113 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileOutput.java +13 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileOutputPlugin.java +20 -0
- data/embulk-core/src/main/java/org/embulk/spi/FileOutputRunner.java +167 -0
- data/embulk-core/src/main/java/org/embulk/spi/FormatterPlugin.java +18 -0
- data/embulk-core/src/main/java/org/embulk/spi/GuessPlugin.java +9 -0
- data/embulk-core/src/main/java/org/embulk/spi/InputPlugin.java +20 -0
- data/embulk-core/src/main/java/org/embulk/spi/OutputPlugin.java +21 -0
- data/embulk-core/src/main/java/org/embulk/spi/Page.java +45 -0
- data/embulk-core/src/main/java/org/embulk/spi/PageBuilder.java +327 -0
- data/embulk-core/src/main/java/org/embulk/spi/PageFormat.java +47 -0
- data/embulk-core/src/main/java/org/embulk/spi/PageOutput.java +11 -0
- data/embulk-core/src/main/java/org/embulk/spi/PageReader.java +227 -0
- data/embulk-core/src/main/java/org/embulk/spi/ParserPlugin.java +17 -0
- data/embulk-core/src/main/java/org/embulk/spi/Schema.java +101 -0
- data/embulk-core/src/main/java/org/embulk/spi/SchemaConfig.java +52 -0
- data/embulk-core/src/main/java/org/embulk/spi/SchemaVisitor.java +14 -0
- data/embulk-core/src/main/java/org/embulk/spi/Transactional.java +10 -0
- data/embulk-core/src/main/java/org/embulk/spi/TransactionalFileInput.java +17 -0
- data/embulk-core/src/main/java/org/embulk/spi/TransactionalFileOutput.java +19 -0
- data/embulk-core/src/main/java/org/embulk/spi/TransactionalPageOutput.java +17 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/DateTimeZoneSerDe.java +57 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/JRubyTimeParserHelper.java +8 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/JRubyTimeParserHelperFactory.java +6 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/Timestamp.java +159 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormat.java +98 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampFormatter.java +55 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParseException.java +6 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParser.java +60 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampSerDe.java +50 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/AbstractType.java +55 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/BooleanType.java +12 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/DoubleType.java +12 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/LongType.java +12 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/StringType.java +12 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/TimestampType.java +39 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/Type.java +15 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/TypeDeserializer.java +47 -0
- data/embulk-core/src/main/java/org/embulk/spi/type/Types.java +14 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/CharsetSerDe.java +55 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Decoders.java +81 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Encoders.java +81 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/FileInputInputStream.java +110 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/FileOutputOutputStream.java +94 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/InputStreamFileInput.java +111 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Inputs.java +74 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/LineDecoder.java +118 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/LineEncoder.java +109 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/ListFileInput.java +52 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Newline.java +38 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/PagePrinter.java +102 -0
- data/embulk-core/src/main/java/org/embulk/spi/util/Pages.java +139 -0
- data/embulk-core/src/test/java/org/embulk/EmbulkTestRuntime.java +110 -0
- data/embulk-core/src/test/java/org/embulk/GuiceBinder.java +72 -0
- data/embulk-core/src/test/java/org/embulk/RandomManager.java +53 -0
- data/embulk-core/src/test/java/org/embulk/TestPluginSourceModule.java +23 -0
- data/embulk-core/src/test/java/org/embulk/TestUtilityModule.java +17 -0
- data/embulk-core/src/test/java/org/embulk/config/TestConfigSource.java +114 -0
- data/embulk-core/src/test/java/org/embulk/config/TestTaskSource.java +70 -0
- data/embulk-core/src/test/java/org/embulk/plugin/MockPluginSource.java +57 -0
- data/embulk-core/src/test/java/org/embulk/plugin/TestPluginType.java +18 -0
- data/embulk-core/src/test/java/org/embulk/spi/MockFileOutput.java +63 -0
- data/embulk-core/src/test/java/org/embulk/spi/MockFormatterPlugin.java +101 -0
- data/embulk-core/src/test/java/org/embulk/spi/MockParserPlugin.java +73 -0
- data/embulk-core/src/test/java/org/embulk/spi/PageTestUtils.java +78 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestFileInputInputStream.java +67 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestFileInputRunner.java +180 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestFileOutputRunner.java +192 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestInputStreamFileInput.java +188 -0
- data/embulk-core/src/test/java/org/embulk/spi/TestPageBuilderReader.java +301 -0
- data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestamp.java +116 -0
- data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestampFormatterParser.java +52 -0
- data/embulk-core/src/test/java/org/embulk/spi/type/TestTypeSerDe.java +45 -0
- data/embulk-core/src/test/java/org/embulk/spi/util/TestLineDecoder.java +132 -0
- data/embulk-core/src/test/java/org/embulk/spi/util/TestLineEncoder.java +123 -0
- data/embulk-standards/build.gradle +6 -0
- data/embulk-standards/pom.xml +68 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvFormatterPlugin.java +158 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java +233 -0
- data/embulk-standards/src/main/java/org/embulk/standards/CsvTokenizer.java +355 -0
- data/embulk-standards/src/main/java/org/embulk/standards/GzipFileDecoderPlugin.java +55 -0
- data/embulk-standards/src/main/java/org/embulk/standards/GzipFileEncoderPlugin.java +39 -0
- data/embulk-standards/src/main/java/org/embulk/standards/LocalFileInputPlugin.java +138 -0
- data/embulk-standards/src/main/java/org/embulk/standards/LocalFileOutputPlugin.java +128 -0
- data/embulk-standards/src/main/java/org/embulk/standards/NullOutputPlugin.java +46 -0
- data/embulk-standards/src/main/java/org/embulk/standards/S3FileInputPlugin.java +238 -0
- data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginExtension.java +16 -0
- data/embulk-standards/src/main/java/org/embulk/standards/StandardPluginModule.java +44 -0
- data/embulk-standards/src/main/java/org/embulk/standards/StdoutOutputPlugin.java +71 -0
- data/embulk-standards/src/main/resources/META-INF/services/org.embulk.spi.Extension +1 -0
- data/embulk-standards/src/test/java/org/embulk/standards/TestCsvParserPlugin.java +69 -0
- data/embulk-standards/src/test/java/org/embulk/standards/TestCsvTokenizer.java +291 -0
- data/embulk-standards/src/test/java/org/embulk/standards/TestS3FileInputPlugin.java +43 -0
- data/embulk.gemspec +27 -0
- data/examples/config.yml +34 -0
- data/examples/csv/sample.csv.gz +0 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +6 -0
- data/gradlew +164 -0
- data/gradlew.bat +90 -0
- data/lib/embulk.rb +16 -0
- data/lib/embulk/buffer.rb +17 -0
- data/lib/embulk/column.rb +47 -0
- data/lib/embulk/command/embulk.rb +39 -0
- data/lib/embulk/command/embulk_example.rb +32 -0
- data/lib/embulk/command/embulk_generate_bin.rb +62 -0
- data/lib/embulk/command/embulk_run.rb +243 -0
- data/lib/embulk/data/bundle/.bundle/config +3 -0
- data/lib/embulk/data/bundle/Gemfile +31 -0
- data/lib/embulk/data/bundle/Gemfile.lock +8 -0
- data/lib/embulk/data/bundle/embulk/input_example.rb +40 -0
- data/lib/embulk/data/bundle/embulk/output_example.rb +51 -0
- data/lib/embulk/data_source.rb +66 -0
- data/lib/embulk/error.rb +5 -0
- data/lib/embulk/guess_charset.rb +26 -0
- data/lib/embulk/guess_csv.rb +195 -0
- data/lib/embulk/guess_gzip.rb +18 -0
- data/lib/embulk/guess_newline.rb +20 -0
- data/lib/embulk/guess_plugin.rb +113 -0
- data/lib/embulk/input_plugin.rb +53 -0
- data/lib/embulk/java/bootstrap.rb +12 -0
- data/lib/embulk/java/imports.rb +26 -0
- data/lib/embulk/java/time_helper.rb +77 -0
- data/lib/embulk/output_plugin.rb +104 -0
- data/lib/embulk/page.rb +28 -0
- data/lib/embulk/page_builder.rb +22 -0
- data/lib/embulk/plugin.rb +152 -0
- data/lib/embulk/plugin_registry.rb +70 -0
- data/lib/embulk/schema.rb +85 -0
- data/lib/embulk/time_format_guess.rb +331 -0
- data/lib/embulk/version.rb +3 -0
- data/pom.xml +533 -0
- data/settings.gradle +5 -0
- metadata +370 -0
@@ -0,0 +1,70 @@
|
|
1
|
+
|
2
|
+
module Embulk
|
3
|
+
require 'embulk/error'
|
4
|
+
|
5
|
+
# Registry of plugins for a single category, keyed by plugin type.
#
# Plugins are stored with #register. When #lookup misses, #search tries to
# load a file named "<search_prefix><type>", on the expectation that loading
# it registers the plugin.
class PluginRegistry
  # category      - label interpolated into the "Unknown ... plugin" error.
  # search_prefix - string prepended to the type to build the require name.
  def initialize(category, search_prefix)
    @category = category
    @search_prefix = search_prefix
    @map = {}
  end

  attr_reader :category

  # Stores +value+ under +type+ (normalized with to_sym) and returns +value+.
  def register(type, value)
    type = type.to_sym
    @map[type] = value
  end

  # Returns the value registered for +type+.
  #
  # On a miss, #search is run once and the map is consulted again.
  # Raises ConfigError when the type is still unknown afterwards.
  def lookup(type)
    type = type.to_sym
    if value = @map[type]
      return value
    end
    search(type)
    if value = @map[type]
      return value
    end
    raise ConfigError, "Unknown #{@category} plugin '#{type}'."
  end

  # Tries to load the file that provides plugin +type+.
  #
  # Three strategies are tried in order, stopping at the first successful
  # require: a plain require of the name, an absolute-path require of every
  # matching file under $LOAD_PATH, and finally the installed gem with the
  # highest version that contains the file.
  #
  # In the gem step every entry of the gem's require_paths is tried; a
  # LoadError is only re-raised when none of them could be loaded, so a gem
  # with several require paths no longer fails on the ones lacking the file.
  def search(type)
    name = "#{@search_prefix}#{type}"
    begin
      require name
      return
    rescue LoadError
      # fall through to the explicit searches below
    end

    # search from $LOAD_PATH
    candidates = $LOAD_PATH.map { |lp| File.expand_path(File.join(lp, "#{name}.rb")) }
    candidates = candidates.select { |path| File.exist?(path) }.sort
    candidates.each do |path|
      begin
        require path
        return
      rescue LoadError
        # try the next candidate
      end
    end

    # search gems
    return unless defined?(::Gem::Specification) && ::Gem::Specification.respond_to?(:find_all)

    specs = ::Gem::Specification.find_all do |spec|
      spec.contains_requirable_file? name
    end

    # prefer newer version
    newest = specs.sort_by { |spec| spec.version }.last
    return unless newest

    last_error = nil
    newest.require_paths.each do |lib|
      begin
        require "#{newest.full_gem_path}/#{lib}/#{name}"
        return
      rescue LoadError => e
        last_error = e
      end
    end
    raise last_error if last_error
  end
end
|
70
|
+
end
|
@@ -0,0 +1,85 @@
|
|
1
|
+
module Embulk
|
2
|
+
|
3
|
+
require 'embulk/column'
|
4
|
+
|
5
|
+
# Frozen array of columns that also knows how to move one record between a
# plain Ruby array and a page reader / page builder.
#
# The per-record conversion code is generated once, as Ruby source, from the
# column list and compiled with eval into two lambdas.
class Schema < Array
  # src - array of columns; each column is asked for #type, #index and #name.
  #
  # Raises RuntimeError when a column has a type other than :boolean, :long,
  # :double, :string or :timestamp.
  def initialize(src)
    super

    # Types whose accessor is simply get<Suffix>/set<Suffix>.
    scalar_suffixes = {
      :boolean => "Boolean",
      :long    => "Long",
      :double  => "Double",
      :string  => "String",
    }

    # NOTE(review): the generated source interpolates only column.index;
    # presumably always an Integer - confirm against Column.
    reader_lines = ["lambda do |reader|", "record = []"]
    each do |column|
      type = column.type
      reader_lines <<
        if (suffix = scalar_suffixes[type])
          "record << reader.get#{suffix}(#{column.index})"
        elsif :timestamp == type
          "record << reader.getTimestamp(#{column.index}).getRubyTime(JRuby.runtime)"
        else
          raise "Unknown type #{column.type.inspect}"
        end
    end
    reader_lines << "record" << "end"
    @record_reader = eval(reader_lines.join("\n"))

    writer_lines = [
      "lambda do |builder,record|",
      "java_timestamp_class = ::Embulk::Java::Timestamp",
    ]
    each do |column|
      type = column.type
      writer_lines <<
        if (suffix = scalar_suffixes[type])
          "builder.set#{suffix}(#{column.index}, record[#{column.index}])"
        elsif :timestamp == type
          "builder.setTimestamp(#{column.index}, java_timestamp_class.fromRubyTime(record[#{column.index}]))"
        else
          raise "Unknown type #{column.type.inspect}"
        end
    end
    writer_lines << "builder.addRecord" << "end"
    @record_writer = eval(writer_lines.join("\n"))

    @names = map(&:name)
    @types = map(&:type)

    freeze
  end

  # Column names and column types, in column order.
  attr_reader :names, :types

  # Reads one value per column from +page_reader+ and returns them as an array.
  def read_record(page_reader)
    @record_reader.call(page_reader)
  end

  # Sets one value per column on +page_builder+ from +record+ (indexed by
  # column index), then calls addRecord on the builder.
  def write_record(page_builder, record)
    @record_writer.call(page_builder, record)
  end

  if Embulk.java?
    # Builds a Schema from the columns of a Java schema object.
    def self.from_java_object(java_schema)
      new(java_schema.getColumns.map { |column| Column.from_java_object(column) })
    end

    # Converts this schema into a Java::Schema built from each column's
    # java_object.
    def java_object
      Java::Schema.new(map { |column| column.java_object })
    end
  end
end
|
84
|
+
|
85
|
+
end
|
@@ -0,0 +1,331 @@
|
|
1
|
+
module Embulk::TimeFormatGuess
|
2
|
+
# Regular-expression fragments for the individual fields of a timestamp.
#
# The plain variants accept an optional leading "0" or space before a single
# digit; the *_NODELIM variants require exactly two digits so that fields can
# be told apart when no delimiter separates them.
module Parts
  # Four digits, first digit 1-4.
  YEAR = /[1-4][0-9]{3}/

  MONTH = /10|11|12|[0 ]?[0-9]/
  MONTH_NODELIM = /10|11|12|[0][0-9]/

  DAY = /[1-2][0-9]|[0 ]?[1-9]|30|31/
  DAY_NODELIM = /[1-2][0-9]|[0][1-9]|30|31/

  # Accepts 24 as well as 0-23.
  HOUR = /20|21|22|23|24|1[0-9]|[0 ]?[0-9]/
  HOUR_NODELIM = /20|21|22|23|24|1[0-9]|[0][0-9]/

  # Minutes and seconds share one pattern object; 60 is accepted.
  SECOND = /60|[1-5][0-9]|[0 ]?[0-9]/
  MINUTE = SECOND
  SECOND_NODELIM = /60|[1-5][0-9]|[0][0-9]/
  MINUTE_NODELIM = SECOND_NODELIM

  MONTH_NAME_SHORT = /Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec/
  MONTH_NAME_FULL = /January|February|March|April|May|June|July|August|September|October|November|December/

  WEEKDAY_NAME_SHORT = /Sun|Mon|Tue|Wed|Thu|Fri|Sat/
  WEEKDAY_NAME_FULL = /Sunday|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday/
end
|
19
|
+
|
20
|
+
# Result of matching one text against GuessPattern: the recognised parts,
# the delimiters found between them, and a per-part option.
#
# parts        - symbols such as :year, :month, :day, :hour, :minute,
#                :second, :frac, :zone_off, :zone_abb.
# delimiters   - delimiters[i - 1] is the text emitted before parts[i].
# part_options - :zero / :blank / :none / nil for padded fields, the digit
#                count for :frac, nil otherwise.
class GuessMatch
  def initialize(delimiters, parts, part_options)
    @delimiters = delimiters
    @parts = parts
    @part_options = part_options
  end

  # Builds the strftime-style format string for this match.
  #
  # Raises RuntimeError for a part symbol that is not handled here.
  def format
    pieces = []
    @parts.each_with_index do |part, i|
      # to_s: a missing (nil) delimiter is treated as "no delimiter".
      pieces << @delimiters[i - 1].to_s unless i.zero?
      pieces << directive_for(part, @part_options[i])
    end
    pieces.join
  end

  # Matches with equal delimiters and parts may be merged with #merge!.
  def mergeable_group
    [@delimiters, @parts]
  end

  attr_reader :part_options

  # Folds the options of another match from the same mergeable group into
  # this one. For every position the larger non-nil option wins, so a
  # fraction seen with 3 and with 6 digits ends up as 6 (and thus '%N').
  def merge!(another_in_group)
    other_options = another_in_group.part_options
    @part_options.each_index do |i|
      @part_options[i] = [@part_options[i], other_options[i]].compact.max
    end
  end

  private

  # Returns the format directive for one part given its option.
  def directive_for(part, option)
    case part
    when :year
      '%Y'
    when :month
      # '%_m' and '%-m' are not supported, so every option maps to '%m'.
      '%m'
    when :day
      # :none ('%-d') is not supported and falls back to '%d'.
      option == :blank ? '%e' : '%d'
    when :hour
      # :none is not supported and is treated like :blank.
      (option == :blank || option == :none) ? '%k' : '%H'
    when :minute
      # heading options are not supported
      '%M'
    when :second
      # heading options are not supported
      '%S'
    when :frac
      # option is the number of fractional digits seen.
      option <= 3 ? '%L' : '%N'
    when :zone_off
      '%z'
    when :zone_abb
      '%Z'
    else
      raise "Unknown part: #{part}"
    end
  end
end
|
140
|
+
|
141
|
+
# Recognises numeric timestamps made of a date, an optional time and an
# optional time zone, and reports the pieces as a GuessMatch.
class GuessPattern
  include Parts

  date_delims = /[\/\-]/
  # yyyy-MM-dd
  YMD = /(?<year>#{YEAR})(?<date_delim>#{date_delims})(?<month>#{MONTH})\k<date_delim>(?<day>#{DAY})/
  YMD_NODELIM = /(?<year>#{YEAR})(?<month>#{MONTH_NODELIM})(?<day>#{DAY_NODELIM})/
  # dd/MM/yyyy
  DMY = /(?<day>#{DAY})(?<date_delim>#{date_delims})(?<month>#{MONTH})\k<date_delim>(?<year>#{YEAR})/
  DMY_NODELIM = /(?<day>#{DAY_NODELIM})(?<month>#{MONTH_NODELIM})(?<year>#{YEAR})/

  frac = /[0-9]{1,24}/
  time_delims = /[\:\-]/
  frac_delims = /[\.\,]/
  TIME = /(?<hour>#{HOUR})(?<time_delim>#{time_delims})(?<minute>#{MINUTE})(?:\k<time_delim>(?<second>#{SECOND})(?:(?<frac_delim>#{frac_delims})(?<frac>#{frac}))?)?/
  TIME_NODELIM = /(?<hour>#{HOUR_NODELIM})(?<minute>#{MINUTE_NODELIM})((?<second>#{SECOND_NODELIM})(?:(?<frac_delim>#{frac_delims})(?<frac>#{frac}))?)?/

  TZ = /(?<zone_space> )?(?<zone>(?<zone_off>[\-\+]\d\d(?::?\d\d)?)|(?<zone_abb>[A-Z]{3}))|(?<z>Z)/

  # Tries to interpret +text+ as date [time] [zone].
  #
  # Returns a GuessMatch, or nil when the text does not start with a
  # recognised date or when unrecognised text remains after the last
  # recognised part. Year-first dates are tried before day-first dates.
  def match(text)
    delimiters = []
    parts = []
    part_options = []

    if dm = (/^#{YMD}(?<rest>.*?)$/.match(text) || /^#{YMD_NODELIM}(?<rest>.*?)$/.match(text))
      # The *_NODELIM patterns define no date_delim group.
      date_delim = named_capture(dm, "date_delim")

      parts << :year
      part_options << nil
      delimiters << date_delim

      parts << :month
      part_options << part_heading_option(dm["month"])
      delimiters << date_delim

      parts << :day
      part_options << part_heading_option(dm["day"])

    elsif dm = (/^#{DMY}(?<rest>.*?)$/.match(text) || /^#{DMY_NODELIM}(?<rest>.*?)$/.match(text))
      date_delim = named_capture(dm, "date_delim")

      parts << :day
      part_options << part_heading_option(dm["day"])
      delimiters << date_delim

      parts << :month
      part_options << part_heading_option(dm["month"])
      delimiters << date_delim

      # No delimiter is pushed after the last date part; the next one
      # belongs to whatever follows (time or zone).
      parts << :year
      part_options << nil

    else
      return nil
    end
    rest = dm["rest"]

    date_time_delims = /[ _T]/
    if tm = (
          /^(?<date_time_delim>#{date_time_delims})#{TIME}(?<rest>.*?)?$/.match(rest) ||
          /^(?<date_time_delim>#{date_time_delims})#{TIME_NODELIM}(?<rest>.*?)?$/.match(rest) ||
          (date_delim == "" && /^#{TIME_NODELIM}(?<rest>.*?)?$/.match(rest))
        )
      # Depending on which alternative matched, these groups may not exist.
      date_time_delim = named_capture(tm, "date_time_delim")
      time_delim = named_capture(tm, "time_delim")

      delimiters << date_time_delim
      parts << :hour
      part_options << part_heading_option(tm["hour"])

      delimiters << time_delim
      parts << :minute
      part_options << part_heading_option(tm["minute"])

      if tm["second"]
        delimiters << time_delim
        parts << :second
        part_options << part_heading_option(tm["second"])
      end

      if tm["frac"]
        delimiters << tm["frac_delim"]
        parts << :frac
        part_options << tm["frac"].size
      end

      rest = tm["rest"]
    end

    if zm = /^#{TZ}$/.match(rest)
      # Parenthesised: `<<` binds tighter than `||`.
      delimiters << (zm["zone_space"] || '')
      # A bare "Z" is reported as an offset as well. TODO ISO 8601
      parts << (zm["zone_abb"] ? :zone_abb : :zone_off)
      part_options << nil

      return GuessMatch.new(delimiters, parts, part_options)

    elsif rest =~ /^\s*$/
      return GuessMatch.new(delimiters, parts, part_options)

    else
      return nil
    end
  end

  # Classifies how a one- or two-character numeric field is padded:
  # :zero for a leading "0", :blank for a leading space, :none for a single
  # character, nil otherwise.
  def part_heading_option(text)
    if text[0] == '0'
      :zero
    elsif text[0] == ' '
      :blank
    elsif text.size == 1
      :none
    else
      nil
    end
  end

  private

  # Returns the named capture as a string, or "" when the matched pattern
  # has no group of that name.
  def named_capture(match_data, name)
    match_data.names.include?(name) ? match_data[name].to_s : ""
  end
end
|
266
|
+
|
267
|
+
# Match result carrying a fixed, pre-defined format string.
class RegexpMatch
  # format - the format string reported by #format.
  def initialize(format)
    @format = format
  end

  attr_reader :format

  # Matches with the same format string belong to the same group.
  def mergeable_group
    @format
  end

  # Nothing to merge: every match in a group has the same format.
  def merge!(another_in_group)
  end
end
|
281
|
+
|
282
|
+
# Pattern that pairs a regular expression with a fixed format string.
class RegexpPattern
  # regexp - tested against each text in #match.
  # format - wrapped in a single RegexpMatch that every successful match
  #          returns.
  def initialize(regexp, format)
    @regexp = regexp
    @match = RegexpMatch.new(format)
  end

  # Returns the shared RegexpMatch when +text+ matches, nil otherwise.
  def match(text)
    return nil unless @regexp =~ text

    @match
  end
end
|
296
|
+
|
297
|
+
# Whole-line regular expressions for a few fixed textual timestamp layouts,
# composed from the month and weekday name fragments in Parts.
module StandardPatterns
  include Parts

  # Short weekday, two-digit day, short month, four-digit year, time and a
  # three-letter zone, e.g. "Sun, 06 Nov 1994 08:49:37 GMT".
  RFC_822_1123 = /^#{WEEKDAY_NAME_SHORT}, \d\d #{MONTH_NAME_SHORT} \d\d\d\d \d\d:\d\d:\d\d [a-zA-Z]{3}$/

  # Full weekday, dash-separated date with two-digit year, time and a
  # three-letter zone, e.g. "Sunday, 06-Nov-94 08:49:37 GMT".
  RFC_850_1035 = /^#{WEEKDAY_NAME_FULL}, \d\d-#{MONTH_NAME_SHORT}-\d\d \d\d:\d\d:\d\d [a-zA-Z]{3}$/

  # Slash-separated date, a space, time and a numeric offset,
  # e.g. "10/Oct/2000 13:55:36 -0700".
  APACHE_CLF = /^\d\d\/#{MONTH_NAME_SHORT}\/\d\d\d\d \d\d:\d\d:\d\d [\-\+]\d\d(?::?\d\d)?$/

  # Short weekday, short month, one- or two-digit day, time and year,
  # e.g. "Sun Nov 6 08:49:37 1994".
  ANSI_C_ASCTIME = /^#{WEEKDAY_NAME_SHORT} #{MONTH_NAME_SHORT} \d\d? \d\d:\d\d:\d\d \d\d\d\d$/
end
|
305
|
+
|
306
|
+
# Patterns consulted by guess, each responding to match(text).
#
# The format strings mirror what the paired regular expression accepts:
# the RFC patterns end in a three-letter zone name (%Z), the Apache pattern
# ends in a numeric offset (%z).
PATTERNS = [
  GuessPattern.new,
  RegexpPattern.new(StandardPatterns::RFC_822_1123, "%a, %d %b %Y %H:%M:%S %Z"),
  RegexpPattern.new(StandardPatterns::RFC_850_1035, "%A, %d-%b-%y %H:%M:%S %Z"),
  RegexpPattern.new(StandardPatterns::APACHE_CLF, "%d/%b/%Y %H:%M:%S %z"),
  RegexpPattern.new(StandardPatterns::ANSI_C_ASCTIME, "%a %b %e %H:%M:%S %Y"),
]
|
313
|
+
|
314
|
+
# Guesses a timestamp format string from one or more sample texts.
#
# texts - a single text or an array of texts; empty strings are ignored.
#
# Every text is run through every entry of PATTERNS. The matches are grouped
# by mergeable_group, the group with the most matches is chosen, its matches
# are merged into the first one, and that match's format is returned.
#
# Returns the format string, or nil when no pattern matched any text.
def self.guess(texts)
  texts = Array(texts).select { |text| text != "" }
  matches = texts.flat_map do |text|
    PATTERNS.map { |pattern| pattern.match(text) }.compact
  end

  return nil if matches.empty?
  return matches[0].format if matches.size == 1

  # group_by yields [key, members] pairs; rank them by member count.
  match_groups = matches.group_by { |match| match.mergeable_group }
  _, best_match_group = match_groups.max_by { |_, members| members.size }

  best_match, *others = best_match_group
  others.each { |m| best_match.merge!(m) }
  best_match.format
end
|
331
|
+
end
|