caleb-chronic 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README +167 -0
- data/lib/chronic.rb +127 -0
- data/lib/chronic/chronic.rb +249 -0
- data/lib/chronic/grabber.rb +26 -0
- data/lib/chronic/handlers.rb +524 -0
- data/lib/chronic/ordinal.rb +40 -0
- data/lib/chronic/pointer.rb +27 -0
- data/lib/chronic/repeater.rb +129 -0
- data/lib/chronic/repeaters/repeater_day.rb +52 -0
- data/lib/chronic/repeaters/repeater_day_name.rb +51 -0
- data/lib/chronic/repeaters/repeater_day_portion.rb +94 -0
- data/lib/chronic/repeaters/repeater_fortnight.rb +70 -0
- data/lib/chronic/repeaters/repeater_hour.rb +57 -0
- data/lib/chronic/repeaters/repeater_minute.rb +57 -0
- data/lib/chronic/repeaters/repeater_month.rb +66 -0
- data/lib/chronic/repeaters/repeater_month_name.rb +98 -0
- data/lib/chronic/repeaters/repeater_season.rb +150 -0
- data/lib/chronic/repeaters/repeater_season_name.rb +45 -0
- data/lib/chronic/repeaters/repeater_second.rb +41 -0
- data/lib/chronic/repeaters/repeater_time.rb +124 -0
- data/lib/chronic/repeaters/repeater_week.rb +73 -0
- data/lib/chronic/repeaters/repeater_weekday.rb +77 -0
- data/lib/chronic/repeaters/repeater_weekend.rb +65 -0
- data/lib/chronic/repeaters/repeater_year.rb +64 -0
- data/lib/chronic/scalar.rb +76 -0
- data/lib/chronic/separator.rb +91 -0
- data/lib/chronic/time_zone.rb +23 -0
- data/lib/numerizer/numerizer.rb +97 -0
- data/test/suite.rb +9 -0
- data/test/test_Chronic.rb +50 -0
- data/test/test_Handler.rb +110 -0
- data/test/test_Numerizer.rb +52 -0
- data/test/test_RepeaterDayName.rb +52 -0
- data/test/test_RepeaterFortnight.rb +63 -0
- data/test/test_RepeaterHour.rb +65 -0
- data/test/test_RepeaterMonth.rb +47 -0
- data/test/test_RepeaterMonthName.rb +57 -0
- data/test/test_RepeaterTime.rb +72 -0
- data/test/test_RepeaterWeek.rb +63 -0
- data/test/test_RepeaterWeekday.rb +56 -0
- data/test/test_RepeaterWeekend.rb +75 -0
- data/test/test_RepeaterYear.rb +63 -0
- data/test/test_Span.rb +24 -0
- data/test/test_Time.rb +50 -0
- data/test/test_Token.rb +26 -0
- data/test/test_parsing.rb +711 -0
- metadata +101 -0
@@ -0,0 +1,76 @@
|
|
1
|
+
module Chronic
|
2
|
+
|
3
|
+
class Scalar < Tag #:nodoc:
|
4
|
+
# Runs every scalar scanner (generic, day, month, year) over each token and
# attaches whatever tags they produce. The token that follows (nil for the
# last one) is handed to each scanner as well. Returns the tokens array.
def self.scan(tokens)
  scanners = [:scan_for_scalars, :scan_for_days, :scan_for_months, :scan_for_years]
  tokens.each_with_index do |token, index|
    following = tokens[index + 1]
    scanners.each do |scanner|
      found = send(scanner, token, following)
      token.tag(found) if found
    end
  end
  tokens
end
|
14
|
+
|
15
|
+
# Builds a generic Scalar tag for a token whose word is made up solely of
# digits.
#
# token      - token under inspection; only its +word+ is read.
# post_token - the token that follows, or nil when there is none.
#
# Returns a Scalar holding the integer value, or nil when the word is not
# purely numeric or is followed by a day-portion word (am, pm, morning, ...).
def self.scan_for_scalars(token, post_token)
  # \d+ rather than \d*: an empty word must not be tagged as the number 0.
  return nil unless token.word =~ /^\d+$/

  # Compare the following token's *word*. The token object itself is never
  # equal to one of these strings, so testing it directly never excluded
  # anything.
  return nil if post_token && %w{am pm morning afternoon evening night}.include?(post_token.word)

  Scalar.new(token.word.to_i)
end
|
23
|
+
|
24
|
+
# Builds a ScalarDay tag for a one- or two-digit word in the range 1..31.
#
# token      - token under inspection; only its +word+ is read.
# post_token - the token that follows, or nil when there is none.
#
# Returns a ScalarDay holding the integer value, or nil when the word is not
# a valid day number or is followed by a day-portion word (am, pm, ...).
def self.scan_for_days(token, post_token)
  return nil unless token.word =~ /^\d\d?$/

  day = token.word.to_i
  return nil if day > 31 || day < 1
  return nil if post_token && %w{am pm morning afternoon evening night}.include?(post_token.word)

  ScalarDay.new(day)
end
|
33
|
+
|
34
|
+
# Builds a ScalarMonth tag for a one- or two-digit word in the range 1..12.
#
# token      - token under inspection; only its +word+ is read.
# post_token - the token that follows, or nil when there is none.
#
# Returns a ScalarMonth holding the integer value, or nil when the word is
# not a valid month number or is followed by a day-portion word (am, pm, ...).
def self.scan_for_months(token, post_token)
  return nil unless token.word =~ /^\d\d?$/

  month = token.word.to_i
  return nil if month > 12 || month < 1
  return nil if post_token && %w{am pm morning afternoon evening night}.include?(post_token.word)

  ScalarMonth.new(month)
end
|
43
|
+
|
44
|
+
# Builds a ScalarYear tag for a word of one to four digits (a three- or
# four-digit word must not start with 0).
#
# token      - token under inspection; only its +word+ is read.
# post_token - the token that follows, or nil when there is none.
#
# Returns a ScalarYear holding the integer value, or nil when the word does
# not match or is followed by a day-portion word (am, pm, ...).
def self.scan_for_years(token, post_token)
  return nil unless token.word =~ /^([1-9]\d)?\d\d?$/
  return nil if post_token && %w{am pm morning afternoon evening night}.include?(post_token.word)

  ScalarYear.new(token.word.to_i)
end
|
52
|
+
|
53
|
+
# Label for this tag. Returns a new, unfrozen String on every call so that
# callers may append to the result.
def to_s
  String.new('scalar')
end
|
56
|
+
end
|
57
|
+
|
58
|
+
class ScalarDay < Scalar #:nodoc:
  # The parent's label followed by "-day-" and the string form of @type.
  def to_s
    "#{super}-day-#{@type}"
  end
end
|
63
|
+
|
64
|
+
class ScalarMonth < Scalar #:nodoc:
  # The parent's label followed by "-month-" and the string form of @type.
  def to_s
    "#{super}-month-#{@type}"
  end
end
|
69
|
+
|
70
|
+
class ScalarYear < Scalar #:nodoc:
  # The parent's label followed by "-year-" and the string form of @type.
  def to_s
    "#{super}-year-#{@type}"
  end
end
|
75
|
+
|
76
|
+
end
|
@@ -0,0 +1,91 @@
|
|
1
|
+
module Chronic

  class Separator < Tag #:nodoc:
    # Tags each token with the first separator kind its word matches. The
    # kinds are tried in this order: comma, slash/dash, "at", "in", "on".
    # A token receives at most one separator tag. Returns the tokens array.
    def self.scan(tokens)
      tokens.each do |token|
        separator = scan_for_commas(token) ||
                    scan_for_slash_or_dash(token) ||
                    scan_for_at(token) ||
                    scan_for_in(token) ||
                    scan_for_on(token)
        token.tag(separator) if separator
      end
      tokens
    end

    # SeparatorComma (type :comma) when the word matches /^,$/, else nil.
    def self.scan_for_commas(token)
      SeparatorComma.new(:comma) if /^,$/ =~ token.word
    end

    # SeparatorSlashOrDash typed :dash for "-" or :slash for "/", else nil.
    def self.scan_for_slash_or_dash(token)
      word = token.word
      if /^-$/ =~ word
        SeparatorSlashOrDash.new(:dash)
      elsif /^\/$/ =~ word
        SeparatorSlashOrDash.new(:slash)
      end
    end

    # SeparatorAt (type :at) when the word is "at" or "@", else nil.
    def self.scan_for_at(token)
      SeparatorAt.new(:at) if /^(at|@)$/ =~ token.word
    end

    # SeparatorIn (type :in) when the word matches /^in$/, else nil.
    def self.scan_for_in(token)
      SeparatorIn.new(:in) if /^in$/ =~ token.word
    end

    # SeparatorOn (type :on) when the word matches /^on$/, else nil.
    def self.scan_for_on(token)
      SeparatorOn.new(:on) if /^on$/ =~ token.word
    end

    # Base label; subclasses extend it with their own suffix.
    def to_s
      'separator'
    end
  end

  class SeparatorComma < Separator #:nodoc:
    def to_s
      "#{super}-comma"
    end
  end

  class SeparatorSlashOrDash < Separator #:nodoc:
    # Includes @type so the label shows which of the two characters was seen.
    def to_s
      "#{super}-slashordash-#{@type}"
    end
  end

  class SeparatorAt < Separator #:nodoc:
    def to_s
      "#{super}-at"
    end
  end

  class SeparatorIn < Separator #:nodoc:
    def to_s
      "#{super}-in"
    end
  end

  class SeparatorOn < Separator #:nodoc:
    def to_s
      "#{super}-on"
    end
  end

end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module Chronic
  class TimeZone < Tag #:nodoc:
    # Tags every token whose word is recognised by scan_for_all and returns
    # the tokens array.
    def self.scan(tokens)
      tokens.each do |token|
        zone = scan_for_all(token)
        token.tag(zone) if zone
      end
      tokens
    end

    # Returns a tag of this class (type :tz) when the token's word contains
    # a three-letter zone abbreviation such as PST/EDT (case-insensitive) or
    # a run of four digits optionally preceded by "tzminus"; otherwise nil.
    #
    # NOTE(review): neither pattern is anchored, so they match anywhere in
    # the word (e.g. "yesterday" contains "est", and any four-digit number
    # qualifies). Confirm this breadth is intended before tightening.
    def self.scan_for_all(token)
      patterns = [/[PMCE][DS]T/i, /(tzminus)?\d{4}/]
      return nil unless patterns.any? { |pattern| pattern =~ token.word }
      new(:tz)
    end

    def to_s
      'timezone'
    end
  end
end
|
@@ -0,0 +1,97 @@
|
|
1
|
+
require 'strscan'
|
2
|
+
|
3
|
+
# Rewrites English number words inside a string as digits,
# e.g. "forty two" => "42", "three hundred" => "300".
class Numerizer

  # [pattern source, replacement] pairs for words that map straight to a
  # numeral. Each pattern source is interpolated into a case-insensitive
  # regex. Longer words come first so that "fourteen" is consumed before
  # "four" and so on.
  DIRECT_NUMS = [
                  ['eleven', '11'],
                  ['twelve', '12'],
                  ['thirteen', '13'],
                  ['fourteen', '14'],
                  ['fifteen', '15'],
                  ['sixteen', '16'],
                  ['seventeen', '17'],
                  ['eighteen', '18'],
                  ['nineteen', '19'],
                  ['ninteen', '19'], # Common mis-spelling
                  ['zero', '0'],
                  ['one', '1'],
                  ['two', '2'],
                  ['three', '3'],
                  ['four(\W|$)', '4\1'],  # The weird regex is so that it matches four but not fourty
                  ['five', '5'],
                  ['six(\W|$)', '6\1'],
                  ['seven(\W|$)', '7\1'],
                  ['eight(\W|$)', '8\1'],
                  ['nine(\W|$)', '9\1'],
                  ['ten', '10'],
                  # NOTE(review): inside [...] the sequence \b is a backspace and
                  # ^ / $ are literal characters, so this only matches an "a"
                  # followed by one of those three characters. Left unchanged
                  # because fixing it would alter how "a" is parsed everywhere.
                  ['\ba[\b^$]', '1'] # doesn't make sense for an 'a' at the end to be a 1
                ]

  # Words for multiples of ten and their values.
  TEN_PREFIXES = [ ['twenty', 20],
                    ['thirty', 30],
                    ['forty', 40],
                    ['fourty', 40], # Common mis-spelling
                    ['fifty', 50],
                    ['sixty', 60],
                    ['seventy', 70],
                    ['eighty', 80],
                    ['ninety', 90]
                  ]

  # Magnitude words and their multipliers.
  BIG_PREFIXES = [ ['hundred', 100],
                    ['thousand', 1000],
                    ['million', 1_000_000],
                    ['billion', 1_000_000_000],
                    ['trillion', 1_000_000_000_000],
                  ]

  # Returns a copy of +string+ with number words replaced by digits.
  # The argument itself is not modified.
  #
  # Intermediate results are marked with a "<num>" prefix so later passes can
  # tell converted numbers from digits that were already in the text; the
  # markers are stripped before returning.
  def self.numerize(string)
    string = string.dup

    # preprocess
    string.gsub!(/ +|([^\d])-([^\d])/, '\1 \2') # will mutilate hyphenated-words but shouldn't matter for date extraction
    string.gsub!(/a half/, 'haAlf') # take the 'a' out so it doesn't turn into a 1, save the half for the end

    # easy/direct replacements

    DIRECT_NUMS.each do |dn|
      string.gsub!(/#{dn[0]}/i, '<num>' + dn[1])
    end

    # ten, twenty, etc.

    # first pass: a tens word followed by a converted single digit ("forty <num>2")
    TEN_PREFIXES.each do |tp|
      string.gsub!(/(?:#{tp[0]}) *<num>(\d(?=[^\d]|$))*/i) { '<num>' + (tp[1] + $1.to_i).to_s }
    end

    # second pass: any tens word still standing on its own
    TEN_PREFIXES.each do |tp|
      string.gsub!(/#{tp[0]}/i) { '<num>' + tp[1].to_s }
    end

    # hundreds, thousands, millions, etc.

    BIG_PREFIXES.each do |bp|
      string.gsub!(/(?:<num>)?(\d*) *#{bp[0]}/i) { '<num>' + (bp[1] * $1.to_i).to_s}
      andition(string)
    end

    # fractional addition
    # I'm not combining this with the previous block as using float addition complicates the strings
    # (with extraneous .0's and such )
    string.gsub!(/(\d+)(?: | and |-)*haAlf/i) { ($1.to_f + 0.5).to_s }

    string.gsub(/<num>/, '')
  end

  # NOTE: `private` has no effect on methods defined with `def self.`, so
  # andition remains callable from outside the class.
  private

  # Merges adjacent "<num>" values in place: "<num>A and <num>B" always, and
  # "<num>A <num>B" when A has more digits than B. After each merge the scan
  # restarts from the beginning of the (now modified) string.
  def self.andition(string)
    sc = StringScanner.new(string)
    while(sc.scan_until(/<num>(\d+)( | and )<num>(\d+)(?=[^\w]|$)/i))
      if sc[2] =~ /and/ || sc[1].size > sc[3].size
        string[(sc.pos - sc.matched_size)..(sc.pos-1)] = '<num>' + (sc[1].to_i + sc[3].to_i).to_s
        sc.reset
      end
    end
  end

end
|
data/test/suite.rb
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
require 'chronic'
|
2
|
+
require 'test/unit'
|
3
|
+
|
4
|
+
class TestChronic < Test::Unit::TestCase

  def setup
    # Reference instant: 2006-08-16 14:00:00 in the local time zone
    @now = Time.local(2006, 8, 16, 14, 0, 0, 0)
  end

  def test_post_normalize_am_pm_aliases
    # a day portion that follows a clock time is expected to become :am
    time = Chronic::Token.new("5:00")
    portion = Chronic::Token.new("morning")
    time.tag(Chronic::RepeaterTime.new("5:00"))
    portion.tag(Chronic::RepeaterDayPortion.new(:morning))
    tokens = [time, portion]

    assert_equal :morning, tokens.last.tags.first.type

    tokens = Chronic.dealias_and_disambiguate_times(tokens, {})

    assert_equal :am, tokens.last.tags.first.type
    assert_equal 2, tokens.size

    # a day portion that follows a day name is expected to stay as it is
    day = Chronic::Token.new("friday")
    portion = Chronic::Token.new("morning")
    day.tag(Chronic::RepeaterDayName.new(:friday))
    portion.tag(Chronic::RepeaterDayPortion.new(:morning))
    tokens = [day, portion]

    assert_equal :morning, tokens.last.tags.first.type

    tokens = Chronic.dealias_and_disambiguate_times(tokens, {})

    assert_equal :morning, tokens.last.tags.first.type
    assert_equal 2, tokens.size
  end

  def test_guess
    # exactly one day: the guess is noon
    whole_day = Chronic::Span.new(Time.local(2006, 8, 16, 0), Time.local(2006, 8, 17, 0))
    assert_equal Time.local(2006, 8, 16, 12), Chronic.guess(whole_day)

    # one day plus one second: the guess is still noon
    day_and_a_second = Chronic::Span.new(Time.local(2006, 8, 16, 0), Time.local(2006, 8, 17, 0, 0, 1))
    assert_equal Time.local(2006, 8, 16, 12), Chronic.guess(day_and_a_second)

    # the month of November: the guess is the start of the 16th
    november = Chronic::Span.new(Time.local(2006, 11), Time.local(2006, 12))
    assert_equal Time.local(2006, 11, 16), Chronic.guess(november)
  end

end
|
@@ -0,0 +1,110 @@
|
|
1
|
+
require 'chronic'
|
2
|
+
require 'test/unit'
|
3
|
+
|
4
|
+
class TestHandler < Test::Unit::TestCase

  def setup
    # Reference instant: 2006-08-16 14:00:00 in the local time zone
    @now = Time.local(2006, 8, 16, 14, 0, 0, 0)
  end

  # Pattern [:repeater]: one repeater token matches, two do not.
  def test_handler_class_1
    handler = Chronic::Handler.new([:repeater], :handler)

    tokens = [Chronic::Token.new('friday')]
    tokens.first.tag(Chronic::RepeaterDayName.new(:friday))
    assert handler.match(tokens, Chronic.definitions)

    tokens.push(Chronic::Token.new('afternoon'))
    tokens.last.tag(Chronic::RepeaterDayPortion.new(:afternoon))
    assert !handler.match(tokens, Chronic.definitions)
  end

  # Pattern [:repeater, :repeater?]: one or two repeaters match, three do not.
  def test_handler_class_2
    handler = Chronic::Handler.new([:repeater, :repeater?], :handler)

    tokens = [Chronic::Token.new('friday')]
    tokens.first.tag(Chronic::RepeaterDayName.new(:friday))
    assert handler.match(tokens, Chronic.definitions)

    tokens.push(Chronic::Token.new('afternoon'))
    tokens.last.tag(Chronic::RepeaterDayPortion.new(:afternoon))
    assert handler.match(tokens, Chronic.definitions)

    tokens.push(Chronic::Token.new('afternoon'))
    tokens.last.tag(Chronic::RepeaterDayPortion.new(:afternoon))
    assert !handler.match(tokens, Chronic.definitions)
  end

  # Pattern [:repeater, 'time?']: a trailing lone day portion does not match.
  def test_handler_class_3
    handler = Chronic::Handler.new([:repeater, 'time?'], :handler)

    tokens = [Chronic::Token.new('friday')]
    tokens.first.tag(Chronic::RepeaterDayName.new(:friday))
    assert handler.match(tokens, Chronic.definitions)

    tokens.push(Chronic::Token.new('afternoon'))
    tokens.last.tag(Chronic::RepeaterDayPortion.new(:afternoon))
    assert !handler.match(tokens, Chronic.definitions)
  end

  # Pattern [:repeater_month_name, :scalar_day, 'time?']: the month name alone
  # does not match; month name plus day does.
  def test_handler_class_4
    handler = Chronic::Handler.new([:repeater_month_name, :scalar_day, 'time?'], :handler)

    tokens = [Chronic::Token.new('may')]
    tokens.first.tag(Chronic::RepeaterMonthName.new(:may))
    assert !handler.match(tokens, Chronic.definitions)

    tokens.push(Chronic::Token.new('27'))
    tokens.last.tag(Chronic::ScalarDay.new(27))
    assert handler.match(tokens, Chronic.definitions)
  end

  # Pattern [:repeater, 'time?']: matches with no time, with a clock time,
  # and with a clock time followed by pm.
  def test_handler_class_5
    handler = Chronic::Handler.new([:repeater, 'time?'], :handler)

    tokens = [Chronic::Token.new('friday')]
    tokens.first.tag(Chronic::RepeaterDayName.new(:friday))
    assert handler.match(tokens, Chronic.definitions)

    tokens.push(Chronic::Token.new('5:00'))
    tokens.last.tag(Chronic::RepeaterTime.new('5:00'))
    assert handler.match(tokens, Chronic.definitions)

    tokens.push(Chronic::Token.new('pm'))
    tokens.last.tag(Chronic::RepeaterDayPortion.new(:pm))
    assert handler.match(tokens, Chronic.definitions)
  end

  # Pattern [:scalar, :repeater, :pointer] matches "3 years past".
  def test_handler_class_6
    handler = Chronic::Handler.new([:scalar, :repeater, :pointer], :handler)

    amount = Chronic::Token.new('3')
    unit = Chronic::Token.new('years')
    direction = Chronic::Token.new('past')
    tokens = [amount, unit, direction]

    amount.tag(Chronic::Scalar.new(3))
    unit.tag(Chronic::RepeaterYear.new(:year))
    direction.tag(Chronic::Pointer.new(:past))

    assert handler.match(tokens, Chronic.definitions)
  end

  # constantize maps :repeater_time to the Chronic::RepeaterTime class.
  def test_constantize
    handler = Chronic::Handler.new([], :handler)
    assert_equal Chronic::RepeaterTime, handler.constantize(:repeater_time)
  end

end
|