mojombo-chronic 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47) hide show
  1. data/README +167 -0
  2. data/lib/chronic.rb +127 -0
  3. data/lib/chronic/chronic.rb +248 -0
  4. data/lib/chronic/grabber.rb +26 -0
  5. data/lib/chronic/handlers.rb +524 -0
  6. data/lib/chronic/ordinal.rb +40 -0
  7. data/lib/chronic/pointer.rb +27 -0
  8. data/lib/chronic/repeater.rb +129 -0
  9. data/lib/chronic/repeaters/repeater_day.rb +52 -0
  10. data/lib/chronic/repeaters/repeater_day_name.rb +51 -0
  11. data/lib/chronic/repeaters/repeater_day_portion.rb +94 -0
  12. data/lib/chronic/repeaters/repeater_fortnight.rb +70 -0
  13. data/lib/chronic/repeaters/repeater_hour.rb +57 -0
  14. data/lib/chronic/repeaters/repeater_minute.rb +57 -0
  15. data/lib/chronic/repeaters/repeater_month.rb +66 -0
  16. data/lib/chronic/repeaters/repeater_month_name.rb +98 -0
  17. data/lib/chronic/repeaters/repeater_season.rb +150 -0
  18. data/lib/chronic/repeaters/repeater_season_name.rb +45 -0
  19. data/lib/chronic/repeaters/repeater_second.rb +41 -0
  20. data/lib/chronic/repeaters/repeater_time.rb +120 -0
  21. data/lib/chronic/repeaters/repeater_week.rb +73 -0
  22. data/lib/chronic/repeaters/repeater_weekday.rb +77 -0
  23. data/lib/chronic/repeaters/repeater_weekend.rb +65 -0
  24. data/lib/chronic/repeaters/repeater_year.rb +64 -0
  25. data/lib/chronic/scalar.rb +76 -0
  26. data/lib/chronic/separator.rb +91 -0
  27. data/lib/chronic/time_zone.rb +23 -0
  28. data/lib/numerizer/numerizer.rb +97 -0
  29. data/test/suite.rb +9 -0
  30. data/test/test_Chronic.rb +50 -0
  31. data/test/test_Handler.rb +110 -0
  32. data/test/test_Numerizer.rb +52 -0
  33. data/test/test_RepeaterDayName.rb +52 -0
  34. data/test/test_RepeaterFortnight.rb +63 -0
  35. data/test/test_RepeaterHour.rb +65 -0
  36. data/test/test_RepeaterMonth.rb +47 -0
  37. data/test/test_RepeaterMonthName.rb +57 -0
  38. data/test/test_RepeaterTime.rb +72 -0
  39. data/test/test_RepeaterWeek.rb +63 -0
  40. data/test/test_RepeaterWeekday.rb +56 -0
  41. data/test/test_RepeaterWeekend.rb +75 -0
  42. data/test/test_RepeaterYear.rb +63 -0
  43. data/test/test_Span.rb +24 -0
  44. data/test/test_Time.rb +50 -0
  45. data/test/test_Token.rb +26 -0
  46. data/test/test_parsing.rb +706 -0
  47. metadata +102 -0
@@ -0,0 +1,91 @@
1
module Chronic

  # Tag for tokens that separate other parts of an expression: a comma,
  # a slash or dash, "at"/"@", "in" and "on".
  class Separator < Tag #:nodoc:
    # Tags each token in +tokens+ with the first separator tag that matches
    # its word. The scanners are tried in the order comma, slash/dash, at,
    # in, on; a token receives at most one separator tag.
    #
    # Returns the same +tokens+ collection it was given.
    def self.scan(tokens)
      tokens.each_index do |i|
        token = tokens[i]
        tag = scan_for_commas(token) ||
              scan_for_slash_or_dash(token) ||
              scan_for_at(token) ||
              scan_for_in(token) ||
              scan_for_on(token)
        token.tag(tag) if tag
      end
      tokens
    end

    # Returns a SeparatorComma if the token's word is a lone comma, else nil.
    def self.scan_for_commas(token)
      return SeparatorComma.new(:comma) if /^,$/ =~ token.word
      nil
    end

    # Returns a SeparatorSlashOrDash typed :dash or :slash if the token's
    # word is a lone "-" or "/", else nil.
    def self.scan_for_slash_or_dash(token)
      return SeparatorSlashOrDash.new(:dash) if /^-$/ =~ token.word
      return SeparatorSlashOrDash.new(:slash) if /^\/$/ =~ token.word
      nil
    end

    # Returns a SeparatorAt if the token's word is "at" or "@", else nil.
    def self.scan_for_at(token)
      return SeparatorAt.new(:at) if /^(at|@)$/ =~ token.word
      nil
    end

    # Returns a SeparatorIn if the token's word is "in", else nil.
    def self.scan_for_in(token)
      return SeparatorIn.new(:in) if /^in$/ =~ token.word
      nil
    end

    # Returns a SeparatorOn if the token's word is "on", else nil.
    def self.scan_for_on(token)
      return SeparatorOn.new(:on) if /^on$/ =~ token.word
      nil
    end

    def to_s
      'separator'
    end
  end

  # The subclasses below build their description with + rather than <<.
  # Appending with << mutates the string literal returned by the parent
  # class, which raises FrozenError when string literals are frozen.

  class SeparatorComma < Separator #:nodoc:
    def to_s
      super + '-comma'
    end
  end

  class SeparatorSlashOrDash < Separator #:nodoc:
    def to_s
      super + '-slashordash-' + @type.to_s
    end
  end

  class SeparatorAt < Separator #:nodoc:
    def to_s
      super + '-at'
    end
  end

  class SeparatorIn < Separator #:nodoc:
    def to_s
      super + '-in'
    end
  end

  class SeparatorOn < Separator #:nodoc:
    def to_s
      super + '-on'
    end
  end

end
@@ -0,0 +1,23 @@
1
module Chronic
  # Tag for tokens whose word looks like a time zone.
  class TimeZone < Tag #:nodoc:
    # Walks +tokens+ and attaches a TimeZone tag to every token that
    # scan_for_all recognises. Returns +tokens+.
    def self.scan(tokens)
      tokens.each_index do |index|
        current = tokens[index]
        zone_tag = scan_for_all(current)
        current.tag(zone_tag) if zone_tag
      end
      tokens
    end

    # Returns a TimeZone tag of type :tz when the token's word matches one
    # of the zone patterns, otherwise nil.
    #
    # Neither pattern is anchored, so a match anywhere inside the word is
    # enough.
    # NOTE(review): this means a word that merely contains e.g. "est" or
    # any four consecutive digits is tagged as well - confirm that is
    # intended before tightening the patterns.
    def self.scan_for_all(token)
      zone_patterns = [/[PMCE][DS]T/i, /(tzminus)?\d{4}/]
      zone_patterns.each do |pattern|
        return new(:tz) if pattern =~ token.word
      end
      nil
    end

    def to_s
      'timezone'
    end
  end
end
@@ -0,0 +1,97 @@
1
+ require 'strscan'
2
+
3
# Rewrites spelled-out English numbers inside a string as digits, e.g.
# "twenty seven" becomes "27". Text that is not part of a number is left
# in place.
class Numerizer

  # [regex source, replacement] pairs for numbers that map straight to
  # digits. The first element is interpolated into a case-insensitive
  # regex, so it may contain regex syntax.
  DIRECT_NUMS = [
                  ['eleven', '11'],
                  ['twelve', '12'],
                  ['thirteen', '13'],
                  ['fourteen', '14'],
                  ['fifteen', '15'],
                  ['sixteen', '16'],
                  ['seventeen', '17'],
                  ['eighteen', '18'],
                  ['nineteen', '19'],
                  ['ninteen', '19'], # Common mis-spelling
                  ['zero', '0'],
                  ['one', '1'],
                  ['two', '2'],
                  ['three', '3'],
                  ['four(\W|$)', '4\1'],  # The trailing group keeps this from matching the start of fourty
                  ['five', '5'],
                  ['six(\W|$)', '6\1'],
                  ['seven(\W|$)', '7\1'],
                  ['eight(\W|$)', '8\1'],
                  ['nine(\W|$)', '9\1'],
                  ['ten', '10'],
                  # NOTE(review): inside a character class \b is a backspace, so
                  # this only matches "a" followed by a literal backspace, "^" or
                  # "$". It is kept unchanged; "a" before hundred/thousand/... is
                  # handled separately in numerize.
                  ['\ba[\b^$]', '1'] # doesn't make sense for an 'a' at the end to be a 1
                ]

  # [word, value] pairs for the multiples of ten.
  TEN_PREFIXES = [ ['twenty', 20],
                   ['thirty', 30],
                   ['forty', 40],
                   ['fourty', 40], # Common mis-spelling
                   ['fifty', 50],
                   ['sixty', 60],
                   ['seventy', 70],
                   ['eighty', 80],
                   ['ninety', 90]
                 ]

  # [word, value] pairs for the multipliers.
  BIG_PREFIXES = [ ['hundred', 100],
                   ['thousand', 1000],
                   ['million', 1_000_000],
                   ['billion', 1_000_000_000],
                   ['trillion', 1_000_000_000_000],
                 ]

  # Returns a copy of +string+ in which spelled-out numbers have been
  # replaced by digits. The argument itself is not modified.
  #
  # While working, every number produced is prefixed with a "<num>" marker
  # so later passes can tell converted numbers from digits that were in the
  # input; the markers are stripped before returning.
  def self.numerize(string)
    string = string.dup

    # preprocess
    string.gsub!(/ +|([^\d])-([^\d])/, '\1 \2') # will mutilate hyphenated-words but shouldn't matter for date extraction
    string.gsub!(/a half/, 'haAlf') # take the 'a' out so it doesn't turn into a 1, save the half for the end

    # "a hundred", "a thousand", ... mean "one hundred", "one thousand", ...
    # Only an "a" directly in front of a multiplier word is rewritten.
    big_words = BIG_PREFIXES.map { |word, _value| word }.join('|')
    string.gsub!(/\ba(?= +(?:#{big_words}))/i, '1')

    # easy/direct replacements

    DIRECT_NUMS.each do |dn|
      string.gsub!(/#{dn[0]}/i, '<num>' + dn[1])
    end

    # ten, twenty, etc.

    # first pass: a tens word followed by a converted single digit ("twenty <num>7")
    TEN_PREFIXES.each do |tp|
      string.gsub!(/(?:#{tp[0]}) *<num>(\d(?=[^\d]|$))*/i) { '<num>' + (tp[1] + $1.to_i).to_s }
    end

    # second pass: any tens word still standing on its own
    TEN_PREFIXES.each do |tp|
      string.gsub!(/#{tp[0]}/i) { '<num>' + tp[1].to_s }
    end

    # hundreds, thousands, millions, etc.

    BIG_PREFIXES.each do |bp|
      string.gsub!(/(?:<num>)?(\d*) *#{bp[0]}/i) { '<num>' + (bp[1] * $1.to_i).to_s }
      andition(string)
    end

    # fractional addition
    # I'm not combining this with the previous block as using float addition complicates the strings
    # (with extraneous .0's and such )
    string.gsub!(/(\d+)(?: | and |-)*haAlf/i) { ($1.to_f + 0.5).to_s }

    string.gsub(/<num>/, '')
  end

  # Merges adjacent converted numbers in +string+ by adding them, e.g.
  # "<num>400 and <num>73" becomes "<num>473". Two numbers are merged when
  # they are joined by " and ", or joined by a single space with the left
  # number having more digits than the right. Mutates +string+ in place.
  #
  # Offsets are taken from MatchData, which reports character positions, so
  # multibyte characters earlier in the string do not shift the replacement.
  def self.andition(string)
    pair = /<num>(\d+)( | and )<num>(\d+)(?=[^\w]|$)/i
    search_from = 0
    while (m = pair.match(string, search_from))
      if m[2] =~ /and/ || m[1].size > m[3].size
        string[m.begin(0)...m.end(0)] = '<num>' + (m[1].to_i + m[3].to_i).to_s
        # the merged number may combine with a neighbour, so rescan from the start
        search_from = 0
      else
        # leave this pair alone and continue after it
        search_from = m.end(0)
      end
    end
  end

end
@@ -0,0 +1,9 @@
1
+ require 'test/unit'
2
+
3
# Load every test_*.rb file that sits next to this one, holding
# test_parsing.rb back so that it is required after all the others.
tests = Dir["#{File.dirname(__FILE__)}/test_*.rb"]
tests.delete_if { |path| path =~ /test_parsing/ }
tests.each { |path| require path }

require File.dirname(__FILE__) + '/test_parsing.rb'
@@ -0,0 +1,50 @@
1
+ require 'chronic'
2
+ require 'test/unit'
3
+
4
class TestChronic < Test::Unit::TestCase

  def setup
    # Wed Aug 16 14:00:00 2006, built with Time.local
    @now = Time.local(2006, 8, 16, 14, 0, 0, 0)
  end

  # A day portion that follows a clock time is rewritten to :am, while a
  # day portion that follows a day name is left as it was.
  def test_post_normalize_am_pm_aliases
    # affect wanted patterns

    time_token = Chronic::Token.new("5:00")
    portion_token = Chronic::Token.new("morning")
    time_token.tag(Chronic::RepeaterTime.new("5:00"))
    portion_token.tag(Chronic::RepeaterDayPortion.new(:morning))
    tokens = [time_token, portion_token]

    assert_equal :morning, tokens[1].tags[0].type

    tokens = Chronic.dealias_and_disambiguate_times(tokens, {})

    assert_equal :am, tokens[1].tags[0].type
    assert_equal 2, tokens.size

    # don't affect unwanted patterns

    day_token = Chronic::Token.new("friday")
    portion_token = Chronic::Token.new("morning")
    day_token.tag(Chronic::RepeaterDayName.new(:friday))
    portion_token.tag(Chronic::RepeaterDayPortion.new(:morning))
    tokens = [day_token, portion_token]

    assert_equal :morning, tokens[1].tags[0].type

    tokens = Chronic.dealias_and_disambiguate_times(tokens, {})

    assert_equal :morning, tokens[1].tags[0].type
    assert_equal 2, tokens.size
  end

  # Chronic.guess is checked against three spans; each row is
  # [span begin, span end, expected guess].
  def test_guess
    expectations = [
      [Time.local(2006, 8, 16, 0), Time.local(2006, 8, 17, 0), Time.local(2006, 8, 16, 12)],
      [Time.local(2006, 8, 16, 0), Time.local(2006, 8, 17, 0, 0, 1), Time.local(2006, 8, 16, 12)],
      [Time.local(2006, 11), Time.local(2006, 12), Time.local(2006, 11, 16)]
    ]

    expectations.each do |span_begin, span_end, expected|
      span = Chronic::Span.new(span_begin, span_end)
      assert_equal expected, Chronic.guess(span)
    end
  end

end
@@ -0,0 +1,110 @@
1
+ require 'chronic'
2
+ require 'test/unit'
3
+
4
class TestHandler < Test::Unit::TestCase

  def setup
    # Wed Aug 16 14:00:00 2006, built with Time.local
    @now = Time.local(2006, 8, 16, 14, 0, 0, 0)
  end

  # a single required :repeater matches one repeater token but not two
  def test_handler_class_1
    handler = Chronic::Handler.new([:repeater], :handler)

    tokens = [tagged_token('friday', Chronic::RepeaterDayName.new(:friday))]
    assert handler.match(tokens, Chronic.definitions)

    tokens << tagged_token('afternoon', Chronic::RepeaterDayPortion.new(:afternoon))
    assert !handler.match(tokens, Chronic.definitions)
  end

  # :repeater followed by an optional :repeater? matches one or two
  # repeater tokens but not three
  def test_handler_class_2
    handler = Chronic::Handler.new([:repeater, :repeater?], :handler)

    tokens = [tagged_token('friday', Chronic::RepeaterDayName.new(:friday))]
    assert handler.match(tokens, Chronic.definitions)

    tokens << tagged_token('afternoon', Chronic::RepeaterDayPortion.new(:afternoon))
    assert handler.match(tokens, Chronic.definitions)

    tokens << tagged_token('afternoon', Chronic::RepeaterDayPortion.new(:afternoon))
    assert !handler.match(tokens, Chronic.definitions)
  end

  # with an optional 'time?' after :repeater, a trailing day portion on
  # its own does not match
  def test_handler_class_3
    handler = Chronic::Handler.new([:repeater, 'time?'], :handler)

    tokens = [tagged_token('friday', Chronic::RepeaterDayName.new(:friday))]
    assert handler.match(tokens, Chronic.definitions)

    tokens << tagged_token('afternoon', Chronic::RepeaterDayPortion.new(:afternoon))
    assert !handler.match(tokens, Chronic.definitions)
  end

  # a month name alone is not enough; month name plus scalar day matches
  def test_handler_class_4
    handler = Chronic::Handler.new([:repeater_month_name, :scalar_day, 'time?'], :handler)

    tokens = [tagged_token('may', Chronic::RepeaterMonthName.new(:may))]
    assert !handler.match(tokens, Chronic.definitions)

    tokens << tagged_token('27', Chronic::ScalarDay.new(27))
    assert handler.match(tokens, Chronic.definitions)
  end

  # the optional 'time?' accepts a clock time, and a clock time plus pm
  def test_handler_class_5
    handler = Chronic::Handler.new([:repeater, 'time?'], :handler)

    tokens = [tagged_token('friday', Chronic::RepeaterDayName.new(:friday))]
    assert handler.match(tokens, Chronic.definitions)

    tokens << tagged_token('5:00', Chronic::RepeaterTime.new('5:00'))
    assert handler.match(tokens, Chronic.definitions)

    tokens << tagged_token('pm', Chronic::RepeaterDayPortion.new(:pm))
    assert handler.match(tokens, Chronic.definitions)
  end

  # scalar, repeater, pointer in sequence ("3 years past")
  def test_handler_class_6
    handler = Chronic::Handler.new([:scalar, :repeater, :pointer], :handler)

    tokens = %w[3 years past].map { |word| Chronic::Token.new(word) }
    tags = [Chronic::Scalar.new(3),
            Chronic::RepeaterYear.new(:year),
            Chronic::Pointer.new(:past)]
    tokens.zip(tags).each { |token, tag| token.tag(tag) }

    assert handler.match(tokens, Chronic.definitions)
  end

  def test_constantize
    handler = Chronic::Handler.new([], :handler)
    assert_equal Chronic::RepeaterTime, handler.constantize(:repeater_time)
  end

  private

  # Builds a Chronic::Token for +word+ and attaches +tag+ to it.
  def tagged_token(word, tag)
    token = Chronic::Token.new(word)
    token.tag(tag)
    token
  end

end
@@ -0,0 +1,52 @@
1
+ require 'test/unit'
2
+ require 'chronic'
3
+
4
class ParseNumbersTest < Test::Unit::TestCase

  # Each case is an [expected integer, phrase] pair. The cases are kept in
  # a list rather than a hash keyed by the expected value: with a hash,
  # two phrases that share a value (100, 1_200) collapse into one entry
  # and the earlier phrase is never run.
  def test_straight_parsing
    cases = [
      [1, 'one'],
      [5, 'five'],
      [10, 'ten'],
      [11, 'eleven'],
      [12, 'twelve'],
      [13, 'thirteen'],
      [14, 'fourteen'],
      [15, 'fifteen'],
      [16, 'sixteen'],
      [17, 'seventeen'],
      [18, 'eighteen'],
      [19, 'nineteen'],
      [20, 'twenty'],
      [27, 'twenty seven'],
      [31, 'thirty-one'],
      [59, 'fifty nine'],
      # [100, 'a hundred'],  # was shadowed by the duplicate key; enable after confirming Numerizer handles it
      [100, 'one hundred'],
      [150, 'one hundred and fifty'],
      # [150, 'one fifty'],
      [200, 'two-hundred'],
      [500, '5 hundred'],
      [999, 'nine hundred and ninety nine'],
      [1_000, 'one thousand'],
      [1_200, 'twelve hundred'],
      [1_200, 'one thousand two hundred'],
      [17_000, 'seventeen thousand'],
      [21_473, 'twentyone-thousand-four-hundred-and-seventy-three'],
      [74_002, 'seventy four thousand and two'],
      [99_999, 'ninety nine thousand nine hundred ninety nine'],
      [100_000, '100 thousand'],
      [250_000, 'two hundred fifty thousand'],
      [1_000_000, 'one million'],
      [1_250_007, 'one million two hundred fifty thousand and seven'],
      [1_000_000_000, 'one billion'],
      [1_000_000_001, 'one billion and one']
    ]

    cases.each do |expected, phrase|
      assert_equal expected, Numerizer.numerize(phrase).to_i, "numerizing #{phrase.inspect}"
    end
  end

  # a string with no spelled-out numbers comes back unchanged
  def test_edges
    assert_equal "27 Oct 2006 7:30am", Numerizer.numerize("27 Oct 2006 7:30am")
  end
end
@@ -0,0 +1,52 @@
1
+ require 'chronic'
2
+ require 'test/unit'
3
+
4
class TestRepeaterDayName < Test::Unit::TestCase

  def setup
    # reference instant: 2006-08-16 14:00 local time
    @now = Time.local(2006, 8, 16, 14, 0, 0, 0)
  end

  # scan_for_day_names yields a RepeaterDayName typed after the day word
  def test_match
    [['saturday', :saturday], ['sunday', :sunday]].each do |word, expected_type|
      token = Chronic::Token.new(word)
      repeater = Chronic::Repeater.scan_for_day_names(token)
      assert_equal Chronic::RepeaterDayName, repeater.class
      assert_equal expected_type, repeater.type
    end
  end

  # successive next(:future) calls step forward one Monday at a time;
  # each pair is [day of span begin, day of span end] in August 2006
  def test_next_future
    mondays = Chronic::RepeaterDayName.new(:monday)
    mondays.start = @now

    [[21, 22], [28, 29]].each do |begin_day, end_day|
      span = mondays.next(:future)

      assert_equal Time.local(2006, 8, begin_day), span.begin
      assert_equal Time.local(2006, 8, end_day), span.end
    end
  end

  # successive next(:past) calls step backward one Monday at a time;
  # each pair is [day of span begin, day of span end] in August 2006
  def test_next_past
    mondays = Chronic::RepeaterDayName.new(:monday)
    mondays.start = @now

    [[14, 15], [7, 8]].each do |begin_day, end_day|
      span = mondays.next(:past)

      assert_equal Time.local(2006, 8, begin_day), span.begin
      assert_equal Time.local(2006, 8, end_day), span.end
    end
  end

end