gforces-chronik 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. data/README +161 -0
  2. data/lib/chronik.rb +100 -0
  3. data/lib/chronik/chronik.rb +249 -0
  4. data/lib/chronik/grabber.rb +26 -0
  5. data/lib/chronik/handlers.rb +524 -0
  6. data/lib/chronik/ordinal.rb +40 -0
  7. data/lib/chronik/pointer.rb +27 -0
  8. data/lib/chronik/repeater.rb +129 -0
  9. data/lib/chronik/repeaters/repeater_day.rb +52 -0
  10. data/lib/chronik/repeaters/repeater_day_name.rb +51 -0
  11. data/lib/chronik/repeaters/repeater_day_portion.rb +94 -0
  12. data/lib/chronik/repeaters/repeater_fortnight.rb +70 -0
  13. data/lib/chronik/repeaters/repeater_hour.rb +57 -0
  14. data/lib/chronik/repeaters/repeater_minute.rb +57 -0
  15. data/lib/chronik/repeaters/repeater_month.rb +66 -0
  16. data/lib/chronik/repeaters/repeater_month_name.rb +98 -0
  17. data/lib/chronik/repeaters/repeater_season.rb +150 -0
  18. data/lib/chronik/repeaters/repeater_season_name.rb +45 -0
  19. data/lib/chronik/repeaters/repeater_second.rb +41 -0
  20. data/lib/chronik/repeaters/repeater_time.rb +124 -0
  21. data/lib/chronik/repeaters/repeater_week.rb +73 -0
  22. data/lib/chronik/repeaters/repeater_weekday.rb +77 -0
  23. data/lib/chronik/repeaters/repeater_weekend.rb +65 -0
  24. data/lib/chronik/repeaters/repeater_year.rb +64 -0
  25. data/lib/chronik/scalar.rb +76 -0
  26. data/lib/chronik/separator.rb +91 -0
  27. data/lib/chronik/time_zone.rb +23 -0
  28. data/lib/numerizer/numerizer.rb +100 -0
  29. data/test/suite.rb +9 -0
  30. data/test/test_Chronik.rb +50 -0
  31. data/test/test_Handler.rb +110 -0
  32. data/test/test_Numerizer.rb +38 -0
  33. data/test/test_RepeaterDayName.rb +52 -0
  34. data/test/test_RepeaterFortnight.rb +63 -0
  35. data/test/test_RepeaterHour.rb +65 -0
  36. data/test/test_RepeaterMonth.rb +47 -0
  37. data/test/test_RepeaterMonthName.rb +57 -0
  38. data/test/test_RepeaterTime.rb +72 -0
  39. data/test/test_RepeaterWeek.rb +63 -0
  40. data/test/test_RepeaterWeekday.rb +56 -0
  41. data/test/test_RepeaterWeekend.rb +75 -0
  42. data/test/test_RepeaterYear.rb +63 -0
  43. data/test/test_Span.rb +24 -0
  44. data/test/test_Time.rb +50 -0
  45. data/test/test_Token.rb +26 -0
  46. data/test/test_parsing.rb +711 -0
  47. metadata +103 -0
@@ -0,0 +1,76 @@
1
module Chronik

  # Tag attached to tokens whose word is a bare number. The subclasses below
  # narrow the interpretation to a day number, a month number or a year.
  class Scalar < Tag #:nodoc:
    # Words that, when they directly follow a number, stop that number from
    # being tagged by any of the scanners in this class (presumably because
    # the number is then a clock time such as "5 pm").
    DAY_PORTIONS = %w{am pm morning afternoon evening night}

    # Scanner methods applied, in this order, to every token by +scan+.
    SCANNERS = [:scan_for_scalars, :scan_for_days, :scan_for_months, :scan_for_years]

    # Runs every scanner over every token (together with the token that
    # follows it) and tags the token with each non-nil result.
    #
    # tokens - Array of tokens responding to +word+ and +tag+.
    #
    # Returns the same +tokens+ array.
    def self.scan(tokens)
      tokens.each_index do |i|
        token, post_token = tokens[i], tokens[i + 1]
        SCANNERS.each do |scanner|
          tag = send(scanner, token, post_token)
          token.tag(tag) if tag
        end
      end
      tokens
    end

    # True when +post_token+ exists and its word is one of DAY_PORTIONS.
    def self.followed_by_day_portion?(post_token)
      post_token ? DAY_PORTIONS.include?(post_token.word) : false
    end

    # Returns a Scalar for an all-digit word, or nil.
    #
    # The original compared the Token object itself against the word list, so
    # the exclusion could never trigger; it now compares post_token.word, like
    # the three sibling scanners do.
    #
    # NOTE(review): /^\d*$/ also accepts an empty word (yielding Scalar 0);
    # kept as-is because callers may rely on it.
    def self.scan_for_scalars(token, post_token)
      return nil unless token.word =~ /^\d*$/
      return nil if followed_by_day_portion?(post_token)
      Scalar.new(token.word.to_i)
    end

    # Returns a ScalarDay for a one- or two-digit word in 1..31, or nil.
    def self.scan_for_days(token, post_token)
      return nil unless token.word =~ /^\d\d?$/
      day = token.word.to_i
      return nil if day > 31 || day < 1 || followed_by_day_portion?(post_token)
      ScalarDay.new(day)
    end

    # Returns a ScalarMonth for a one- or two-digit word in 1..12, or nil.
    def self.scan_for_months(token, post_token)
      return nil unless token.word =~ /^\d\d?$/
      month = token.word.to_i
      return nil if month > 12 || month < 1 || followed_by_day_portion?(post_token)
      ScalarMonth.new(month)
    end

    # Returns a ScalarYear for a word of one to four digits (three- and
    # four-digit words must not start with 0), or nil.
    def self.scan_for_years(token, post_token)
      return nil unless token.word =~ /^([1-9]\d)?\d\d?$/
      return nil if followed_by_day_portion?(post_token)
      ScalarYear.new(token.word.to_i)
    end

    def to_s
      'scalar'
    end
  end

  class ScalarDay < Scalar #:nodoc:
    # e.g. "scalar-day-27"
    def to_s
      super << '-day-' << @type.to_s
    end
  end

  class ScalarMonth < Scalar #:nodoc:
    # e.g. "scalar-month-8"
    def to_s
      super << '-month-' << @type.to_s
    end
  end

  class ScalarYear < Scalar #:nodoc:
    # e.g. "scalar-year-2006"
    def to_s
      super << '-year-' << @type.to_s
    end
  end

end
@@ -0,0 +1,91 @@
1
module Chronik

  # Tag for punctuation and small connecting words: ",", "-", "/", "at"/"@",
  # "in" and "on".
  class Separator < Tag #:nodoc:
    # Tags each token with the first separator tag that applies to it, trying
    # comma, slash/dash, at, in and on in that order. Returns +tokens+.
    def self.scan(tokens)
      tokens.each do |token|
        tag = scan_for_commas(token) ||
              scan_for_slash_or_dash(token) ||
              scan_for_at(token) ||
              scan_for_in(token) ||
              scan_for_on(token)
        token.tag(tag) if tag
      end
      tokens
    end

    # SeparatorComma(:comma) when the word is exactly ",", else nil.
    def self.scan_for_commas(token)
      return SeparatorComma.new(:comma) if /^,$/ =~ token.word
      nil
    end

    # SeparatorSlashOrDash(:dash) for "-", SeparatorSlashOrDash(:slash) for
    # "/", else nil.
    def self.scan_for_slash_or_dash(token)
      word = token.word
      return SeparatorSlashOrDash.new(:dash) if /^-$/ =~ word
      return SeparatorSlashOrDash.new(:slash) if /^\/$/ =~ word
      nil
    end

    # SeparatorAt(:at) when the word is "at" or "@", else nil.
    def self.scan_for_at(token)
      return SeparatorAt.new(:at) if /^(at|@)$/ =~ token.word
      nil
    end

    # SeparatorIn(:in) when the word is "in", else nil.
    def self.scan_for_in(token)
      return SeparatorIn.new(:in) if /^in$/ =~ token.word
      nil
    end

    # SeparatorOn(:on) when the word is "on", else nil.
    def self.scan_for_on(token)
      return SeparatorOn.new(:on) if /^on$/ =~ token.word
      nil
    end

    def to_s
      'separator'
    end
  end

  class SeparatorComma < Separator #:nodoc:
    def to_s
      super << '-comma'
    end
  end

  class SeparatorSlashOrDash < Separator #:nodoc:
    # Appends the concrete kind, e.g. "separator-slashordash-dash".
    def to_s
      super << '-slashordash-' << @type.to_s
    end
  end

  class SeparatorAt < Separator #:nodoc:
    def to_s
      super << '-at'
    end
  end

  class SeparatorIn < Separator #:nodoc:
    def to_s
      super << '-in'
    end
  end

  class SeparatorOn < Separator #:nodoc:
    def to_s
      super << '-on'
    end
  end

end
@@ -0,0 +1,23 @@
1
module Chronik
  # Tag for tokens that look like a time zone designator.
  class TimeZone < Tag #:nodoc:
    # Tags every token for which +scan_for_all+ yields a tag; returns +tokens+.
    def self.scan(tokens)
      tokens.each do |token|
        tag = scan_for_all(token)
        token.tag(tag) if tag
      end
      tokens
    end

    # Returns a new tag of type :tz when the word contains a three-letter
    # abbreviation such as PST/EDT (any case) or four consecutive digits
    # (optionally preceded by "tzminus"); nil otherwise.
    #
    # NOTE(review): neither pattern is anchored, so any word merely containing
    # e.g. "est", or any four-digit number, is tagged as well - confirm that
    # this is intended before tightening.
    def self.scan_for_all(token)
      word = token.word
      return new(:tz) if /[PMCE][DS]T/i =~ word || /(tzminus)?\d{4}/ =~ word
      nil
    end

    def to_s
      'timezone'
    end
  end
end
@@ -0,0 +1,100 @@
1
+ require 'strscan'
2
+
3
# Rewrites Polish number words inside a string as digits, e.g.
# "sto dwadzieścia jeden" => "121". Text that is not a number word is left
# untouched.
class Numerizer

  # [regexp source, replacement] pairs for 0..19. A trailing (\W|$) guard is
  # used wherever the word is also the beginning of a longer number word
  # (pięć / pięćdziesiąt, cztery / czterysta, ...); the guard's capture is put
  # back via \1.
  DIRECT_NUMS = [
                  ['jedenaście', '11'],
                  ['dwanaście', '12'],
                  ['trzynaście', '13'],
                  ['czternaście', '14'],
                  ['piętnaście', '15'],
                  ['szesnaście', '16'],
                  ['siedemnaście', '17'],
                  ['osiemnaście', '18'],
                  ['dziewiętnaście', '19'],
                  ['zero', '0'],
                  ['jeden', '1'],
                  ['dwa(\W|$)', '2\1'],
                  ['trzy(\W|$)', '3\1'],
                  ['cztery(\W|$)', '4\1'], # guarded: must not eat the start of czterysta
                  ['pięć(\W|$)', '5\1'], # The weird regex is so that it matches pięć but not pięćdziesiąt
                  ['sześć(\W|$)', '6\1'],
                  ['siedem(\W|$)', '7\1'],
                  ['osiem(\W|$)', '8\1'],
                  ['dziewięć(\W|$)', '9\1'],
                  ['dziesięć', '10']
                ]

  # [word, value] pairs for the tens 20..90.
  TEN_PREFIXES = [ ['dwadzieścia', 20],
                   ['trzydzieści', 30],
                   ['czterdzieści', 40],
                   ['pięćdziesiąt', 50],
                   ['sześćdziesiąt', 60],
                   ['siedemdziesiąt', 70],
                   ['osiemdziesiąt', 80],
                   ['dziewięćdziesiąt', 90]
                 ]

  # [word, value] pairs for the hundreds 100..900 and for 1000.
  # (The constant name is kept as-is for backward compatibility.)
  HUNDRET_PREFIXES = [ ['sto', 100],
                       ['dwieście', 200],
                       ['trzysta', 300],
                       ['czterysta', 400],
                       ['pięćset', 500],
                       ['sześćset', 600],
                       ['siedemset', 700],
                       ['osiemset', 800],
                       ['dziewięćset', 900],
                       ['tysiąc', 1000]
                     ]

  # Returns a copy of +string+ in which the number words known to this class
  # are replaced by digits and adjacent parts are summed
  # ("sto dwadzieścia" => "120"). The argument itself is not modified.
  def self.numerize(string)
    string = string.dup

    # preprocess
    # string.gsub!(/ +|([^\d])-([^\d])/, '\1 \2') # will mutilate hyphenated-words but shouldn't matter for date extraction
    # string.gsub!(/a half/, 'haAlf') # take the 'a' out so it doesn't turn into a 1, save the half for the end

    # easy/direct replacements; every produced number is marked with <num>
    # so that the later passes can find and combine them

    DIRECT_NUMS.each do |dn|
      string.gsub!(/#{dn[0]}/i, '<num>' + dn[1])
    end

    # tens followed by an already converted single digit: 20 + 1 => 21
    # ($1 is nil when no digit follows, and nil.to_i is 0)

    TEN_PREFIXES.each do |tp|
      string.gsub!(/(?:#{tp[0]}) *<num>(\d(?=[^\d]|$))*/i) { '<num>' + (tp[1] + $1.to_i).to_s }
    end

    # remaining stand-alone tens

    TEN_PREFIXES.each do |tp|
      string.gsub!(/#{tp[0]}/i) { '<num>' + tp[1].to_s }
    end

    # hundreds and thousand
    #
    # The word must not be embedded in a longer word: an unguarded 'sto'
    # would turn "listopad" into "li100pad" and "ostatni" into "o100atni".
    # The character in front of the word (if any) is captured and put back.

    HUNDRET_PREFIXES.each do |hp|
      string.gsub!(/(^|[^[:alpha:]])#{hp[0]}(?![[:alpha:]])/i) { $1 + '<num>' + hp[1].to_s }
      andition(string)
    end

    # fractional addition
    # I'm not combining this with the previous block as using float addition complicates the strings
    # (with extraneous .0's and such )
    # string.gsub!(/(\d+)(?: | and |-)*haAlf/i) { ($1.to_f + 0.5).to_s }

    string.gsub(/<num>/, '')
  end

  # Sums neighbouring "<num>A <num>B" pairs inside +string+ (in place) when
  # they are joined by " and " or when A has more digits than B, so that
  # "<num>100 <num>21" becomes "<num>121". After every merge the search
  # restarts from the beginning of the string. Returns nil.
  #
  # Offsets come from String#index / MatchData, i.e. the same unit String#[]=
  # uses. The previous StringScanner version mixed byte positions with
  # character indexes and spliced at the wrong place whenever a multibyte
  # character (ą, ę, ś, ...) preceded the match.
  #
  # The method stays public: the `private` keyword that used to precede it
  # never applied to a `def self.` method.
  def self.andition(string)
    pair = /<num>(\d+)( | and )<num>(\d+)(?=[^\w]|$)/i
    offset = 0
    while (start = string.index(pair, offset))
      md = Regexp.last_match # grab it before the next =~ overwrites $~
      if md[2] =~ /and/ || md[1].size > md[3].size
        string[start...md.end(0)] = '<num>' + (md[1].to_i + md[3].to_i).to_s
        offset = 0
      else
        offset = md.end(0)
      end
    end
  end

end
data/test/suite.rb ADDED
@@ -0,0 +1,9 @@
1
+ require 'test/unit'
2
+
3
# Require every test_*.rb file that sits next to this script, except
# test_parsing.rb, which is required last.
test_dir = File.dirname(__FILE__)

unit_tests = Dir["#{test_dir}/test_*.rb"].reject { |path| path =~ /test_parsing/ }
unit_tests.each { |path| require path }

require test_dir + '/test_parsing.rb'
@@ -0,0 +1,50 @@
1
+ require 'chronik'
2
+ require 'test/unit'
3
+
4
class TestChronik < Test::Unit::TestCase

  def setup
    # Reference time: Wed Aug 16 14:00:00 2006, built with Time.local.
    @now = Time.local(2006, 8, 16, 14, 0, 0, 0)
  end

  def test_post_normalize_am_pm_aliases
    # A day portion that follows a clock time is rewritten (:morning => :am).
    time_then_portion = [Chronik::Token.new("5:00"), Chronik::Token.new("morning")]
    time_then_portion.first.tag(Chronik::RepeaterTime.new("5:00"))
    time_then_portion.last.tag(Chronik::RepeaterDayPortion.new(:morning))

    assert_equal :morning, time_then_portion.last.tags[0].type

    result = Chronik.dealias_and_disambiguate_times(time_then_portion, {})

    assert_equal :am, result[1].tags[0].type
    assert_equal 2, result.size

    # A day portion that follows a day name is left alone.
    day_then_portion = [Chronik::Token.new("friday"), Chronik::Token.new("morning")]
    day_then_portion.first.tag(Chronik::RepeaterDayName.new(:friday))
    day_then_portion.last.tag(Chronik::RepeaterDayPortion.new(:morning))

    assert_equal :morning, day_then_portion.last.tags[0].type

    result = Chronik.dealias_and_disambiguate_times(day_then_portion, {})

    assert_equal :morning, result[1].tags[0].type
    assert_equal 2, result.size
  end

  def test_guess
    # [span begin, span end, expected guess]
    cases = [
      [Time.local(2006, 8, 16, 0), Time.local(2006, 8, 17, 0),       Time.local(2006, 8, 16, 12)],
      [Time.local(2006, 8, 16, 0), Time.local(2006, 8, 17, 0, 0, 1), Time.local(2006, 8, 16, 12)],
      [Time.local(2006, 11),       Time.local(2006, 12),             Time.local(2006, 11, 16)]
    ]

    cases.each do |span_begin, span_end, expected|
      span = Chronik::Span.new(span_begin, span_end)
      assert_equal expected, Chronik.guess(span)
    end
  end

end
@@ -0,0 +1,110 @@
1
+ require 'chronik'
2
+ require 'test/unit'
3
+
4
class TestHandler < Test::Unit::TestCase

  def setup
    # Reference time: Wed Aug 16 14:00:00 2006, built with Time.local.
    @now = Time.local(2006, 8, 16, 14, 0, 0, 0)
  end

  # A single :repeater matches one repeater token but not two.
  def test_handler_class_1
    handler = Chronik::Handler.new([:repeater], :handler)

    tokens = [tagged_token('friday', Chronik::RepeaterDayName, :friday)]
    assert matches?(handler, tokens)

    tokens << tagged_token('afternoon', Chronik::RepeaterDayPortion, :afternoon)
    assert !matches?(handler, tokens)
  end

  # An optional second :repeater allows one or two repeaters, not three.
  def test_handler_class_2
    handler = Chronik::Handler.new([:repeater, :repeater?], :handler)

    tokens = [tagged_token('friday', Chronik::RepeaterDayName, :friday)]
    assert matches?(handler, tokens)

    tokens << tagged_token('afternoon', Chronik::RepeaterDayPortion, :afternoon)
    assert matches?(handler, tokens)

    tokens << tagged_token('afternoon', Chronik::RepeaterDayPortion, :afternoon)
    assert !matches?(handler, tokens)
  end

  # A lone day portion does not satisfy the optional 'time?' sub-pattern.
  def test_handler_class_3
    handler = Chronik::Handler.new([:repeater, 'time?'], :handler)

    tokens = [tagged_token('friday', Chronik::RepeaterDayName, :friday)]
    assert matches?(handler, tokens)

    tokens << tagged_token('afternoon', Chronik::RepeaterDayPortion, :afternoon)
    assert !matches?(handler, tokens)
  end

  # Month name alone is not enough; month name + day is.
  def test_handler_class_4
    handler = Chronik::Handler.new([:repeater_month_name, :scalar_day, 'time?'], :handler)

    tokens = [tagged_token('may', Chronik::RepeaterMonthName, :may)]
    assert !matches?(handler, tokens)

    tokens << tagged_token('27', Chronik::ScalarDay, 27)
    assert matches?(handler, tokens)
  end

  # 'time?' accepts nothing, a clock time, and a clock time plus am/pm.
  def test_handler_class_5
    handler = Chronik::Handler.new([:repeater, 'time?'], :handler)

    tokens = [tagged_token('friday', Chronik::RepeaterDayName, :friday)]
    assert matches?(handler, tokens)

    tokens << tagged_token('5:00', Chronik::RepeaterTime, '5:00')
    assert matches?(handler, tokens)

    tokens << tagged_token('pm', Chronik::RepeaterDayPortion, :pm)
    assert matches?(handler, tokens)
  end

  # scalar + repeater + pointer ("3 years past").
  def test_handler_class_6
    handler = Chronik::Handler.new([:scalar, :repeater, :pointer], :handler)

    tokens = [
      tagged_token('3', Chronik::Scalar, 3),
      tagged_token('years', Chronik::RepeaterYear, :year),
      tagged_token('past', Chronik::Pointer, :past)
    ]

    assert matches?(handler, tokens)
  end

  def test_constantize
    handler = Chronik::Handler.new([], :handler)
    assert_equal Chronik::RepeaterTime, handler.constantize(:repeater_time)
  end

  private

  # Builds a token for +word+ carrying one tag, tag_class.new(type).
  def tagged_token(word, tag_class, type)
    token = Chronik::Token.new(word)
    token.tag(tag_class.new(type))
    token
  end

  # Result of matching +tokens+ against +handler+ with the library's
  # definitions.
  def matches?(handler, tokens)
    handler.match(tokens, Chronik.definitions)
  end

end