mojombo-chronic 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README +167 -0
- data/lib/chronic.rb +127 -0
- data/lib/chronic/chronic.rb +248 -0
- data/lib/chronic/grabber.rb +26 -0
- data/lib/chronic/handlers.rb +524 -0
- data/lib/chronic/ordinal.rb +40 -0
- data/lib/chronic/pointer.rb +27 -0
- data/lib/chronic/repeater.rb +129 -0
- data/lib/chronic/repeaters/repeater_day.rb +52 -0
- data/lib/chronic/repeaters/repeater_day_name.rb +51 -0
- data/lib/chronic/repeaters/repeater_day_portion.rb +94 -0
- data/lib/chronic/repeaters/repeater_fortnight.rb +70 -0
- data/lib/chronic/repeaters/repeater_hour.rb +57 -0
- data/lib/chronic/repeaters/repeater_minute.rb +57 -0
- data/lib/chronic/repeaters/repeater_month.rb +66 -0
- data/lib/chronic/repeaters/repeater_month_name.rb +98 -0
- data/lib/chronic/repeaters/repeater_season.rb +150 -0
- data/lib/chronic/repeaters/repeater_season_name.rb +45 -0
- data/lib/chronic/repeaters/repeater_second.rb +41 -0
- data/lib/chronic/repeaters/repeater_time.rb +120 -0
- data/lib/chronic/repeaters/repeater_week.rb +73 -0
- data/lib/chronic/repeaters/repeater_weekday.rb +77 -0
- data/lib/chronic/repeaters/repeater_weekend.rb +65 -0
- data/lib/chronic/repeaters/repeater_year.rb +64 -0
- data/lib/chronic/scalar.rb +76 -0
- data/lib/chronic/separator.rb +91 -0
- data/lib/chronic/time_zone.rb +23 -0
- data/lib/numerizer/numerizer.rb +97 -0
- data/test/suite.rb +9 -0
- data/test/test_Chronic.rb +50 -0
- data/test/test_Handler.rb +110 -0
- data/test/test_Numerizer.rb +52 -0
- data/test/test_RepeaterDayName.rb +52 -0
- data/test/test_RepeaterFortnight.rb +63 -0
- data/test/test_RepeaterHour.rb +65 -0
- data/test/test_RepeaterMonth.rb +47 -0
- data/test/test_RepeaterMonthName.rb +57 -0
- data/test/test_RepeaterTime.rb +72 -0
- data/test/test_RepeaterWeek.rb +63 -0
- data/test/test_RepeaterWeekday.rb +56 -0
- data/test/test_RepeaterWeekend.rb +75 -0
- data/test/test_RepeaterYear.rb +63 -0
- data/test/test_Span.rb +24 -0
- data/test/test_Time.rb +50 -0
- data/test/test_Token.rb +26 -0
- data/test/test_parsing.rb +706 -0
- metadata +102 -0
data/README
ADDED
@@ -0,0 +1,167 @@
|
|
1
|
+
Chronic
|
2
|
+
http://chronic.rubyforge.org/
|
3
|
+
by Tom Preston-Werner
|
4
|
+
|
5
|
+
== DESCRIPTION:
|
6
|
+
|
7
|
+
Chronic is a natural language date/time parser written in pure Ruby. See below for the wide variety of formats Chronic will parse.
|
8
|
+
|
9
|
+
== INSTALLATION:
|
10
|
+
|
11
|
+
Chronic can be installed via RubyGems:
|
12
|
+
|
13
|
+
$ sudo gem install chronic
|
14
|
+
|
15
|
+
== CODE:
|
16
|
+
|
17
|
+
Browse the code and get an RSS feed of the commit log at:
|
18
|
+
|
19
|
+
http://github.com/mojombo/chronic.git
|
20
|
+
|
21
|
+
You can grab the code (and help with development) via git:
|
22
|
+
|
23
|
+
$ git clone git://github.com/mojombo/chronic.git
|
24
|
+
|
25
|
+
== USAGE:
|
26
|
+
|
27
|
+
You can parse strings containing a natural language date using the Chronic.parse method.
|
28
|
+
|
29
|
+
require 'rubygems'
|
30
|
+
require 'chronic'
|
31
|
+
|
32
|
+
Time.now #=> Sun Aug 27 23:18:25 PDT 2006
|
33
|
+
|
34
|
+
#---
|
35
|
+
|
36
|
+
Chronic.parse('tomorrow')
|
37
|
+
#=> Mon Aug 28 12:00:00 PDT 2006
|
38
|
+
|
39
|
+
Chronic.parse('monday', :context => :past)
|
40
|
+
#=> Mon Aug 21 12:00:00 PDT 2006
|
41
|
+
|
42
|
+
Chronic.parse('this tuesday 5:00')
|
43
|
+
#=> Tue Aug 29 17:00:00 PDT 2006
|
44
|
+
|
45
|
+
Chronic.parse('this tuesday 5:00', :ambiguous_time_range => :none)
|
46
|
+
#=> Tue Aug 29 05:00:00 PDT 2006
|
47
|
+
|
48
|
+
Chronic.parse('may 27th', :now => Time.local(2000, 1, 1))
|
49
|
+
#=> Sat May 27 12:00:00 PDT 2000
|
50
|
+
|
51
|
+
Chronic.parse('may 27th', :guess => false)
|
52
|
+
#=> Sun May 27 00:00:00 PDT 2007..Mon May 28 00:00:00 PDT 2007
|
53
|
+
|
54
|
+
See Chronic.parse for detailed usage instructions.
|
55
|
+
|
56
|
+
== EXAMPLES:
|
57
|
+
|
58
|
+
Chronic can parse a huge variety of date and time formats. The following is a small sample of strings that will be properly parsed. Parsing is case insensitive and will handle common abbreviations and misspellings.
|
59
|
+
|
60
|
+
Simple
|
61
|
+
|
62
|
+
thursday
|
63
|
+
november
|
64
|
+
summer
|
65
|
+
friday 13:00
|
66
|
+
mon 2:35
|
67
|
+
4pm
|
68
|
+
6 in the morning
|
69
|
+
friday 1pm
|
70
|
+
sat 7 in the evening
|
71
|
+
yesterday
|
72
|
+
today
|
73
|
+
tomorrow
|
74
|
+
this tuesday
|
75
|
+
next month
|
76
|
+
last winter
|
77
|
+
this morning
|
78
|
+
last night
|
79
|
+
this second
|
80
|
+
yesterday at 4:00
|
81
|
+
last friday at 20:00
|
82
|
+
last week tuesday
|
83
|
+
tomorrow at 6:45pm
|
84
|
+
afternoon yesterday
|
85
|
+
thursday last week
|
86
|
+
|
87
|
+
Complex
|
88
|
+
|
89
|
+
3 years ago
|
90
|
+
5 months before now
|
91
|
+
7 hours ago
|
92
|
+
7 days from now
|
93
|
+
1 week hence
|
94
|
+
in 3 hours
|
95
|
+
1 year ago tomorrow
|
96
|
+
3 months ago saturday at 5:00 pm
|
97
|
+
7 hours before tomorrow at noon
|
98
|
+
3rd wednesday in november
|
99
|
+
3rd month next year
|
100
|
+
3rd thursday this september
|
101
|
+
4th day last week
|
102
|
+
|
103
|
+
Specific Dates
|
104
|
+
|
105
|
+
January 5
|
106
|
+
dec 25
|
107
|
+
may 27th
|
108
|
+
October 2006
|
109
|
+
oct 06
|
110
|
+
jan 3 2010
|
111
|
+
february 14, 2004
|
112
|
+
3 jan 2000
|
113
|
+
17 april 85
|
114
|
+
5/27/1979
|
115
|
+
27/5/1979
|
116
|
+
05/06
|
117
|
+
1979-05-27
|
118
|
+
Friday
|
119
|
+
5
|
120
|
+
4:00
|
121
|
+
17:00
|
122
|
+
0800
|
123
|
+
|
124
|
+
Specific Times (many of the above with an added time)
|
125
|
+
|
126
|
+
January 5 at 7pm
|
127
|
+
1979-05-27 05:00:00
|
128
|
+
etc
|
129
|
+
|
130
|
+
== TIME ZONES:
|
131
|
+
|
132
|
+
Chronic allows you to set which Time class to use when constructing times. By default, the built-in Ruby Time class creates times in your system's
|
133
|
+
local time zone. You can set this to something like ActiveSupport's TimeZone class to get full time zone support.
|
134
|
+
|
135
|
+
>> Time.zone = "UTC"
|
136
|
+
>> Chronic.time_class = Time.zone
|
137
|
+
>> Chronic.parse("June 15 2006 at 5:45 AM")
|
138
|
+
=> Thu, 15 Jun 2006 05:45:00 UTC +00:00
|
139
|
+
|
140
|
+
== LIMITATIONS:
|
141
|
+
|
142
|
+
Chronic uses Ruby's built-in Time class for all time storage and computation. Because of this, only times that the Time class can handle will be properly parsed. Parsing for times outside of this range will simply return nil. Support for a wider range of times is planned for a future release.
|
143
|
+
|
144
|
+
== LICENSE:
|
145
|
+
|
146
|
+
(The MIT License)
|
147
|
+
|
148
|
+
Copyright (c) 2008 Tom Preston-Werner
|
149
|
+
|
150
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
151
|
+
a copy of this software and associated documentation files (the
|
152
|
+
"Software"), to deal in the Software without restriction, including
|
153
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
154
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
155
|
+
permit persons to whom the Software is furnished to do so, subject to
|
156
|
+
the following conditions:
|
157
|
+
|
158
|
+
The above copyright notice and this permission notice shall be
|
159
|
+
included in all copies or substantial portions of the Software.
|
160
|
+
|
161
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
162
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
163
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
164
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
165
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
166
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
167
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/lib/chronic.rb
ADDED
@@ -0,0 +1,127 @@
|
|
1
|
+
#=============================================================================
|
2
|
+
#
|
3
|
+
# Name: Chronic
|
4
|
+
# Author: Tom Preston-Werner
|
5
|
+
# Purpose: Parse natural language dates and times into Time or
|
6
|
+
# Chronic::Span objects
|
7
|
+
#
|
8
|
+
#=============================================================================
|
9
|
+
|
10
|
+
$:.unshift File.dirname(__FILE__) # For use/testing when no gem is installed
|
11
|
+
|
12
|
+
require 'time'
|
13
|
+
|
14
|
+
require 'chronic/chronic'
|
15
|
+
require 'chronic/handlers'
|
16
|
+
|
17
|
+
require 'chronic/repeater'
|
18
|
+
require 'chronic/repeaters/repeater_year'
|
19
|
+
require 'chronic/repeaters/repeater_season'
|
20
|
+
require 'chronic/repeaters/repeater_season_name'
|
21
|
+
require 'chronic/repeaters/repeater_month'
|
22
|
+
require 'chronic/repeaters/repeater_month_name'
|
23
|
+
require 'chronic/repeaters/repeater_fortnight'
|
24
|
+
require 'chronic/repeaters/repeater_week'
|
25
|
+
require 'chronic/repeaters/repeater_weekend'
|
26
|
+
require 'chronic/repeaters/repeater_weekday'
|
27
|
+
require 'chronic/repeaters/repeater_day'
|
28
|
+
require 'chronic/repeaters/repeater_day_name'
|
29
|
+
require 'chronic/repeaters/repeater_day_portion'
|
30
|
+
require 'chronic/repeaters/repeater_hour'
|
31
|
+
require 'chronic/repeaters/repeater_minute'
|
32
|
+
require 'chronic/repeaters/repeater_second'
|
33
|
+
require 'chronic/repeaters/repeater_time'
|
34
|
+
|
35
|
+
require 'chronic/grabber'
|
36
|
+
require 'chronic/pointer'
|
37
|
+
require 'chronic/scalar'
|
38
|
+
require 'chronic/ordinal'
|
39
|
+
require 'chronic/separator'
|
40
|
+
require 'chronic/time_zone'
|
41
|
+
|
42
|
+
require 'numerizer/numerizer'
|
43
|
+
|
44
|
+
# Top-level namespace for the Chronic natural language date/time parser.
#
# Two module-level settings are exposed:
#
# [<tt>Chronic.debug</tt>]
#     Debug flag, +false+ by default.
#
# [<tt>Chronic.time_class</tt>]
#     The class used to construct times (it must respond to +local+, which
#     Time.construct calls). Defaults to Ruby's Time.
module Chronic
  VERSION = "0.3.0"

  class << self
    attr_accessor :debug, :time_class
  end

  # Defaults; these are the instance variables behind the accessors above.
  @debug = false
  @time_class = Time
end
|
55
|
+
|
56
|
+
# class Time
|
57
|
+
# def self.construct(year, month = 1, day = 1, hour = 0, minute = 0, second = 0)
|
58
|
+
# # extra_seconds = second > 60 ? second - 60 : 0
|
59
|
+
# # extra_minutes = minute > 59 ? minute - 59 : 0
|
60
|
+
# # extra_hours = hour > 23 ? hour - 23 : 0
|
61
|
+
# # extra_days = day >
|
62
|
+
#
|
63
|
+
# if month > 12
|
64
|
+
# if month % 12 == 0
|
65
|
+
# year += (month - 12) / 12
|
66
|
+
# month = 12
|
67
|
+
# else
|
68
|
+
# year += month / 12
|
69
|
+
# month = month % 12
|
70
|
+
# end
|
71
|
+
# end
|
72
|
+
#
|
73
|
+
# base = Time.local(year, month)
|
74
|
+
# puts base
|
75
|
+
# offset = ((day - 1) * 24 * 60 * 60) + (hour * 60 * 60) + (minute * 60) + second
|
76
|
+
# puts offset.to_s
|
77
|
+
# date = base + offset
|
78
|
+
# puts date
|
79
|
+
# date
|
80
|
+
# end
|
81
|
+
# end
|
82
|
+
|
83
|
+
class Time
  # Builds a time via <tt>Chronic.time_class.local</tt>, tolerating values
  # that overflow their unit. Overflow is carried upward: seconds into
  # minutes, minutes into hours, hours into days, days into months and
  # months into years.
  #
  #   Time.construct(2006, 1, 1, 23, 59, 60)  # => 2006-01-02 00:00:00
  #   Time.construct(2006, 12, 32)            # => 2007-01-01 00:00:00
  #   Time.construct(2006, 13)                # => 2007-01-01 00:00:00
  #
  # Only overflow is normalised; values below the valid range (for example
  # a month of 0) are passed through unchanged to +local+.
  #
  # Raises a RuntimeError if +day+ (after hours have been carried into it)
  # is greater than 56. Limiting the day to two minimal months means at
  # most one month ever has to be carried.
  def self.construct(year, month = 1, day = 1, hour = 0, minute = 0, second = 0)
    if second >= 60
      minute += second / 60
      second = second % 60
    end

    if minute >= 60
      hour += minute / 60
      minute = minute % 60
    end

    if hour >= 24
      day += hour / 24
      hour = hour % 24
    end

    # Normalise the month first so that the month-length lookup below is
    # always made with a month in 1..12 and against the correct year.
    if month > 12
      year += (month - 1) / 12
      month = (month - 1) % 12 + 1
    end

    # Day overflow is complicated by months having different lengths.
    day <= 56 || raise("day must be no more than 56 (makes month resolution easier)")
    if day > 28
      # no month ever has fewer than 28 days, so only do this if necessary
      leap_year = (year % 4 == 0 && year % 100 != 0) || (year % 400 == 0)
      leap_year_month_days = [31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
      common_year_month_days = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
      days_this_month = leap_year ? leap_year_month_days[month - 1] : common_year_month_days[month - 1]
      if day > days_this_month
        # day <= 56 and every month has >= 28 days, so removing one month's
        # worth of days always leaves a day that is valid in the next month.
        day -= days_this_month
        month += 1
        if month > 12
          month = 1
          year += 1
        end
      end
    end

    Chronic.time_class.local(year, month, day, hour, minute, second)
  end
end
|
@@ -0,0 +1,248 @@
|
|
1
|
+
module Chronic
|
2
|
+
class << self
|
3
|
+
|
4
|
+
# Parses a string containing a natural language date or time. If the parser
# can find a date or time, either a Time or Chronic::Span will be returned
# (depending on the value of <tt>:guess</tt>). If no date or time can be found,
# +nil+ will be returned.
#
# Options are:
#
# [<tt>:context</tt>]
#     <tt>:past</tt> or <tt>:future</tt> (defaults to <tt>:future</tt>;
#     <tt>:none</tt> is also accepted by the validation below)
#
#     If your string represents a birthday, you can set <tt>:context</tt> to <tt>:past</tt>
#     and if an ambiguous string is given, it will assume it is in the
#     past. Specify <tt>:future</tt> or omit to set a future context.
#
# [<tt>:now</tt>]
#     Time (defaults to the current time of Chronic.time_class)
#
#     By setting <tt>:now</tt> to a Time, all computations will be based off
#     of that time instead of the current time. If set to nil, the current
#     time is used.
#
# [<tt>:guess</tt>]
#     +true+ or +false+ (defaults to +true+)
#
#     By default, the parser will guess a single point in time for the
#     given date or time. If you'd rather have the entire time span returned,
#     set <tt>:guess</tt> to +false+ and a Chronic::Span will be returned.
#
# [<tt>:ambiguous_time_range</tt>]
#     Integer or <tt>:none</tt> (defaults to <tt>6</tt> (6am-6pm))
#
#     If an Integer is given, ambiguous times (like 5:00) will be
#     assumed to be within the range of that time in the AM to that time
#     in the PM. For example, if you set it to <tt>7</tt>, then the parser will
#     look for the time between 7am and 7pm. In the case of 5:00, it would
#     assume that means 5:00pm. If <tt>:none</tt> is given, no assumption
#     will be made, and the first matching instance of that time will
#     be used.
#
# [<tt>:endian_precedence</tt>]
#     Defaults to +nil+. Accepted here and passed through, with the other
#     options, to Repeater.scan and tokens_to_span.
#
# Raises InvalidArgumentException if an unknown option key or an invalid
# <tt>:context</tt> value is given. Any error raised while converting the
# tokens to a span propagates to the caller.
def parse(text, specified_options = {})
  @text = text

  # get options and set defaults if necessary
  default_options = {:context => :future,
                     :now => Chronic.time_class.now,
                     :guess => true,
                     :ambiguous_time_range => 6,
                     :endian_precedence => nil}
  options = default_options.merge specified_options

  # handle options that were explicitly set to nil
  options[:context] ||= :future
  options[:now] ||= Chronic.time_class.now
  options[:ambiguous_time_range] ||= 6

  # ensure the specified options are valid
  specified_options.keys.each do |key|
    default_options.key?(key) || raise(InvalidArgumentException, "#{key} is not a valid option key.")
  end
  [:past, :future, :none].include?(options[:context]) || raise(InvalidArgumentException, "Invalid value ':#{options[:context]}' for :context specified. Valid values are :past and :future.")

  # store now for later =)
  @now = options[:now]

  # put the text into a normal format to ease scanning
  text = pre_normalize(text)

  # get base tokens for each word
  @tokens = base_tokenize(text)

  # scan the tokens with each token scanner; Repeater is the only scanner
  # that is handed the options
  @tokens = Repeater.scan(@tokens, options)

  [Grabber, Pointer, Scalar, Ordinal, Separator, TimeZone].each do |tokenizer|
    @tokens = tokenizer.scan(@tokens)
  end

  # strip any non-tagged tokens
  @tokens = @tokens.select { |token| token.tagged? }

  if Chronic.debug
    puts "+---------------------------------------------------"
    puts "| " + @tokens.to_s
    puts "+---------------------------------------------------"
  end

  # do the heavy lifting (errors are deliberately allowed to propagate)
  span = tokens_to_span(@tokens, options)

  # guess a time within a span if required
  options[:guess] ? guess(span) : span
end
|
105
|
+
|
106
|
+
# Put the input text into a canonical form for tokenizing: downcase it,
# convert number words to numbers (three => 3), strip unwanted punctuation,
# pad separators with spaces, rewrite idioms to their canonical wording
# (tomorrow => next day, ago => past, ...) and finally run the ordinal
# conversion. The substitutions are applied strictly in the order listed,
# because later patterns rely on the output of earlier ones.
def pre_normalize(text) #:nodoc:
  substitutions = [
    [/['"\.,]/, ''],
    [/ \-(\d{4})\b/, ' tzminus\1'],
    [/([\/\-\,\@])/, ' \1 '],
    [/\btoday\b/, 'this day'],
    [/\btomm?orr?ow\b/, 'next day'],
    [/\byesterday\b/, 'last day'],
    [/\bnoon\b/, '12:00'],
    [/\bmidnight\b/, '24:00'],
    [/\bbefore now\b/, 'past'],
    [/\bnow\b/, 'this second'],
    [/\b(ago|before)\b/, 'past'],
    [/\bthis past\b/, 'last'],
    [/\bthis last\b/, 'last'],
    [/\b(?:in|during) the (morning)\b/, '\1'],
    [/\b(?:in the|during the|at) (afternoon|evening|night)\b/, '\1'],
    [/\btonight\b/, 'this night'],
    [/(\d)([ap]m|oclock)\b/, '\1 \2'],
    [/\b(hence|after|from)\b/, 'future']
  ]

  cleaned = numericize_numbers(text.to_s.downcase)
  substitutions.each do |pattern, replacement|
    cleaned = cleaned.gsub(pattern, replacement)
  end
  numericize_ordinals(cleaned)
end
|
133
|
+
|
134
|
+
# Replace number words in +text+ with digits (three => 3). The work is
# delegated entirely to Numerizer.numerize, whose result is returned.
def numericize_numbers(text) #:nodoc:
  numerized = Numerizer.numerize(text)
  numerized
end
|
138
|
+
|
139
|
+
# Hook for converting ordinal words to numeric ordinals (third => 3rd).
# No conversion is implemented here: +text+ is handed back unchanged.
def numericize_ordinals(text) #:nodoc:
  return text
end
|
143
|
+
|
144
|
+
# Break +text+ into whitespace-separated words and wrap each word in a
# fresh Token, preserving word order.
def base_tokenize(text) #:nodoc:
  words = text.split(' ')
  words.collect { |each_word| Token.new(each_word) }
end
|
149
|
+
|
150
|
+
# Pick a single point in time from +span+. Returns +nil+ for a +nil+ span,
# the start of the span when it is at most one second wide, and otherwise
# the start plus half the width.
def guess(span) #:nodoc:
  return nil if span.nil?
  return span.begin unless span.width > 1

  half_width = span.width / 2
  span.begin + half_width
end
|
159
|
+
end
|
160
|
+
|
161
|
+
# A single word of the input text together with the tags that the
# tokenizers have attached to it.
class Token #:nodoc:
  attr_accessor :word, :tags

  # Create an untagged token for +word+.
  def initialize(word)
    @word = word
    @tags = []
  end

  # Tag this token with the specified tag. Returns the tag list.
  def tag(new_tag)
    @tags << new_tag
  end

  # Remove all tags of the given class. Returns the remaining tags.
  def untag(tag_class)
    @tags = @tags.reject { |m| m.kind_of? tag_class }
  end

  # Return true if this token has any tags
  def tagged?
    !@tags.empty?
  end

  # Return the first Tag that matches the given class, or nil if none does
  def get_tag(tag_class)
    @tags.find { |m| m.kind_of? tag_class }
  end

  # Print this Token in a pretty way: the word followed by its tags in
  # parentheses. A new string is built on every call; appending to @word
  # in place would permanently alter the token's word each time it was
  # printed.
  def to_s
    "#{@word}(#{@tags.join(', ')}) "
  end
end
|
196
|
+
|
197
|
+
# A Span represents a range of time. Since this class extends
# Range, you can use #begin and #end to get the beginning and
# ending times of the span (they will be of class Time)
class Span < Range
  # Returns the width of this span in seconds (end minus begin, as an
  # integer)
  def width
    difference = self.end - self.begin
    difference.to_i
  end

  # Shift this span forward by a number of seconds, returning the
  # resulting Span
  def +(seconds)
    shifted_begin = self.begin + seconds
    shifted_end = self.end + seconds
    Span.new(shifted_begin, shifted_end)
  end

  # Subtract a number of seconds from this span, returning the
  # resulting Span
  def -(seconds)
    self + (-seconds)
  end

  # Prints this span in a nice fashion: "(begin..end)"
  def to_s
    "(#{self.begin}..#{self.end})"
  end
end
|
223
|
+
|
224
|
+
# Tokens are tagged with subclassed instances of this class when
# they match specific criteria
class Tag #:nodoc:
  # The value this tag was created with.
  attr_reader :type
  attr_writer :type

  def initialize(type)
    @type = type
  end

  # Record the starting point for this tag. Note that the value is kept
  # in @now, not in an instance variable named after the writer.
  def start=(s)
    @now = s
  end
end
|
237
|
+
|
238
|
+
# Internal exception. Descends directly from Exception, so a bare
# +rescue+ (which only catches StandardError) will not catch it.
class ChronicPain < Exception; end #:nodoc:
|
242
|
+
|
243
|
+
# This exception is raised if an invalid argument is provided to
# any of Chronic's methods. It descends directly from Exception, so a
# bare +rescue+ will not catch it; rescue it by name.
class InvalidArgumentException < Exception; end
|
248
|
+
end
|