pangel-chronic 0.3.0.3 → 0.3.10
Sign up to get free protection for your applications and to get access to all the features.
- data/{README.rdoc → README.txt} +7 -22
- data/lib/chronic.rb +89 -12
- data/lib/chronic/chronic.rb +260 -301
- data/lib/chronic/grabber.rb +23 -23
- data/lib/chronic/handlers.rb +538 -557
- data/lib/chronic/ordinal.rb +36 -35
- data/lib/chronic/pointer.rb +24 -26
- data/lib/chronic/repeater.rb +128 -138
- data/lib/chronic/repeaters/repeater_day.rb +51 -51
- data/lib/chronic/repeaters/repeater_day_name.rb +50 -52
- data/lib/chronic/repeaters/repeater_day_portion.rb +93 -93
- data/lib/chronic/repeaters/repeater_fortnight.rb +66 -66
- data/lib/chronic/repeaters/repeater_hour.rb +56 -57
- data/lib/chronic/repeaters/repeater_minute.rb +56 -56
- data/lib/chronic/repeaters/repeater_month.rb +71 -62
- data/lib/chronic/repeaters/repeater_month_name.rb +95 -95
- data/lib/chronic/repeaters/repeater_season.rb +142 -142
- data/lib/chronic/repeaters/repeater_season_name.rb +42 -42
- data/lib/chronic/repeaters/repeater_second.rb +40 -40
- data/lib/chronic/repeaters/repeater_time.rb +124 -123
- data/lib/chronic/repeaters/repeater_week.rb +70 -70
- data/lib/chronic/repeaters/repeater_weekday.rb +76 -76
- data/lib/chronic/repeaters/repeater_weekend.rb +63 -63
- data/lib/chronic/repeaters/repeater_year.rb +63 -63
- data/lib/chronic/scalar.rb +89 -70
- data/lib/chronic/separator.rb +88 -88
- data/lib/chronic/time_zone.rb +23 -20
- data/lib/numerizer/numerizer.rb +93 -94
- data/test/suite.rb +2 -2
- data/test/test_Chronic.rb +47 -47
- data/test/test_Handler.rb +106 -106
- data/test/test_Numerizer.rb +47 -49
- data/test/test_RepeaterDayName.rb +48 -48
- data/test/test_RepeaterFortnight.rb +59 -59
- data/test/test_RepeaterHour.rb +61 -64
- data/test/test_RepeaterMonth.rb +43 -43
- data/test/test_RepeaterMonthName.rb +53 -53
- data/test/test_RepeaterTime.rb +68 -68
- data/test/test_RepeaterWeek.rb +59 -59
- data/test/test_RepeaterWeekday.rb +53 -53
- data/test/test_RepeaterWeekend.rb +71 -71
- data/test/test_RepeaterYear.rb +59 -59
- data/test/test_Span.rb +19 -28
- data/test/test_Time.rb +46 -46
- data/test/test_Token.rb +22 -22
- data/test/test_parsing.rb +726 -792
- metadata +6 -10
data/{README.rdoc → README.txt}
RENAMED
@@ -1,22 +1,7 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
Chronic
|
3
2
|
http://chronic.rubyforge.org/
|
4
|
-
|
5
3
|
by Tom Preston-Werner
|
6
4
|
|
7
|
-
= WARNING:
|
8
|
-
|
9
|
-
If you haven't noticed already, this is a fork of mojombo's (Tom's) stable
|
10
|
-
chronic. I decided on my own volition that the 40-some (as reported by Github)
|
11
|
-
network should be merged together. I got it to run, but quite haphazardly. There
|
12
|
-
are a lot of new features (mostly undocumented except the git logs) so be a
|
13
|
-
little flexible in your language passed to Chronic.
|
14
|
-
|
15
|
-
Given that, if there is a bug, more than likely it's my own fault, not
|
16
|
-
mojombo's and therefore bug reports should be sent to my fork, not his.
|
17
|
-
|
18
|
-
Enjoy Chronic!
|
19
|
-
|
20
5
|
== DESCRIPTION:
|
21
6
|
|
22
7
|
Chronic is a natural language date/time parser written in pure Ruby. See below for the wide variety of formats Chronic will parse.
|
@@ -25,17 +10,17 @@ Chronic is a natural language date/time parser written in pure Ruby. See below f
|
|
25
10
|
|
26
11
|
Chronic can be installed via RubyGems:
|
27
12
|
|
28
|
-
$ sudo gem install
|
29
|
-
|
13
|
+
$ sudo gem install chronic
|
14
|
+
|
30
15
|
== CODE:
|
31
16
|
|
32
17
|
Browse the code and get an RSS feed of the commit log at:
|
33
18
|
|
34
|
-
http://github.com/
|
19
|
+
http://github.com/mojombo/chronic.git
|
35
20
|
|
36
21
|
You can grab the code (and help with development) via git:
|
37
22
|
|
38
|
-
$ git clone git://github.com/
|
23
|
+
$ git clone git://github.com/mojombo/chronic.git
|
39
24
|
|
40
25
|
== USAGE:
|
41
26
|
|
@@ -48,7 +33,7 @@ You can parse strings containing a natural language date using the Chronic.parse
|
|
48
33
|
|
49
34
|
#---
|
50
35
|
|
51
|
-
Chronic.parse('tomorrow')
|
36
|
+
Chronic.parse('tomorrow')
|
52
37
|
#=> Mon Aug 28 12:00:00 PDT 2006
|
53
38
|
|
54
39
|
Chronic.parse('monday', :context => :past)
|
@@ -153,7 +138,7 @@ local time zone. You can set this to something like ActiveSupport's TimeZone cl
|
|
153
138
|
=> Thu, 15 Jun 2006 05:45:00 UTC +00:00
|
154
139
|
|
155
140
|
== LIMITATIONS:
|
156
|
-
|
141
|
+
|
157
142
|
Chronic uses Ruby's built in Time class for all time storage and computation. Because of this, only times that the Time class can handle will be properly parsed. Parsing for times outside of this range will simply return nil. Support for a wider range of times is planned for a future release.
|
158
143
|
|
159
144
|
== LICENSE:
|
data/lib/chronic.rb
CHANGED
@@ -7,10 +7,9 @@
|
|
7
7
|
#
|
8
8
|
#=============================================================================
|
9
9
|
|
10
|
-
|
10
|
+
$:.unshift File.dirname(__FILE__) # For use/testing when no gem is installed
|
11
11
|
|
12
|
-
require '
|
13
|
-
require 'core_ext/time'
|
12
|
+
require 'time'
|
14
13
|
|
15
14
|
require 'chronic/chronic'
|
16
15
|
require 'chronic/handlers'
|
@@ -28,7 +27,6 @@ require 'chronic/repeaters/repeater_weekday'
|
|
28
27
|
require 'chronic/repeaters/repeater_day'
|
29
28
|
require 'chronic/repeaters/repeater_day_name'
|
30
29
|
require 'chronic/repeaters/repeater_day_portion'
|
31
|
-
require 'chronic/repeaters/repeater_decade'
|
32
30
|
require 'chronic/repeaters/repeater_hour'
|
33
31
|
require 'chronic/repeaters/repeater_minute'
|
34
32
|
require 'chronic/repeaters/repeater_second'
|
@@ -40,18 +38,97 @@ require 'chronic/scalar'
|
|
40
38
|
require 'chronic/ordinal'
|
41
39
|
require 'chronic/separator'
|
42
40
|
require 'chronic/time_zone'
|
43
|
-
require 'chronic/blunt.rb'
|
44
41
|
|
45
42
|
require 'numerizer/numerizer'
|
46
43
|
|
47
44
|
module Chronic
|
48
|
-
|
45
|
+
VERSION = "0.3.9"
|
46
|
+
|
47
|
+
class << self
|
48
|
+
attr_accessor :debug
|
49
49
|
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
end
|
50
|
+
# Returns the class Chronic uses to build times on the current thread.
# When nothing has been assigned yet, Time is stored as the thread's
# choice and returned.
def time_class
  thread = Thread.current
  thread[:chronic_time_class] = Time unless thread[:chronic_time_class]
  thread[:chronic_time_class]
end
|
54
53
|
|
55
|
-
|
56
|
-
|
54
|
+
# Sets the class Chronic uses to build times. The value is kept in a
# thread-local slot, so it only affects the current thread.
def time_class=(klass)
  thread = Thread.current
  thread[:chronic_time_class] = klass
end
|
57
|
+
end
|
58
|
+
|
59
|
+
self.debug = false
|
60
|
+
end
|
61
|
+
|
62
|
+
# class Time
|
63
|
+
# def self.construct(year, month = 1, day = 1, hour = 0, minute = 0, second = 0)
|
64
|
+
# # extra_seconds = second > 60 ? second - 60 : 0
|
65
|
+
# # extra_minutes = minute > 59 ? minute - 59 : 0
|
66
|
+
# # extra_hours = hour > 23 ? hour - 23 : 0
|
67
|
+
# # extra_days = day >
|
68
|
+
#
|
69
|
+
# if month > 12
|
70
|
+
# if month % 12 == 0
|
71
|
+
# year += (month - 12) / 12
|
72
|
+
# month = 12
|
73
|
+
# else
|
74
|
+
# year += month / 12
|
75
|
+
# month = month % 12
|
76
|
+
# end
|
77
|
+
# end
|
78
|
+
#
|
79
|
+
# base = Time.local(year, month)
|
80
|
+
# puts base
|
81
|
+
# offset = ((day - 1) * 24 * 60 * 60) + (hour * 60 * 60) + (minute * 60) + second
|
82
|
+
# puts offset.to_s
|
83
|
+
# date = base + offset
|
84
|
+
# puts date
|
85
|
+
# date
|
86
|
+
# end
|
87
|
+
# end
|
88
|
+
|
89
|
+
class Time
  # Builds a local time through Chronic.time_class.local, tolerating
  # components that overflow their normal range. Overflow is carried upward:
  # seconds into minutes, minutes into hours, hours into days, days into
  # months and months into years.
  #
  # year, month, day, hour, minute, second - numeric components; everything
  # after year is optional.
  #
  # Returns whatever Chronic.time_class.local returns for the normalized
  # components.
  # Raises RuntimeError if, after carrying hours, day is greater than 56.
  def self.construct(year, month = 1, day = 1, hour = 0, minute = 0, second = 0)
    if second >= 60
      carry, second = second.divmod(60)
      minute += carry
    end

    if minute >= 60
      carry, minute = minute.divmod(60)
      hour += carry
    end

    if hour >= 24
      carry, hour = hour.divmod(24)
      day += carry
    end

    # day overflow is complicated by the non-constant number of days per
    # month, so it is only supported up to 56 days
    day <= 56 || raise("day must be no more than 56 (makes month resolution easier)")

    # fold month overflow first so the month-length lookup below is always
    # indexed with a month in 1..12 (13 => January of the next year,
    # 24 => December of the next year)
    if month > 12
      year += (month - 1) / 12
      month = (month - 1) % 12 + 1
    end

    # no month ever has fewer than 28 days, so only do this if necessary
    if day > 28
      days_in_month = lambda do |y, m|
        # full Gregorian rule: century years are leap years only when
        # divisible by 400 (2000 is, 1900 is not)
        leap_year = (y % 4 == 0 && y % 100 != 0) || (y % 400 == 0)
        m == 2 && leap_year ? 29 : [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31][m - 1]
      end

      # roll forward one month at a time so the result is never day 0 and
      # each month's real length is used
      while day > (limit = days_in_month.call(year, month))
        day -= limit
        month += 1
        if month > 12
          month = 1
          year += 1
        end
      end
    end

    Chronic.time_class.local(year, month, day, hour, minute, second)
  end
end
|
data/lib/chronic/chronic.rb
CHANGED
@@ -1,303 +1,262 @@
|
|
1
1
|
module Chronic
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
# resulting Span
|
263
|
-
def +(seconds)
|
264
|
-
Span.new(self.begin + seconds, self.end + seconds)
|
265
|
-
end
|
266
|
-
|
267
|
-
# Subtract a number of seconds to this span, returning the
|
268
|
-
# resulting Span
|
269
|
-
def -(seconds)
|
270
|
-
self + -seconds
|
271
|
-
end
|
272
|
-
|
273
|
-
# Prints this span in a nice fashion
|
274
|
-
def to_s
|
275
|
-
'(' << self.begin.to_s << '...' << self.end.to_s << ')'
|
276
|
-
end
|
277
|
-
end
|
278
|
-
|
279
|
-
# Tokens are tagged with subclassed instances of this class when
|
280
|
-
# they match specific criteria
|
281
|
-
class Tag #:nodoc:
|
282
|
-
attr_accessor :type
|
283
|
-
|
284
|
-
def initialize(type)
|
285
|
-
@type = type
|
286
|
-
end
|
287
|
-
|
288
|
-
def start=(s)
|
289
|
-
@now = s
|
290
|
-
end
|
291
|
-
end
|
292
|
-
|
293
|
-
# Internal exception
|
294
|
-
class ChronicPain < Exception #:nodoc:
|
295
|
-
|
296
|
-
end
|
297
|
-
|
298
|
-
# This exception is raised if an invalid argument is provided to
|
299
|
-
# any of Chronic's methods
|
300
|
-
class InvalidArgumentException < Exception
|
301
|
-
|
302
|
-
end
|
2
|
+
class << self
|
3
|
+
|
4
|
+
# Parses a string containing a natural language date or time. If the parser
# can find a date or time, either a Time or Chronic::Span will be returned
# (depending on the value of <tt>:guess</tt>). If no date or time can be found,
# +nil+ will be returned.
#
# Options are:
#
# [<tt>:context</tt>]
#     <tt>:past</tt> or <tt>:future</tt> (defaults to <tt>:future</tt>)
#
#     If your string represents a birthday, you can set <tt>:context</tt> to <tt>:past</tt>
#     and if an ambiguous string is given, it will assume it is in the
#     past. Specify <tt>:future</tt> or omit to set a future context.
#
# [<tt>:now</tt>]
#     Time (defaults to Chronic.time_class.now)
#
#     By setting <tt>:now</tt> to a Time, all computations will be based off
#     of that time instead of the current time. If set to nil, the current
#     time is used.
#
# [<tt>:guess</tt>]
#     +true+ or +false+ (defaults to +true+)
#
#     By default, the parser will guess a single point in time for the
#     given date or time. If you'd rather have the entire time span returned,
#     set <tt>:guess</tt> to +false+ and a Chronic::Span will be returned.
#
# [<tt>:ambiguous_time_range</tt>]
#     Integer or <tt>:none</tt> (defaults to <tt>6</tt> (6am-6pm))
#
#     If an Integer is given, ambiguous times (like 5:00) will be
#     assumed to be within the range of that time in the AM to that time
#     in the PM. For example, if you set it to <tt>7</tt>, then the parser will
#     look for the time between 7am and 7pm. In the case of 5:00, it would
#     assume that means 5:00pm. If <tt>:none</tt> is given, no assumption
#     will be made, and the first matching instance of that time will
#     be used.
#
# [<tt>:endian_precedence</tt>]
#     Accepted as an option key (defaults to +nil+) and handed on with the
#     rest of the options; its effect is not defined in this method.
#
# Raises InvalidArgumentException for an unknown option key or an invalid
# <tt>:context</tt>. Errors raised while converting tokens to a span are
# not swallowed; they propagate to the caller.
def parse(text, specified_options = {})
  @text = text

  # get options and set defaults if necessary
  default_options = {:context => :future,
                     :now => Chronic.time_class.now,
                     :guess => true,
                     :ambiguous_time_range => 6,
                     :endian_precedence => nil}
  options = default_options.merge specified_options

  # handle options that were explicitly set to nil
  options[:context] ||= :future
  options[:now] ||= Chronic.time_class.now
  options[:ambiguous_time_range] ||= 6

  # ensure the specified options are valid
  specified_options.each_key do |key|
    default_options.key?(key) || raise(InvalidArgumentException, "#{key} is not a valid option key.")
  end
  [:past, :future, :none].include?(options[:context]) || raise(InvalidArgumentException, "Invalid value ':#{options[:context]}' for :context specified. Valid values are :past and :future.")

  # store now for later =)
  @now = options[:now]

  # put the text into a normal format to ease scanning
  text = self.pre_normalize(text)

  # get base tokens for each word
  @tokens = self.base_tokenize(text)

  # scan the tokens with each token scanner; Repeater is the only scanner
  # that is given the options
  [Repeater].each do |tokenizer|
    @tokens = tokenizer.scan(@tokens, options)
  end

  [Grabber, Pointer, Scalar, Ordinal, Separator, TimeZone].each do |tokenizer|
    @tokens = tokenizer.scan(@tokens)
  end

  # strip any non-tagged tokens
  @tokens = @tokens.select { |token| token.tagged? }

  if Chronic.debug
    puts "+---------------------------------------------------"
    puts "| " + @tokens.to_s
    puts "+---------------------------------------------------"
  end

  # do the heavy lifting; any error raised here reaches the caller
  span = self.tokens_to_span(@tokens, options)

  # guess a time within a span if required
  options[:guess] ? self.guess(span) : span
end
|
105
|
+
|
106
|
+
# Prepares raw input for tokenizing: lowercases it, converts number words
# to digits, strips or spaces out punctuation, and rewrites common idioms
# (today, tomorrow, noon, ago, ...) into the canonical words the scanners
# look for. Ordinal conversion runs last.
def pre_normalize(text) #:nodoc:
  cleaned = numericize_numbers(text.to_s.downcase)

  # Rewrite rules, applied in place and strictly in this order.
  rewrites = [
    # Completely removing periods breaks decimal minutes, so a period
    # between two digits is assumed to be a time separator and becomes a
    # colon. This will not work for a date like 10.15.2010.
    [/([0-9])[\.]([0-9])/, '\1:\2'],
    # whatever punctuation is left is probably not part of a time
    [/['"\.,]/, ' '],
    [/ \-(\d{4})\b/, ' tzminus\1'],
    # pad separators so they become tokens of their own
    [/([\/\-\,\@])/, ' \1 '],
    [/\btoday\b/, 'this day'],
    [/\btomm?orr?ow\b/, 'next day'],
    [/\byesterday\b/, 'last day'],
    [/\bnoon\b/, '12:00'],
    [/\bmidnight\b/, '24:00'],
    [/\bbefore now\b/, 'past'],
    [/\bnow\b/, 'this second'],
    [/\b(ago|before)\b/, 'past'],
    [/\bthis past\b/, 'last'],
    [/\bthis last\b/, 'last'],
    [/\b(?:in|during) the (morning)\b/, '\1'],
    [/\b(?:in the|during the|at) (afternoon|evening|night)\b/, '\1'],
    [/\btonight\b/, 'this night'],
    # "5p" / "5:30a" => "5pm" / "5:30am"
    [/\b\d+:?\d*[ap]\b/, '\0m'],
    # split the meridian (or oclock) off the number: "5pm" => "5 pm"
    [/(\d)([ap]m|oclock)\b/, '\1 \2'],
    [/\b(hence|after|from)\b/, 'future']
  ]
  rewrites.each { |pattern, replacement| cleaned.gsub!(pattern, replacement) }

  numericize_ordinals(cleaned)
end
|
142
|
+
|
143
|
+
# Replaces number words with digits (three => 3). The work is delegated
# entirely to Numerizer.numerize.
def numericize_numbers(text) #:nodoc:
  Numerizer.numerize(text)
end
|
147
|
+
|
148
|
+
# Hook for converting ordinal words to numeric ordinals (third => 3rd).
# No conversion is performed here: the text is handed back as given.
def numericize_ordinals(text) #:nodoc:
  text
end
|
152
|
+
|
153
|
+
# Breaks the text apart on spaces and wraps every resulting word in a
# Token, preserving word order.
def base_tokenize(text) #:nodoc:
  words = text.split(' ')
  words.map { |piece| Token.new(piece) }
end
|
158
|
+
|
159
|
+
# Picks a single point inside the given span: the midpoint when the span
# is wider than one second, otherwise its beginning. Returns nil when no
# span was given.
def guess(span) #:nodoc:
  return nil if span.nil?
  return span.begin unless span.width > 1

  span.begin + (span.width / 2)
end
|
168
|
+
end
|
169
|
+
|
170
|
+
# A single word of the input together with the tags the scanners have
# attached to it.
class Token #:nodoc:
  attr_accessor :word, :tags

  # word - the piece of input text this token stands for
  def initialize(word)
    @word = word
    @tags = []
  end

  # Tag this token with the specified tag
  def tag(new_tag)
    @tags << new_tag
  end

  # Remove all tags of the given class
  def untag(tag_class)
    @tags = @tags.reject { |m| m.kind_of? tag_class }
  end

  # Return true if this token has any tags
  def tagged?
    !@tags.empty?
  end

  # Return the first Tag that matches the given class, or nil if there
  # is none
  def get_tag(tag_class)
    @tags.find { |m| m.kind_of? tag_class }
  end

  # Print this Token in a pretty way. A new string is built on every call;
  # the token's word is left untouched, so printing (for example from the
  # debug output) never changes what later code sees in #word.
  def to_s
    "#{@word}(#{@tags.join(', ')}) "
  end
end
|
205
|
+
|
206
|
+
# A Span represents a range of time. Since this class extends
# Range, you can use #begin and #end to get the beginning and
# ending times of the span (they will be of class Time)
class Span < Range
  # Returns the width of this span in seconds
  def width
    (self.end - self.begin).to_i
  end

  # Add a number of seconds to this span, returning the
  # resulting Span
  def +(seconds)
    Span.new(self.begin + seconds, self.end + seconds)
  end

  # Subtract a number of seconds to this span, returning the
  # resulting Span
  def -(seconds)
    self + -seconds
  end

  # Prints this span in a nice fashion
  def to_s
    "(#{self.begin}..#{self.end})"
  end

  # Only supply cover? on Rubies whose Range lacks it. Testing for the
  # method itself (instead of matching a version string) keeps the native
  # Range#cover? on every Ruby that already provides one, so endpoints are
  # compared rather than iterated.
  alias :cover? :include? unless method_defined?(:cover?)
end
|
237
|
+
|
238
|
+
# Base class for the markers attached to tokens. Scanners tag a token with
# an instance of a Tag subclass when the token matches their criteria.
class Tag #:nodoc:
  attr_reader :type
  attr_writer :type

  # type - the value exposed through #type
  def initialize(type)
    @type = type
  end

  # Records the given value as this tag's @now reference point
  def start=(value)
    @now = value
  end
end
|
251
|
+
|
252
|
+
# Exception used internally by Chronic. It descends directly from
# Exception, so a bare +rescue+ does not catch it.
class ChronicPain < Exception #:nodoc:
end

# Raised when an invalid argument is provided to any of Chronic's
# methods. It descends directly from Exception, so callers must rescue
# it by name.
class InvalidArgumentException < Exception
end
|
303
262
|
end
|