gforces-chronik 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. data/README +161 -0
  2. data/lib/chronik.rb +100 -0
  3. data/lib/chronik/chronik.rb +249 -0
  4. data/lib/chronik/grabber.rb +26 -0
  5. data/lib/chronik/handlers.rb +524 -0
  6. data/lib/chronik/ordinal.rb +40 -0
  7. data/lib/chronik/pointer.rb +27 -0
  8. data/lib/chronik/repeater.rb +129 -0
  9. data/lib/chronik/repeaters/repeater_day.rb +52 -0
  10. data/lib/chronik/repeaters/repeater_day_name.rb +51 -0
  11. data/lib/chronik/repeaters/repeater_day_portion.rb +94 -0
  12. data/lib/chronik/repeaters/repeater_fortnight.rb +70 -0
  13. data/lib/chronik/repeaters/repeater_hour.rb +57 -0
  14. data/lib/chronik/repeaters/repeater_minute.rb +57 -0
  15. data/lib/chronik/repeaters/repeater_month.rb +66 -0
  16. data/lib/chronik/repeaters/repeater_month_name.rb +98 -0
  17. data/lib/chronik/repeaters/repeater_season.rb +150 -0
  18. data/lib/chronik/repeaters/repeater_season_name.rb +45 -0
  19. data/lib/chronik/repeaters/repeater_second.rb +41 -0
  20. data/lib/chronik/repeaters/repeater_time.rb +124 -0
  21. data/lib/chronik/repeaters/repeater_week.rb +73 -0
  22. data/lib/chronik/repeaters/repeater_weekday.rb +77 -0
  23. data/lib/chronik/repeaters/repeater_weekend.rb +65 -0
  24. data/lib/chronik/repeaters/repeater_year.rb +64 -0
  25. data/lib/chronik/scalar.rb +76 -0
  26. data/lib/chronik/separator.rb +91 -0
  27. data/lib/chronik/time_zone.rb +23 -0
  28. data/lib/numerizer/numerizer.rb +100 -0
  29. data/test/suite.rb +9 -0
  30. data/test/test_Chronik.rb +50 -0
  31. data/test/test_Handler.rb +110 -0
  32. data/test/test_Numerizer.rb +38 -0
  33. data/test/test_RepeaterDayName.rb +52 -0
  34. data/test/test_RepeaterFortnight.rb +63 -0
  35. data/test/test_RepeaterHour.rb +65 -0
  36. data/test/test_RepeaterMonth.rb +47 -0
  37. data/test/test_RepeaterMonthName.rb +57 -0
  38. data/test/test_RepeaterTime.rb +72 -0
  39. data/test/test_RepeaterWeek.rb +63 -0
  40. data/test/test_RepeaterWeekday.rb +56 -0
  41. data/test/test_RepeaterWeekend.rb +75 -0
  42. data/test/test_RepeaterYear.rb +63 -0
  43. data/test/test_Span.rb +24 -0
  44. data/test/test_Time.rb +50 -0
  45. data/test/test_Token.rb +26 -0
  46. data/test/test_parsing.rb +711 -0
  47. metadata +103 -0
data/README ADDED
@@ -0,0 +1,161 @@
1
+ Chronik
2
+ by Marcin Ciunelis (G-Forces Polska)
3
+
4
+ == DESCRIPTION:
5
+
6
+ A Polish version of Tom Preston-Werner's Chronic (http://github.com/mojombo/chronic/tree/master) natural language date/time parser, written in pure Ruby. See below for the wide variety of formats Chronik will parse.
7
+
8
+ == INSTALLATION:
9
+
10
+ Chronik can be installed via RubyGems:
11
+
12
+ $ gem sources -a http://gems.github.com
13
+ $ sudo gem install gforces-chronik
14
+
15
+ == CODE:
16
+
17
+ You can grab the code (and help with development) via git:
18
+
19
+ $ git clone git://github.com/gforces/chronik.git
20
+
21
+ == USAGE:
22
+
23
+ You can parse strings containing a natural language date using the Chronik.parse method.
24
+
25
+ require 'rubygems'
26
+ require 'chronik'
27
+
28
+ Time.now #=> Sun Aug 27 23:18:25 PDT 2006
29
+
30
+ #---
31
+
32
+ Chronik.parse('jutro')
33
+ #=> Mon Aug 28 12:00:00 PDT 2006
34
+
35
+ Chronik.parse('poniedziałek', :context => :past)
36
+ #=> Mon Aug 21 12:00:00 PDT 2006
37
+
38
+ Chronik.parse('przyszły wtorek 5:00')
39
+ #=> Tue Aug 29 17:00:00 PDT 2006
40
+
41
+ Chronik.parse('przyszły wtorek 5:00', :ambiguous_time_range => :none)
42
+ #=> Tue Aug 29 05:00:00 PDT 2006
43
+
44
+ Chronik.parse('27 maja', :now => Time.local(2000, 1, 1))
45
+ #=> Sat May 27 12:00:00 PDT 2000
46
+
47
+ Chronik.parse('27 maja', :guess => false)
48
+ #=> Sun May 27 00:00:00 PDT 2007..Mon May 28 00:00:00 PDT 2007
49
+
50
+ See Chronik.parse for detailed usage instructions.
51
+
52
+ == EXAMPLES:
53
+
54
+ Chronik can parse a huge variety of date and time formats. Following is a small sample of strings that will be properly parsed. Parsing is case insensitive and will handle common abbreviations and misspellings.
55
+
56
+ Simple
57
+
58
+ czwartek
59
+ listopad
60
+ lato
61
+ piątek 13:00
62
+ pon 2:35
63
+ 4 po południu
64
+ 6 rano
65
+ piątek 1 po południu
66
+ sob 7 wieczorem
67
+ wczoraj
68
+ dzisiaj
69
+ jutro
70
+ przyszły wtorek
71
+ przyszły miesiąc
72
+ ostatnia zima
73
+ dzisiaj rano
74
+ wczoraj w nocy
75
+ teraz
76
+ wczoraj o 4:00
77
+ poprzedni piątek o 20:00
78
+ jutro o 6:45 wieczorem
79
+ wczoraj po południu
80
+
81
+ Complex
82
+
83
+ 3 years ago
84
+ 5 months before now
85
+ 7 hours ago
86
+ 7 days from now
87
+ 1 week hence
88
+ in 3 hours
89
+ 1 year ago tomorrow
90
+ 3 months ago saturday at 5:00 pm
91
+ 7 hours before tomorrow at noon
92
+ 3rd wednesday in november
93
+ 3rd month next year
94
+ 3rd thursday this september
95
+ 4th day last week
96
+
97
+ Specific Dates
98
+
99
+ January 5
100
+ dec 25
101
+ may 27th
102
+ October 2006
103
+ oct 06
104
+ jan 3 2010
105
+ february 14, 2004
106
+ 3 jan 2000
107
+ 17 april 85
108
+ 5/27/1979
109
+ 27/5/1979
110
+ 05/06
111
+ 1979-05-27
112
+ Friday
113
+ 5
114
+ 4:00
115
+ 17:00
116
+ 0800
117
+
118
+ Specific Times (many of the above with an added time)
119
+
120
+ January 5 at 7pm
121
+ 1979-05-27 05:00:00
122
+ etc
123
+
124
+ == TIME ZONES:
125
+
126
+ Chronik allows you to set which Time class to use when constructing times. By default, the built-in Ruby Time class creates times in your system's
127
+ local time zone. You can set this to something like ActiveSupport's TimeZone class to get full time zone support.
128
+
129
+ >> Time.zone = "UTC"
130
+ >> Chronik.time_class = Time.zone
131
+ >> Chronik.parse("June 15 2006 at 5:45 AM")
132
+ => Thu, 15 Jun 2006 05:45:00 UTC +00:00
133
+
134
+ == LIMITATIONS:
135
+
136
+ Chronik uses Ruby's built-in Time class for all time storage and computation. Because of this, only times that the Time class can handle will be properly parsed. Parsing for times outside of this range will simply return nil. Support for a wider range of times is planned for a future release.
137
+
138
+ == LICENSE:
139
+
140
+ (The MIT License)
141
+
142
+ Copyright (c) 2009 Marcin Ciunelis (G-Forces Polska)
143
+
144
+ Permission is hereby granted, free of charge, to any person obtaining
145
+ a copy of this software and associated documentation files (the
146
+ "Software"), to deal in the Software without restriction, including
147
+ without limitation the rights to use, copy, modify, merge, publish,
148
+ distribute, sublicense, and/or sell copies of the Software, and to
149
+ permit persons to whom the Software is furnished to do so, subject to
150
+ the following conditions:
151
+
152
+ The above copyright notice and this permission notice shall be
153
+ included in all copies or substantial portions of the Software.
154
+
155
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
156
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
157
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
158
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
159
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
160
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
161
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/lib/chronik.rb ADDED
@@ -0,0 +1,100 @@
1
+ #=============================================================================
2
+ #
3
+ # Name: Chronik
4
+ # Author: Tom Preston-Werner
5
+ # Purpose: Parse natural language dates and times into Time or
6
+ # Chronik::Span objects
7
+ #
8
+ #=============================================================================
9
+
10
+ $:.unshift File.dirname(__FILE__) # For use/testing when no gem is installed
11
+
12
+ require 'time'
13
+
14
+ require 'chronik/chronik'
15
+ require 'chronik/handlers'
16
+
17
+ require 'chronik/repeater'
18
+ require 'chronik/repeaters/repeater_year'
19
+ require 'chronik/repeaters/repeater_season'
20
+ require 'chronik/repeaters/repeater_season_name'
21
+ require 'chronik/repeaters/repeater_month'
22
+ require 'chronik/repeaters/repeater_month_name'
23
+ require 'chronik/repeaters/repeater_fortnight'
24
+ require 'chronik/repeaters/repeater_week'
25
+ require 'chronik/repeaters/repeater_weekend'
26
+ require 'chronik/repeaters/repeater_weekday'
27
+ require 'chronik/repeaters/repeater_day'
28
+ require 'chronik/repeaters/repeater_day_name'
29
+ require 'chronik/repeaters/repeater_day_portion'
30
+ require 'chronik/repeaters/repeater_hour'
31
+ require 'chronik/repeaters/repeater_minute'
32
+ require 'chronik/repeaters/repeater_second'
33
+ require 'chronik/repeaters/repeater_time'
34
+
35
+ require 'chronik/grabber'
36
+ require 'chronik/pointer'
37
+ require 'chronik/scalar'
38
+ require 'chronik/ordinal'
39
+ require 'chronik/separator'
40
+ require 'chronik/time_zone'
41
+
42
+ require 'numerizer/numerizer'
43
+
44
# Top-level namespace for the Chronik natural language date/time parser.
# Holds the gem version and two module-level settings.
module Chronik
  VERSION = "0.3.0"

  class << self
    # debug:      when truthy, the parser prints its tagged tokens.
    # time_class: the class used to build times (receives .now and .local).
    attr_accessor :debug, :time_class
  end

  # Defaults: quiet output, Ruby's built-in Time.
  self.time_class = Time
  self.debug = false
end
55
+
56
# Reopens the core Time class to add a constructor that tolerates
# overflowing fields (e.g. second 60, hour 24, day 32, month 13) and
# carries the overflow into the next larger unit.
class Time
  # Builds a time via Chronik.time_class.local after normalising overflow.
  #
  # year   - Integer year.
  # month  - Integer month, 1-based; values above 12 roll into later years.
  # day    - Integer day of month; at most 56, values past the end of the
  #          month roll into the following month.
  # hour   - Integer hour; values >= 24 roll into following days.
  # minute - Integer minute; values >= 60 roll into following hours.
  # second - Integer second; values >= 60 roll into following minutes.
  #
  # Returns whatever Chronik.time_class.local returns for the normalised
  # fields. Raises RuntimeError if day (after hour carry) exceeds 56.
  def self.construct(year, month = 1, day = 1, hour = 0, minute = 0, second = 0)
    if second >= 60
      minute += second / 60
      second = second % 60
    end

    if minute >= 60
      hour += minute / 60
      minute = minute % 60
    end

    if hour >= 24
      day += hour / 24
      hour = hour % 24
    end

    # Normalise the month first so that the month-length lookup below always
    # sees a month in 1..12 and the leap-year test uses the right year.
    if month > 12
      year += (month - 1) / 12
      month = (month - 1) % 12 + 1
    end

    # determine if there is a day overflow. this is complicated by the
    # non-constant number of days per month
    day <= 56 || raise("day must be no more than 56 (makes month resolution easier)")
    if day > 28
      # no month ever has fewer than 28 days, so only do this if necessary
      # Full Gregorian rule: century years are leap years only if divisible by 400.
      leap_year = (year % 4 == 0) && (year % 100 != 0 || year % 400 == 0)
      month_days = [31, (leap_year ? 29 : 28), 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
      days_this_month = month_days[month - 1]
      if day > days_this_month
        # day <= 56 and every month has >= 28 days, so the overflow is at
        # most one month and the remainder (<= 28) is valid in any month.
        day -= days_this_month
        month += 1
        if month > 12
          year += 1
          month = 1
        end
      end
    end

    Chronik.time_class.local(year, month, day, hour, minute, second)
  end
end
@@ -0,0 +1,249 @@
1
module Chronik
  class << self

    # Parses a string containing a natural language date or time. If the parser
    # can find a date or time, either a Time or Chronik::Span will be returned
    # (depending on the value of <tt>:guess</tt>). If no date or time can be found,
    # +nil+ will be returned.
    #
    # Raises Chronik::InvalidArgumentException when an unknown option key or an
    # invalid <tt>:context</tt> value is given. Errors raised while converting
    # tokens to a span propagate to the caller.
    #
    # Options are:
    #
    # [<tt>:context</tt>]
    #     <tt>:past</tt> or <tt>:future</tt> (defaults to <tt>:future</tt>)
    #
    #     If your string represents a birthday, you can set <tt>:context</tt> to <tt>:past</tt>
    #     and if an ambiguous string is given, it will assume it is in the
    #     past. Specify <tt>:future</tt> or omit to set a future context.
    #
    # [<tt>:now</tt>]
    #     Time (defaults to Time.now)
    #
    #     By setting <tt>:now</tt> to a Time, all computations will be based off
    #     of that time instead of Time.now. If set to nil, Chronik will use Time.now.
    #
    # [<tt>:guess</tt>]
    #     +true+ or +false+ (defaults to +true+)
    #
    #     By default, the parser will guess a single point in time for the
    #     given date or time. If you'd rather have the entire time span returned,
    #     set <tt>:guess</tt> to +false+ and a Chronik::Span will be returned.
    #
    # [<tt>:ambiguous_time_range</tt>]
    #     Integer or <tt>:none</tt> (defaults to <tt>6</tt> (6am-6pm))
    #
    #     If an Integer is given, ambiguous times (like 5:00) will be
    #     assumed to be within the range of that time in the AM to that time
    #     in the PM. For example, if you set it to <tt>7</tt>, then the parser will
    #     look for the time between 7am and 7pm. In the case of 5:00, it would
    #     assume that means 5:00pm. If <tt>:none</tt> is given, no assumption
    #     will be made, and the first matching instance of that time will
    #     be used.
    #
    # [<tt>:endian_precedence</tt>]
    #     Accepted (defaults to +nil+) and passed through with the other
    #     options; its interpretation is not visible in this file.
    def parse(text, specified_options = {})
      @text = text

      # get options and set defaults if necessary
      default_options = {:context => :future,
                         :now => Chronik.time_class.now,
                         :guess => true,
                         :ambiguous_time_range => 6,
                         :endian_precedence => nil}
      options = default_options.merge(specified_options)

      # handle options that were explicitly set to nil
      options[:context] ||= :future
      options[:now] ||= Chronik.time_class.now
      options[:ambiguous_time_range] ||= 6

      # ensure the specified options are valid
      specified_options.keys.each do |key|
        default_options.key?(key) || raise(InvalidArgumentException, "#{key} is not a valid option key.")
      end
      [:past, :future, :none].include?(options[:context]) || raise(InvalidArgumentException, "Invalid value ':#{options[:context]}' for :context specified. Valid values are :past and :future.")

      # store now for later =)
      @now = options[:now]

      # put the text into a normal format to ease scanning
      text = pre_normalize(text)

      # get base tokens for each word
      @tokens = base_tokenize(text)

      # scan the tokens with each token scanner; only Repeater takes the options
      @tokens = Repeater.scan(@tokens, options)
      [Grabber, Pointer, Scalar, Ordinal, Separator, TimeZone].each do |tokenizer|
        @tokens = tokenizer.scan(@tokens)
      end

      # strip any non-tagged tokens
      @tokens = @tokens.select { |token| token.tagged? }

      if Chronik.debug
        puts "+---------------------------------------------------"
        puts "| " + @tokens.to_s
        puts "+---------------------------------------------------"
      end

      # do the heavy lifting; any error raised here propagates to the caller
      span = tokens_to_span(@tokens, options)

      # guess a time within a span if required
      options[:guess] ? guess(span) : span
    end

    # Clean up the specified input text by stripping unwanted characters,
    # converting idioms to their canonical form, converting number words
    # to numbers (three => 3), and converting ordinal words to numeric
    # ordinals (third => 3rd). Returns the normalized String.
    #
    # The substitutions are order-dependent (e.g. "before now" must be
    # rewritten before the bare "now" rule runs), so keep the sequence intact.
    def pre_normalize(text) #:nodoc:
      normalized_text = text.to_s.downcase
      normalized_text = numericize_numbers(normalized_text)
      normalized_text.gsub!(/['"\.,]/, '')
      normalized_text.gsub!(/ \-(\d{4})\b/, ' tzminus\1')
      normalized_text.gsub!(/([\/\-\,\@])/, ' \1 ')
      normalized_text.gsub!(/\btoday\b/, 'this day')
      normalized_text.gsub!(/\btomm?orr?ow\b/, 'next day')
      normalized_text.gsub!(/\byesterday\b/, 'last day')
      normalized_text.gsub!(/\bnoon\b/, '12:00')
      normalized_text.gsub!(/\bmidnight\b/, '24:00')
      normalized_text.gsub!(/\bbefore now\b/, 'past')
      normalized_text.gsub!(/\bnow\b/, 'this second')
      normalized_text.gsub!(/\b(ago|before)\b/, 'past')
      normalized_text.gsub!(/\bthis past\b/, 'last')
      normalized_text.gsub!(/\bthis last\b/, 'last')
      normalized_text.gsub!(/\b(?:in|during) the (morning)\b/, '\1')
      normalized_text.gsub!(/\b(?:in the|during the|at) (afternoon|evening|night)\b/, '\1')
      normalized_text.gsub!(/\btonight\b/, 'this night')
      normalized_text.gsub!(/\b\d+:?\d*[ap]\b/,'\0m')
      normalized_text.gsub!(/(\d)([ap]m|oclock)\b/, '\1 \2')
      normalized_text.gsub!(/\b(hence|after|from)\b/, 'future')
      numericize_ordinals(normalized_text)
    end

    # Convert number words to numbers (three => 3) by delegating to Numerizer
    def numericize_numbers(text) #:nodoc:
      Numerizer.numerize(text)
    end

    # Convert ordinal words to numeric ordinals (third => 3rd).
    # Currently a pass-through: the text is returned unchanged.
    def numericize_ordinals(text) #:nodoc:
      text
    end

    # Split the text on spaces and convert each word into
    # a Token
    def base_tokenize(text) #:nodoc:
      text.split(' ').map { |word| Token.new(word) }
    end

    # Guess a specific time within the given span: the midpoint when the
    # span is wider than one second, otherwise its beginning. Returns nil
    # for a nil span.
    def guess(span) #:nodoc:
      return nil if span.nil?
      if span.width > 1
        span.begin + (span.width / 2)
      else
        span.begin
      end
    end
  end

  # A single word of the input together with the tags attached to it
  class Token #:nodoc:
    attr_accessor :word, :tags

    def initialize(word)
      @word = word
      @tags = []
    end

    # Tag this token with the specified tag
    def tag(new_tag)
      @tags << new_tag
    end

    # Remove all tags of the given class
    def untag(tag_class)
      @tags = @tags.reject { |m| m.kind_of? tag_class }
    end

    # Return true if this token has any tags
    def tagged?
      @tags.size > 0
    end

    # Return the first Tag that matches the given class (nil if none)
    def get_tag(tag_class)
      @tags.find { |m| m.kind_of? tag_class }
    end

    # Print this Token in a pretty way. Builds a new string so that the
    # token's word is not modified by printing it.
    def to_s
      "#{@word}(#{@tags.join(', ')}) "
    end
  end

  # A Span represents a range of time. Since this class extends
  # Range, you can use #begin and #end to get the beginning and
  # ending times of the span (they will be of class Time)
  class Span < Range
    # Returns the width of this span in seconds
    def width
      (self.end - self.begin).to_i
    end

    # Add a number of seconds to this span, returning the
    # resulting Span
    def +(seconds)
      Span.new(self.begin + seconds, self.end + seconds)
    end

    # Subtract a number of seconds from this span, returning the
    # resulting Span
    def -(seconds)
      self + -seconds
    end

    # Prints this span in a nice fashion
    def to_s
      "(#{self.begin}..#{self.end})"
    end
  end

  # Tokens are tagged with subclassed instances of this class when
  # they match specific criteria
  class Tag #:nodoc:
    attr_accessor :type

    def initialize(type)
      @type = type
    end

    # Stores the given start time in @now
    def start=(s)
      @now = s
    end
  end

  # Internal exception.
  # NOTE(review): inherits from Exception rather than StandardError, so a
  # bare `rescue` does not catch it; left unchanged because code outside this
  # file may rely on that.
  class ChronikPain < Exception #:nodoc:

  end

  # This exception is raised if an invalid argument is provided to
  # any of Chronik's methods.
  # NOTE(review): inherits from Exception, so callers must rescue it by name.
  class InvalidArgumentException < Exception

  end
end