midnight 0.0.1.pre
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/.rubocop_todo.yml +278 -0
- data/.travis.yml +3 -0
- data/Gemfile +4 -0
- data/LICENSE +339 -0
- data/LICENSE.txt +22 -0
- data/README.md +30 -0
- data/README.textile +20 -0
- data/Rakefile +12 -0
- data/lib/midnight.rb +42 -0
- data/lib/midnight/converter.rb +152 -0
- data/lib/midnight/cron_expression.rb +38 -0
- data/lib/midnight/handler.rb +104 -0
- data/lib/midnight/midnight.rb +91 -0
- data/lib/midnight/repeater.rb +153 -0
- data/lib/midnight/version.rb +3 -0
- data/lib/numerizer/numerizer.rb +103 -0
- data/midnight.gemspec +24 -0
- data/test/helper.rb +10 -0
- data/test/test_cron_expression.rb +18 -0
- data/test/test_parsing.rb +68 -0
- data/todo.txt +21 -0
- metadata +112 -0
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2014 bluefuton
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
# Midnight
|
2
|
+
|
3
|
+
A library to parse natural language date/time into a cron expression.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
gem 'midnight'
|
10
|
+
|
11
|
+
And then execute:
|
12
|
+
|
13
|
+
$ bundle
|
14
|
+
|
15
|
+
Or install it yourself as:
|
16
|
+
|
17
|
+
$ gem install midnight
|
18
|
+
|
19
|
+
## Usage
|
20
|
+
|
21
|
+
<pre>Midnight.parse('every 5 minutes').to_s
|
22
|
+
=> "*/5 * * * *"</pre>
|
23
|
+
|
24
|
+
## Contributing
|
25
|
+
|
26
|
+
1. Fork it ( http://github.com/bluefuton/midnight/fork )
|
27
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
28
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
29
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
30
|
+
5. Create new Pull Request
|
data/README.textile
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
h1. midnight
|
2
|
+
|
3
|
+
<img src="https://api.travis-ci.org/bluefuton/midnight.png" alt="Travis build status" />
|
4
|
+
|
5
|
+
A library to parse natural language date/time into a cron expression.
|
6
|
+
|
7
|
+
<pre>Midnight.parse('every 5 minutes').to_s
|
8
|
+
=> "*/5 * * * *"</pre>
|
9
|
+
|
10
|
+
h2. Supported phrases
|
11
|
+
|
12
|
+
A full list of supported natural language phrases can be found in <a href="https://github.com/bluefuton/midnight/blob/develop/test/test_parsing.rb">test_parsing.rb</a>.
|
13
|
+
|
14
|
+
In the future there'll be support for more complex repetitions - a wishlist can be found in <a href="https://github.com/bluefuton/midnight/blob/develop/todo.txt">todo.txt</a>.
|
15
|
+
|
16
|
+
h2. Credits
|
17
|
+
|
18
|
+
My tokeniser code is based on the excellent <a href="https://github.com/yb66/tickle">Tickle</a> gem, which in turn relies on <a href="https://github.com/mojombo/chronic">Chronic</a> for date parsing.
|
19
|
+
|
20
|
+
Author: Chris Rosser <chris@bluefuton.com>
|
data/Rakefile
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake'
|
3
|
+
require 'rake/testtask'
|
4
|
+
require 'bundler/gem_tasks'
|
5
|
+
|
6
|
+
task :default => 'test'
|
7
|
+
|
8
|
+
Rake::TestTask.new(:test) do |test|
|
9
|
+
test.libs << 'lib' << 'test'
|
10
|
+
test.pattern = 'test/**/test_*.rb'
|
11
|
+
#test.verbose = true
|
12
|
+
end
|
data/lib/midnight.rb
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
#=============================================================================
|
2
|
+
#
|
3
|
+
# Name: Midnight
|
4
|
+
# Author: Chris Rosser
|
5
|
+
# Purpose: Parse natural language date/time into a cron expression
|
6
|
+
#
|
7
|
+
#=============================================================================
|
8
|
+
|
9
|
+
$:.unshift File.dirname(__FILE__) # For use/testing when no gem is installed
|
10
|
+
|
11
|
+
require 'date'
|
12
|
+
require 'time'
|
13
|
+
require 'chronic'
|
14
|
+
|
15
|
+
require 'midnight/midnight'
|
16
|
+
require 'midnight/handler'
|
17
|
+
require 'midnight/repeater'
|
18
|
+
require 'midnight/cron_expression'
|
19
|
+
require 'midnight/converter'
|
20
|
+
require 'midnight/version'
|
21
|
+
|
22
|
+
# Top-level namespace of the Midnight gem, with its debug-output helpers.
module Midnight
  class << self
    # Debug switch for the library; hard-wired to false.
    def debug
      false
    end

    # Writes +msg+ to stdout, but only while Midnight.debug is truthy.
    # Always returns nil.
    def dwrite(msg)
      return unless Midnight.debug

      puts msg
    end
  end
end
|
29
|
+
|
30
|
+
class Date
  # Number of days in the month this date falls in (leap years included).
  #
  # A negative day passed to Date.new counts back from the end of the month,
  # so day -1 is the month's last day; this replaces the earlier loop that
  # probed Date.valid_civil? one day at a time.
  def days_in_month
    Date.new(year, month, -1).day
  end
end
|
37
|
+
|
38
|
+
class Array
  # True when this array and +y+ contain the same elements regardless of
  # order: both sides are sorted and the sorted copies compared.
  def same?(y)
    mine = sort
    theirs = y.sort
    mine == theirs
  end
end
|
@@ -0,0 +1,152 @@
|
|
1
|
+
# Turns a list of scanned tokens into a Midnight::CronExpression.
class Midnight::Converter
  attr_accessor :expr, :tokens

  # Builds a fresh CronExpression from +tokens+ and returns it.
  #
  # The expression is returned untouched when +tokens+ is empty or contains a
  # token of type :special. Otherwise the detect_* passes run in a fixed
  # order, each filling in the fields it recognises (a later pass may
  # overwrite what an earlier one set).
  def convert_tokens_to_cron_expression(tokens)
    @expr = Midnight::CronExpression.new
    @tokens = tokens

    nothing_to_convert = @tokens.empty? || tokens.find { |t| t.type == :special }
    return @expr if nothing_to_convert

    detect_minute_repetition
    detect_hour_repetition
    detect_day_repetition
    detect_weekday_repetition
    detect_week_repetition
    detect_month_repetition
    detect_year_repetition

    @expr
  end

  protected

  # Handles :minute_word and :minute tokens.
  #
  # For a :minute_word token: a :number token yields '*/<interval>' minutes;
  # failing that, an :hour token sets both the hour and the minute; failing
  # that, a lone token forces the every-minute expression.
  # A :minute token sets the minute field to its word.
  def detect_minute_repetition
    @tokens.each do |token|
      case token.type
      when :minute_word
        number = tokens.find { |t| t.type == :number }
        hour = tokens.find { |t| t.type == :hour }

        if number.is_a?(Midnight::Token)
          @expr.minute = "*/#{number.interval}"
        elsif hour
          @expr.hour = adjust_hour_for_meridiem(hour.word)
          @expr.minute = token.word
        elsif @tokens.length == 1
          @expr.force_run_every_minute = true
        end
      when :minute
        @expr.minute = token.word
      end
    end
  end

  # Handles :hour and :meridiem tokens.
  #
  # An :hour token defaults the minute to 0, then sets the hour to
  # '*/<interval>' when a :number token exists, to nil when it is the only
  # token, and to its own (meridiem-adjusted) word otherwise.
  # A :meridiem token combined with a :number token sets the hour from the
  # number's word.
  def detect_hour_repetition
    # The token list is not modified while iterating, so one lookup suffices.
    number = tokens.find { |t| t.type == :number }

    @tokens.each do |token|
      if token.type == :hour
        @expr.minute = 0 if @expr.minute.nil?
        @expr.hour =
          if number.is_a?(Midnight::Token)
            "*/#{number.interval}"
          elsif @tokens.length == 1
            nil
          else
            adjust_hour_for_meridiem(token.word)
          end
      end

      next unless token.type == :meridiem && !number.nil?

      @expr.hour = adjust_hour_for_meridiem(number.word)
    end
  end

  # When the first token is a :week with interval 7, schedules the job for
  # day-of-week 0 at 00:00.
  def detect_week_repetition
    first = @tokens.first
    return unless first.type == :week && first.interval == 7

    @expr.day_of_week = 0
    @expr.hour = 0
    @expr.minute = 0
  end

  # Uses the first :weekday token's position_in_sequence as the day of week.
  # The minute defaults to 0 if unset; the hour defaults to 0 if unset and
  # no :minute_word token is present.
  def detect_weekday_repetition
    weekday = @tokens.find { |t| t.type == :weekday }
    return if weekday.nil?

    @expr.day_of_week = weekday.position_in_sequence

    has_minute_word = @tokens.find { |t| t.type == :minute_word }
    @expr.hour = 0 if !has_minute_word && @expr.hour.nil?

    @expr.minute = 0 if @expr.minute.nil?
  end

  # Handles :day tokens whose interval is 1.
  #
  # The minute is reset to '0'. If a :number or :hour token exists, the hour
  # comes from it (meridiem-adjusted) and a :minute token, when present,
  # supplies the minute; otherwise the hour is 0.
  def detect_day_repetition
    @tokens.each do |token|
      next unless token.type == :day && token.interval == 1

      @expr.minute = '0'

      # Do we need to run it at a specific time?
      time_token = tokens.find { |t| t.type == :number || t.type == :hour }

      unless time_token.is_a?(Midnight::Token)
        @expr.hour = 0
        next
      end

      # time_token is either a :number (use its interval) or an :hour (use its word)
      raw_hour = time_token.type == :number ? time_token.interval : time_token.word

      # Is a minute specified?
      minute_token = tokens.find { |t| t.type == :minute }
      @expr.minute = minute_token.word if minute_token.is_a?(Midnight::Token)

      # Takes any meridiem token (am/pm) into account
      @expr.hour = adjust_hour_for_meridiem(raw_hour)
    end
  end

  # When the first token is a :month, schedules the job for day 1 at 00:00.
  def detect_month_repetition
    first = @tokens.first
    return unless first.type == :month

    @expr.day_of_month = 1
    @expr.hour = 0
    @expr.minute = 0
  end

  # When the first token is a :year or a :month_name, schedules the job for
  # day 1 at 00:00 in month 1, or in the named month's position_in_sequence
  # for a :month_name token.
  def detect_year_repetition
    first = @tokens.first
    return unless first.type == :year || first.type == :month_name

    @expr.day_of_month = 1
    @expr.hour = 0
    @expr.minute = 0
    @expr.month = first.type == :month_name ? first.position_in_sequence : 1
  end

  # Converts +hour+ to an integer hour, honouring any :meridiem token.
  #
  # Adds 12 when a :meridiem token with word 'pm' exists and the hour is
  # below 12; an hour of 24 becomes 0.
  def adjust_hour_for_meridiem(hour)
    numeric_hour = hour.to_i

    # Is there a meridiem token (am/pm)?
    meridiem = @tokens.find { |t| t.type == :meridiem }
    numeric_hour += 12 if meridiem && meridiem.word == 'pm' && numeric_hour < 12

    numeric_hour == 24 ? 0 : numeric_hour
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# * * * * * command to execute
|
2
|
+
# ┬ ┬ ┬ ┬ ┬
|
3
|
+
# │ │ │ │ │
|
4
|
+
# │ │ │ │ │
|
5
|
+
# │ │ │ │ └───── day of week (0 - 7) (0 to 6 are Sunday to Saturday, or use names; 7 is Sunday, the same as 0)
|
6
|
+
# │ │ │ └────────── month (1 - 12)
|
7
|
+
# │ │ └─────────────── day of month (1 - 31)
|
8
|
+
# │ └──────────────────── hour (0 - 23)
|
9
|
+
# └───────────────────────── min (0 - 59)
|
10
|
+
class Midnight::CronExpression #:nodoc:
  attr_accessor :minute, :hour, :day_of_month, :month, :day_of_week, :force_run_every_minute

  # Renders the five cron fields separated by spaces, with '*' standing in
  # for every field that is nil.
  #
  # Returns '* * * * *' only when force_run_every_minute is true. If every
  # field would be '*' without that flag, returns nil instead: better to
  # return nil than accidentally recommend that people run a job every minute.
  def to_s
    return '* * * * *' if @force_run_every_minute === true

    field_names = [:minute, :hour, :day_of_month, :month, :day_of_week]
    fields = field_names.map { |name| get_attribute(name) }

    return nil if fields.all? { |field| field == '*' }

    fields.join(' ')
  end

  protected

  # String form of the instance variable named by +symbol+, or '*' when that
  # variable is nil.
  def get_attribute(symbol)
    value = instance_variable_get("@#{symbol}")
    return '*' if value.nil?

    value.to_s
  end
end
|
@@ -0,0 +1,104 @@
|
|
1
|
+
module Midnight
  class << self
    # Estimates the next occurrence of the tokenised phrase.
    #
    # Tries each guess_* routine in turn until one returns an interval in
    # days. Reads @tokens (through token_types / token_of_type) and @start;
    # the guess routines may replace @start or set @next themselves.
    #
    # Returns the next occurrence as a Time, or nil when no routine
    # recognised the token combination.
    def guess
      # Not sure we'll need interval for cron output
      interval = guess_unit_types
      interval ||= guess_weekday
      interval ||= guess_month_names
      interval ||= guess_number_and_unit
      interval ||= guess_special

      # Default the next occurrence to start + interval unless a guess routine
      # already set it. @start can be a Date (guess_special_end and
      # guess_special_middle assign one) and Date#+ counts days, not seconds,
      # so convert to Time before adding the interval in seconds.
      @next ||= @start.to_time + (interval * 60 * 60 * 24) if interval

      # A start that lies in the future is itself the next occurrence.
      @next = @start if @start.to_time > Time.now

      @next.to_time if interval
    end

    # Interval in days for a lone unit token: :day => 1, :week => 7,
    # :month => 30, :year => 365. Returns nil for anything else.
    def guess_unit_types
      interval = 1 if token_types.same?([:day])
      interval = 7 if token_types.same?([:week])
      interval = 30 if token_types.same?([:month])
      interval = 365 if token_types.same?([:year])
      interval
    end

    # For a lone :weekday token: sets @start by parsing the token's start
    # value with Chronic and returns 7. Returns nil otherwise.
    def guess_weekday
      if token_types.same?([:weekday])
        @start = Chronic.parse(token_of_type(:weekday).start.to_s)
        interval = 7
      end
      interval
    end

    # For a lone :month_name token: sets @start by parsing
    # "<token start> 1" with Chronic and returns 30. Returns nil otherwise.
    def guess_month_names
      if token_types.same?([:month_name])
        @start = Chronic.parse("#{token_of_type(:month_name).start.to_s} 1")
        interval = 30
      end
      interval
    end

    # Interval in days for a :number token paired with a unit token: the
    # number's interval multiplied by 1, 7, 30 or 365 for day, week, month or
    # year. Returns nil for any other combination.
    def guess_number_and_unit
      interval = token_of_type(:number).interval if token_types.same?([:number, :day])
      interval = (token_of_type(:number).interval * 7) if token_types.same?([:number, :week])
      interval = (token_of_type(:number).interval * 30) if token_types.same?([:number, :month])
      interval = (token_of_type(:number).interval * 365) if token_types.same?([:number, :year])
      interval
    end

    # Interval in days for a :special token paired with a unit token, trying
    # the :other, :beginning, :middle and :end handlers in that order.
    # Returns nil when none of them matches.
    def guess_special
      interval = guess_special_other
      interval ||= guess_special_beginning
      interval ||= guess_special_middle
      interval ||= guess_special_end
    end

    private

    # True when the tokens are exactly one :special token plus one +unit+
    # token and the :special token's start equals +position+.
    def special_phrase?(unit, position)
      token_types.same?([:special, unit]) && token_of_type(:special).start == position
    end

    # :special tokens whose start is :other, combined with a unit: returns
    # 2, 14, 60 or 730 days. The month and year cases also set @next via
    # Chronic.
    def guess_special_other
      interval = 2 if special_phrase?(:day, :other)
      interval = 14 if special_phrase?(:week, :other)
      if special_phrase?(:month, :other)
        interval = 60
        @next = Chronic.parse('2 months from now')
      end
      if special_phrase?(:year, :other)
        interval = 730
        @next = Chronic.parse('2 years from now')
      end
      interval
    end

    # :special tokens whose start is :beginning: sets @start via Chronic and
    # returns 7, 30 or 365 for week, month or year.
    def guess_special_beginning
      if special_phrase?(:week, :beginning)
        interval = 7
        @start = Chronic.parse('Sunday')
      end
      if special_phrase?(:month, :beginning)
        interval = 30
        @start = Chronic.parse('1st day next month')
      end
      if special_phrase?(:year, :beginning)
        interval = 365
        @start = Chronic.parse('1st day next year')
      end
      interval
    end

    # :special tokens whose start is :end: sets @start to Saturday (via
    # Chronic), to the last day of the current month, or to 31 December of
    # the current year, and returns 7, 30 or 365.
    def guess_special_end
      if special_phrase?(:week, :end)
        interval = 7
        @start = Chronic.parse('Saturday')
      end
      if special_phrase?(:month, :end)
        interval = 30
        @start = Date.new(Date.today.year, Date.today.month, Date.today.days_in_month)
      end
      if special_phrase?(:year, :end)
        interval = 365
        @start = Date.new(Date.today.year, 12, 31)
      end
      interval
    end

    # :special tokens whose start is :middle: sets @start to Wednesday (via
    # Chronic), to the 15th of this or next month, or to 15 June of this or
    # next year, and returns 7, 30 or 365.
    def guess_special_middle
      if special_phrase?(:week, :middle)
        interval = 7
        @start = Chronic.parse('Wednesday')
      end
      if special_phrase?(:month, :middle)
        interval = 30
        @start = (Date.today.day >= 15 ? Chronic.parse('15th day of next month') : Date.new(Date.today.year, Date.today.month, 15))
      end
      if special_phrase?(:year, :middle)
        interval = 365
        # Compare whole dates: checking day and month separately treated
        # e.g. 1 July as still being before mid-June.
        today = Date.today
        mid_year = Date.new(today.year, 6, 15)
        @start = (today >= mid_year ? Date.new(today.year + 1, 6, 15) : mid_year)
      end
      interval
    end

    # First token in @tokens whose type equals +type+, or nil.
    def token_of_type(type)
      @tokens.detect { |token| token.type == type }
    end
  end
end
|
@@ -0,0 +1,91 @@
|
|
1
|
+
require_relative "../numerizer/numerizer.rb"
|
2
|
+
|
3
|
+
module Midnight
  class << self
    # Parses a natural-language schedule such as 'every 5 minutes'.
    #
    # text              - the phrase to parse.
    # specified_options - optional Hash; the only accepted key is :start.
    #
    # Returns the Midnight::CronExpression produced by
    # Converter#convert_tokens_to_cron_expression.
    # Raises InvalidArgumentException for an unknown option key or when
    # Chronic cannot parse the given :start value.
    def parse(text, specified_options = {})
      # get options and set defaults if necessary
      default_options = {:start => Time.now}
      options = default_options.merge specified_options

      # ensure the specified options are valid
      specified_options.each_key do |key|
        next if default_options.key?(key)

        raise(InvalidArgumentException, "#{key} is not a valid option key.")
      end
      if specified_options[:start] && !Chronic.parse(specified_options[:start])
        raise(InvalidArgumentException, ':start specified is not a valid datetime.')
      end

      # remove every if specified
      text = text.gsub(/^every\s\b/, '')

      # put the text into a normal format to ease scanning using Chronic
      text = pre_normalize(text)
      text = Chronic::Parser.new.pre_normalize(text)
      text = numericize_ordinals(text)

      # check to see if this event starts some other time and reset now
      event, starting = text.split('starting')
      @start = (Chronic.parse(starting) || options[:start])
      @next = nil

      # split into tokens
      @tokens = base_tokenize(event)

      # scan the tokens with each token scanner
      # NOTE(review): Repeater is defined outside this section; presumably it
      # assigns a type to each recognised token - confirm.
      @tokens = Repeater.scan(@tokens)

      # remove all tokens without a type
      @tokens.reject! { |token| token.type.nil? }

      Converter.new.convert_tokens_to_cron_expression(@tokens)
    end

    # Normalize natural string removing prefix language: a leading 'every ',
    # 'each ' or 'on the ' is stripped.
    #
    # The substitutions are chained; previously each one was applied to the
    # original text, so only the last ('on the ') ever took effect.
    def pre_normalize(text)
      text.gsub(/^every\s\b/, '').gsub(/^each\s\b/, '').gsub(/^on the\s\b/, '')
    end

    # Split the text on spaces and convert each word into
    # a Token. A nil text (parse produces one for an empty phrase) gives an
    # empty list instead of raising NoMethodError.
    def base_tokenize(text) #:nodoc:
      text.to_s.split(' ').map { |word| Token.new(word) }
    end

    # Strip the ordinal suffix from numeric ordinals (3rd => 3)
    def numericize_ordinals(text) #:nodoc:
      text.gsub(/\b(\d*)(st|nd|rd|th)\b/, '\1')
    end

    # Returns an array of types for all tokens
    def token_types
      @tokens.map(&:type)
    end
  end

  # One word of the input phrase plus the attributes assigned through #update.
  class Token #:nodoc:
    attr_accessor :word, :type, :interval, :start, :position_in_sequence

    # Wraps +word+; every other attribute starts out as nil.
    def initialize(word)
      @word = word
      @type = @interval = @start = @position_in_sequence = nil
    end

    # Overwrites type, start, interval and position_in_sequence in one call;
    # arguments left out are reset to nil.
    def update(type, start=nil, interval=nil, position_in_sequence=nil)
      @start = start
      @type = type
      @interval = interval
      @position_in_sequence = position_in_sequence
    end
  end

  # This exception is raised if an invalid argument is provided to
  # any of Midnight's methods.
  #
  # Inherits from StandardError (not Exception) so that a plain `rescue`
  # in calling code catches it.
  class InvalidArgumentException < StandardError
  end
end
|