chronic_distance 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.autotest +22 -0
- data/.gitignore +5 -0
- data/MIT-LICENSE +20 -0
- data/README.rdoc +78 -0
- data/Rakefile +39 -0
- data/VERSION.yml +4 -0
- data/chronic_distance.gemspec +51 -0
- data/lib/chronic_distance.rb +215 -0
- data/lib/numerizer.rb +99 -0
- data/spec/chronic_distance_spec.rb +241 -0
- data/spec/spec_helper.rb +10 -0
- metadata +73 -0
data/.autotest
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'autotest/redgreen'
|
2
|
+
|
3
|
+
# Autotest configuration: drop the default file-to-test mappings and install
# ones that match this gem's layout (lib/*.rb exercised by spec/*_spec.rb).
Autotest.add_hook :initialize do |at|
  at.clear_mappings

  # Ignore anything that doesn't affect spec outcomes
  at.add_exception %r%\.git|MIT-LICENSE|README.rdoc|Rakefile|init\.rb|install\.rb|tasks|uninstall\.rb%

  # Specs should run themselves
  at.add_mapping %r%^spec/.*_spec\.rb$% do |f, _|
    f
  end

  # A change to either library file re-runs every spec. (The patterns used to
  # name chronic_duration, which does not exist in this gem, so library edits
  # never triggered a run.)
  at.add_mapping %r%^lib/(chronic_distance|numerizer)\.rb$% do
    at.files_matching %r%^spec/.*_spec\.rb$%
  end

  # Same for anything that may later live under lib/chronic_distance/
  at.add_mapping %r%^lib/chronic_distance/.*\.rb$% do |_, m|
    at.files_matching %r%^spec/.*_spec\.rb$%
  end
end
|
data/MIT-LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2009 Olivier Lauzon
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,78 @@
|
|
1
|
+
= Chronic Distance
|
2
|
+
|
3
|
+
A simple Ruby natural language parser for distances. Given a parse-able string distance, it returns the distance in millimeters.
|
4
|
+
|
5
|
+
The reverse can also be accomplished with the output method. Given a distance in millimeters, it outputs a string like "4 kilometers" or "4km" depending on the desired format.
|
6
|
+
|
7
|
+
|
8
|
+
== Installation
|
9
|
+
|
10
|
+
$ sudo gem sources -a http://gems.github.com
|
11
|
+
$ sudo gem install olauzon-chronic_distance
|
12
|
+
|
13
|
+
|
14
|
+
== Usage
|
15
|
+
|
16
|
+
>> require 'chronic_distance'
|
17
|
+
=> true
|
18
|
+
|
19
|
+
===Parsing examples
|
20
|
+
|
21
|
+
>> ChronicDistance.parse('5 kilometers')
|
22
|
+
=> 5000000
|
23
|
+
|
24
|
+
>> ChronicDistance.parse('4 miles')
|
25
|
+
=> 6437376
|
26
|
+
|
27
|
+
>> ChronicDistance.parse('four miles')
|
28
|
+
=> 6437376
|
29
|
+
|
30
|
+
>> ChronicDistance.parse('1000m')
|
31
|
+
=> 1000000
|
32
|
+
|
33
|
+
>> ChronicDistance.parse('1000 meters')
|
34
|
+
=> 1000000
|
35
|
+
|
36
|
+
>> ChronicDistance.parse('ten yards')
|
37
|
+
=> 9144.0
|
38
|
+
|
39
|
+
>> ChronicDistance.parse('500 feet')
|
40
|
+
=> 152400.0
|
41
|
+
|
42
|
+
Nil is returned if the string can't be parsed:
|
43
|
+
|
44
|
+
>> ChronicDistance.parse('kilometers')
|
45
|
+
=> nil
|
46
|
+
|
47
|
+
===Output examples
|
48
|
+
|
49
|
+
>> ChronicDistance.output(5000000)
|
50
|
+
=> 5000000 mm
|
51
|
+
|
52
|
+
>> ChronicDistance.output(5000000, :format => :short, :unit => 'kilometers')
|
53
|
+
=> 5km
|
54
|
+
|
55
|
+
>> ChronicDistance.output(5000000, :format => :long, :unit => 'kilometers')
|
56
|
+
=> 5 kilometers
|
57
|
+
|
58
|
+
>> ChronicDistance.output(5000000, :unit => 'meters')
|
59
|
+
=> 5000 m
|
60
|
+
|
61
|
+
>> ChronicDistance.output(5000000, :format => :long, :unit => 'miles')
|
62
|
+
=> 3.10685596118667 miles
|
63
|
+
|
64
|
+
|
65
|
+
== Inspiration
|
66
|
+
|
67
|
+
ChronicDistance borrows heavily from:
|
68
|
+
|
69
|
+
* hpoydar's ChronicDuration http://github.com/hpoydar/chronic_duration
|
70
|
+
* mojombo's Chronic http://github.com/mojombo/chronic
|
71
|
+
|
72
|
+
|
73
|
+
== TODO
|
74
|
+
|
75
|
+
* Benchmark, optimize
|
76
|
+
* Other locale support
|
77
|
+
|
78
|
+
Copyright (c) 2009 Olivier Lauzon. See MIT-LICENSE for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'rake'
|
2
|
+
|
3
|
+
# Gem packaging tasks come from Jeweler; without it only the rdoc and spec
# tasks below are available.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name        = "chronic_distance"
    gem.summary     = %Q{A simple Ruby natural language parser for distances.}
    gem.email       = "olauzon@gmail.com"
    gem.homepage    = "http://github.com/olauzon/chronic_distance"
    gem.description = "A simple Ruby natural language parser for distances."
    gem.authors     = ["Olivier Lauzon"]
  end
rescue LoadError
  puts "Jeweler not available. Install it with: sudo gem install technicalpickles-jeweler -s http://gems.github.com"
end

# API documentation: README, licence and everything under lib/.
require 'rake/rdoctask'
Rake::RDocTask.new do |doc|
  doc.rdoc_dir = 'rdoc'
  doc.title    = 'chronic_distance'
  doc.options.push('--line-numbers', '--inline-source')
  doc.rdoc_files.include('README*', 'MIT-LICENSE', 'lib/**/*.rb')
end

# :spec runs the suite; :rcov runs the same suite with coverage enabled.
require 'spec/rake/spectask'
[:spec, :rcov].each do |task_name|
  Spec::Rake::SpecTask.new(task_name) do |t|
    t.libs.push('lib', 'spec')
    t.spec_files = FileList['spec/**/*_spec.rb']
    t.rcov = true if task_name == :rcov
  end
end

task :default => :spec
|
data/VERSION.yml
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = %q{chronic_distance}
|
8
|
+
s.version = "0.0.1"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["Olivier Lauzon"]
|
12
|
+
s.date = %q{2010-04-07}
|
13
|
+
s.description = %q{A simple Ruby natural language parser for distances.}
|
14
|
+
s.email = %q{olauzon@gmail.com}
|
15
|
+
s.extra_rdoc_files = [
|
16
|
+
"README.rdoc"
|
17
|
+
]
|
18
|
+
s.files = [
|
19
|
+
".autotest",
|
20
|
+
".gitignore",
|
21
|
+
"MIT-LICENSE",
|
22
|
+
"README.rdoc",
|
23
|
+
"Rakefile",
|
24
|
+
"VERSION.yml",
|
25
|
+
"chronic_distance.gemspec",
|
26
|
+
"lib/chronic_distance.rb",
|
27
|
+
"lib/numerizer.rb",
|
28
|
+
"spec/chronic_distance_spec.rb",
|
29
|
+
"spec/spec_helper.rb"
|
30
|
+
]
|
31
|
+
s.homepage = %q{http://github.com/olauzon/chronic_distance}
|
32
|
+
s.rdoc_options = ["--charset=UTF-8"]
|
33
|
+
s.require_paths = ["lib"]
|
34
|
+
s.rubygems_version = %q{1.3.6}
|
35
|
+
s.summary = %q{A simple Ruby natural language parser for distances.}
|
36
|
+
s.test_files = [
|
37
|
+
"spec/chronic_distance_spec.rb",
|
38
|
+
"spec/spec_helper.rb"
|
39
|
+
]
|
40
|
+
|
41
|
+
if s.respond_to? :specification_version then
|
42
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
43
|
+
s.specification_version = 3
|
44
|
+
|
45
|
+
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
46
|
+
else
|
47
|
+
end
|
48
|
+
else
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
@@ -0,0 +1,215 @@
|
|
1
|
+
require 'numerizer'
|
2
|
+
|
3
|
+
# Natural-language distance parsing and formatting. All distances are
# expressed internally in millimeters.
module ChronicDistance

  extend self

  # Given a string representation of distance, return an integer (or float)
  # representation of the distance in millimeters.
  #
  # string  - text such as '4 kms 2 miles' or 'forty yards'
  # options - :round => true rounds the result to the nearest Integer
  #
  # Returns nil when the total is zero, i.e. nothing could be parsed.
  def parse(string, options = {})
    result = calculate_from_words(cleanup(string))
    result = result.round.to_i if options[:round]
    result == 0 ? nil : result
  end

  # Given an integer or float distance in millimeters, and an optional
  # format and unit, return a formatted string representing the distance.
  #
  # options - :unit   => one of the names in distance_units, as a String or
  #                      Symbol (default 'millimeters')
  #           :format => :default ('5 km'), :short ('5km') or
  #                      :long ('5 kilometers')
  #
  # The options hash is only read, never modified.
  #
  # Returns nil when the distance is zero.
  # Raises ArgumentError for an unknown unit or format.
  def output(millimeters, options = {})
    unit   = (options[:unit] || 'millimeters').to_s
    format = options[:format] || :default

    unit_options = unit_formatting_options[format]
    if unit_options.nil?
      raise ArgumentError, "unknown format: #{format.inspect}"
    end
    unless distance_units.include?(unit)
      raise ArgumentError, "unknown unit: #{unit.inspect}"
    end

    # :default uses the short unit names; it differs from :short only in
    # the spacer between number and unit.
    name_format = (format == :default) ? :short : format

    result = humanize_distance(
      distance_in_unit(millimeters, unit),
      unit_format(unit, name_format),
      unit_options[:pluralize],
      unit_options[:spacer]
    )

    result.length == 0 ? nil : result
  end

  private

  # Convert millimeters into the given unit. Whole results are returned as
  # integers so they print without a trailing '.0'.
  def distance_in_unit(millimeters, unit)
    number = millimeters.to_f / millimeter_multiplier(unit)
    number = number.round if number == number.round
    number
  end

  # Canonical (plural) names of every supported unit.
  def distance_units
    %w(millimeters centimeters meters kilometers inches feet yards miles)
  end

  # Number of millimeters in one of the given unit; 0 for anything that is
  # not a canonical unit name (including nil).
  def millimeter_multiplier(unit = 'millimeters')
    case unit
    when 'millimeters' then 1
    when 'centimeters' then 10
    when 'meters'      then 1_000
    when 'kilometers'  then 1_000_000
    when 'inches'      then 25.4
    when 'feet'        then 304.8
    when 'yards'       then 914.4
    when 'miles'       then 1_609_344
    else 0
    end
  end

  # Sum every "<number> <unit>" pair in an already cleaned-up string.
  # A number that is not followed by a canonical unit contributes nothing.
  def calculate_from_words(string)
    distance = 0
    words = string.split(' ')
    words.each_with_index do |value, key|
      next unless value =~ float_matcher
      distance += convert_to_number(value) * millimeter_multiplier(words[key + 1])
    end
    distance
  end

  # Build the display string for a number and a singular/short unit name.
  # Returns '' for a zero distance.
  def humanize_distance(number, unit, pluralize, spacer = '')
    return '' if number == 0

    display_unit = unit
    if pluralize && number != 1
      display_unit = case unit
                     when 'inch' then 'inches'
                     when 'foot' then 'feet'
                     else "#{unit}s"
                     end
    end

    "#{number}#{spacer}#{display_unit}"
  end

  # Normalize raw input: spell-out numbers become digits, numbers are
  # separated from adjacent text, and unknown words are dropped.
  def cleanup(string)
    result = Numerizer.numerize(string)
    result = result.gsub(float_matcher) { |n| " #{n} " }.squeeze(' ').strip
    filter_through_white_list(result)
  end

  # Integer when the text has no fractional part, Float otherwise.
  def convert_to_number(string)
    string.to_f % 1 > 0 ? string.to_f : string.to_i
  end

  def float_matcher
    /[0-9]*\.?[0-9]+/
  end

  # Get rid of unknown words and map found
  # words to defined distance units
  def filter_through_white_list(string)
    known = mappings # built once rather than twice per word
    result = []
    string.split(' ').each do |word|
      if word =~ float_matcher
        result << word
      elsif known.has_key?(word)
        result << known[word]
      end
    end
    result.join(' ')
  end

  # Every recognized spelling => canonical unit name, across all formats.
  def mappings
    mappings_by_format.values.inject({}) do |all, format_mappings|
      all.merge(format_mappings)
    end
  end

  # The display name of a canonical unit in the given format,
  # e.g. ('meters', :long) => 'meter'.
  def unit_format(unit, format = :short)
    mappings_by_format[format].invert[unit]
  end

  def unit_formatting_options
    {
      :default => { :spacer => ' ', :pluralize => false },
      :short   => { :spacer => '',  :pluralize => false },
      :long    => { :spacer => ' ', :pluralize => true  }
    }
  end

  # Recognized spellings grouped by output format. :short and :long each
  # hold exactly one name per unit (they are inverted by unit_format);
  # :other holds extra spellings accepted only when parsing.
  def mappings_by_format
    {
      :short => {
        'mm'   => 'millimeters',
        'cm'   => 'centimeters',
        'm'    => 'meters',
        'km'   => 'kilometers',
        '"'    => 'inches',
        '\''   => 'feet',
        'yd'   => 'yards',
        'mile' => 'miles'
      },

      :long => {
        'millimeter' => 'millimeters',
        'centimeter' => 'centimeters',
        'meter'      => 'meters',
        'kilometer'  => 'kilometers',
        'inch'       => 'inches',
        'foot'       => 'feet',
        'yard'       => 'yards',
        'mile'       => 'miles'
      },

      :other => {
        'mms'         => 'millimeters',
        'millimeters' => 'millimeters',
        'cms'         => 'centimeters',
        'centimeters' => 'centimeters',
        'ms'          => 'meters',
        'meters'      => 'meters',
        'k'           => 'kilometers',
        'ks'          => 'kilometers',
        'kms'         => 'kilometers',
        'kilometers'  => 'kilometers',
        'inch'        => 'inches',
        'inches'      => 'inches',
        'ft'          => 'feet',
        'feet'        => 'feet',
        'y'           => 'yards',
        'yds'         => 'yards',
        'yards'       => 'yards',
        'miles'       => 'miles'
      }
    }
  end

end
|
data/lib/numerizer.rb
ADDED
@@ -0,0 +1,99 @@
|
|
1
|
+
require 'strscan'
|
2
|
+
|
3
|
+
# Rewrites spelled-out English numbers inside a string as digits,
# e.g. "two hundred and five meters" => "205 meters".
#
# Works by successive regex substitution: each recognized number word is
# replaced by a '<num>'-tagged digit string, adjacent tagged numbers are
# combined, and the tags are stripped at the end.
#
# NOTE(review): the word patterns are not anchored on word boundaries, so a
# number word embedded in a longer word (e.g. the "ten" in "often") is also
# replaced.
class Numerizer

  # [pattern source, replacement] pairs. Longer words come first so that
  # e.g. 'fourteen' is consumed before 'four' is tried.
  DIRECT_NUMS = [
    ['eleven', '11'],
    ['twelve', '12'],
    ['thirteen', '13'],
    ['fourteen', '14'],
    ['fifteen', '15'],
    ['sixteen', '16'],
    ['seventeen', '17'],
    ['eighteen', '18'],
    ['nineteen', '19'],
    ['ninteen', '19'], # Common misspelling
    ['zero', '0'],
    ['one', '1'],
    ['two', '2'],
    ['three', '3'],
    ['four(\W|$)', '4\1'],  # The weird regex is so that it matches four but not fourty
    ['five', '5'],
    ['six(\W|$)', '6\1'],
    ['seven(\W|$)', '7\1'],
    ['eight(\W|$)', '8\1'],
    ['nine(\W|$)', '9\1'],
    ['ten', '10'],
    ['\ba[\b^$]', '1'] # doesn't make sense for an 'a' at the end to be a 1
  ]

  TEN_PREFIXES = [
    ['twenty', 20],
    ['thirty', 30],
    ['forty', 40],
    ['fourty', 40], # Common misspelling
    ['fifty', 50],
    ['sixty', 60],
    ['seventy', 70],
    ['eighty', 80],
    ['ninety', 90]
  ]

  BIG_PREFIXES = [
    ['hundred', 100],
    ['thousand', 1000],
    ['million', 1_000_000],
    ['billion', 1_000_000_000],
    ['trillion', 1_000_000_000_000],
  ]

  # Return a copy of string with number words converted to digits.
  # The argument itself is not modified.
  def self.numerize(string)
    string = string.dup

    # preprocess
    string.gsub!(/ +|([^\d])-([^\d])/, '\1 \2') # will mutilate hyphenated-words but shouldn't matter for number extraction
    string.gsub!(/a half/, 'haAlf') # take the 'a' out so it doesn't turn into a 1, save the half for the end

    # easy/direct replacements
    DIRECT_NUMS.each do |dn|
      string.gsub!(/#{dn[0]}/i, '<num>' + dn[1])
    end

    # ten, twenty, etc. -- first with a following unit digit ("twenty <num>1")
    TEN_PREFIXES.each do |tp|
      string.gsub!(/(?:#{tp[0]}) *<num>(\d(?=[^\d]|$))*/i) { '<num>' + (tp[1] + $1.to_i).to_s }
    end

    # ...then any that stand alone
    TEN_PREFIXES.each do |tp|
      string.gsub!(/#{tp[0]}/i) { '<num>' + tp[1].to_s }
    end

    # hundreds, thousands, millions, etc.
    BIG_PREFIXES.each do |bp|
      string.gsub!(/(?:<num>)?(\d*) *#{bp[0]}/i) { '<num>' + (bp[1] * $1.to_i).to_s }
      andition(string)
    end

    # fractional addition
    # I'm not combining this with the previous block as using float addition complicates the strings
    # (with extraneous .0's and such )
    string.gsub!(/(\d+)(?: | and |-)*haAlf/i) { ($1.to_f + 0.5).to_s }

    string.gsub(/<num>/, '')
  end

  # Combine adjacent tagged numbers in place ("<num>200 and <num>5" becomes
  # "<num>205"). Two numbers separated only by a space are added only when
  # the first has more digits than the second.
  def self.andition(string)
    sc = StringScanner.new(string)
    while(sc.scan_until(/<num>(\d+)( | and )<num>(\d+)(?=[^\w]|$)/i))
      if sc[2] =~ /and/ || sc[1].size > sc[3].size
        string[(sc.pos - sc.matched_size)..(sc.pos-1)] = '<num>' + (sc[1].to_i + sc[3].to_i).to_s
        # the string changed under the scanner, so start over
        sc.reset
      end
    end
  end
  # A bare `private` does not apply to `def self.` methods; this does.
  private_class_method :andition

end
|
@@ -0,0 +1,241 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/spec_helper'
|
2
|
+
|
3
|
+
# Specs for the public API (.parse / .output) and for the private helpers,
# which are reached through instance_eval.
describe ChronicDistance do

  describe '.parse' do

    # input string => expected distance in millimeters
    exemplars = {
      '1mm'                             => 1 * 1,
      '5 mms'                           => 5 * 1,
      'fifty millimeters'               => 50 * 1,
      '1cm'                             => 1 * 10,
      '2 cms'                           => 2 * 10,
      '1 centimeter'                    => 1 * 10,
      '1m'                              => 1 * 1_000,
      '2 ms'                            => 2 * 1_000,
      '1 meter'                         => 1 * 1_000,
      '1k'                              => 1 * 1_000_000,
      '5 ks'                            => 5 * 1_000_000,
      '1km'                             => 1 * 1_000_000,
      '2 kms'                           => 2 * 1_000_000,
      '1 kilometer'                     => 1 * 1_000_000,
      '4 kilometers'                    => 4 * 1_000_000,
      '1 inch'                          => 1 * 25.4,
      '4 inches'                        => 4 * 25.4,
      '1 ft'                            => 1 * 304.8,
      '1y'                              => 1 * 914.4,
      'one yd'                          => 1 * 914.4,
      'eight yds'                       => 8 * 914.4,
      '1 yard'                          => 1 * 914.4,
      'forty yards'                     => 40 * 914.4,
      '1 mile'                          => 1 * 1_609_344,
      '3.5 miles'                       => 3.5 * 1_609_344,
      '4 miles'                         => 4 * 1_609_344,
      '4 kms 2 miles'                   => 4 * 1_000_000 + 2 * 1_609_344,
      'those four kms and 2 miles also' => 4 * 1_000_000 + 2 * 1_609_344
    }

    it "should return nil if the string can't be parsed" do
      ChronicDistance.parse('gobblygoo').should be_nil
    end

    it "should return an integer if units are kilometers" do
      ChronicDistance.parse('4kms').is_a?(Integer).should be_true
    end

    it "should return an integer if units are miles" do
      ChronicDistance.parse('3 miles').is_a?(Integer).should be_true
    end

    it "should return a float if units are yards" do
      ChronicDistance.parse('4 yards').is_a?(Float).should be_true
    end

    it "should return an integer if units are yards and distance is rounded" do
      ChronicDistance.parse('four yards', :round => true).
        is_a?(Integer).should be_true
    end

    exemplars.each do |key, value|
      it "should properly parse a distance like #{key}" do
        ChronicDistance.parse(key).should == value
      end
    end

  end

  describe '.output' do

    # This example used to call .parse, so .output's nil path was untested.
    it "should return nil when the distance is zero" do
      ChronicDistance.output(0).should be_nil
    end

    # millimeters => unit => format => expected string.
    # Each distance appears once: a repeated hash key silently replaces the
    # earlier entry, which previously dropped the millimeter and centimeter
    # cases for 1_609_344.
    exemplars = {

      (1) => {
        'millimeters' => {
          :short   => '1mm',
          :default => '1 mm',
          :long    => '1 millimeter'
        }
      },

      (1_609_344) => {
        'millimeters' => {
          :short   => '1609344mm',
          :default => '1609344 mm',
          :long    => '1609344 millimeters'
        },
        'centimeters' => {
          :short   => '160934.4cm',
          :default => '160934.4 cm',
          :long    => '160934.4 centimeters'
        },
        'miles' => {
          :short   => '1mile',
          :default => '1 mile',
          :long    => '1 mile'
        }
      },

      (4 * 1_609_344) => {
        'miles' => {
          :short   => '4mile',
          :default => '4 mile',
          :long    => '4 miles'
        }
      },

      (2.5 * 1_609_344) => {
        'miles' => {
          :short   => '2.5mile',
          :default => '2.5 mile',
          :long    => '2.5 miles'
        }
      },

      (4 * 25.4) => {
        'inches' => {
          :short   => '4"',
          :default => '4 "',
          :long    => '4 inches'
        }
      },

      (4 * 304.8) => {
        'feet' => {
          :short   => '4\'',
          :default => '4 \'',
          :long    => '4 feet'
        }
      }
    }

    exemplars.each do |distance, format|
      format.each do |unit, distance_format|
        distance_format.each do |format_option, formatted_distance|

          it "should output #{distance} millimeters as #{formatted_distance} " +
             "using the #{format_option} #{unit} format" do
            ChronicDistance.output(distance,
                                   :format => format_option,
                                   :unit   => unit).
              should == formatted_distance
          end

        end
      end
    end

    it "should use the default format when the format is not specified" do
      ChronicDistance.output(2000).should == '2000 mm'
    end

  end

  describe " private methods" do

    describe ".calculate_from_words" do

      it "should return distance in millimeters" do
        ChronicDistance.
          instance_eval("calculate_from_words('10 centimeters')").
          should == 100
      end

      it "should return distance in millimeters when mixing metric units" do
        ChronicDistance.
          instance_eval("calculate_from_words('2 kilometers and 10 centimeters')").
          should == 2_000_100
      end

      it "should return distance in millimeters when mixing imperial and metric units" do
        ChronicDistance.
          instance_eval("calculate_from_words('2 miles and 10 centimeters')").
          should == 3_218_788
      end

    end

    describe ".cleanup" do

      it "should clean up extraneous words" do
        ChronicDistance.
          instance_eval("cleanup('4 meters and 10 centimeters')").
          should == '4 meters 10 centimeters'
      end

      it "should cleanup extraneous spaces" do
        ChronicDistance.
          instance_eval("cleanup(' 4 meters and 11 centimeters')").
          should == '4 meters 11 centimeters'
      end

      it "should insert spaces where there aren't any" do
        ChronicDistance.
          instance_eval("cleanup('4m11.5cm')").
          should == '4 meters 11.5 centimeters'
      end

    end

    describe ".unit_format" do

      it "should select 'meter' for the long meters format" do
        ChronicDistance.
          instance_eval("unit_format('meters', :long)").
          should == 'meter'
      end

      it "should select 'm' for the short meters format" do
        ChronicDistance.
          instance_eval("unit_format('meters', :short)").
          should == 'm'
      end

      it "should select 'mm' for the short millimeters format" do
        ChronicDistance.
          instance_eval("unit_format('millimeters', :short)").
          should == 'mm'
      end

    end

  end

end
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: chronic_distance
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
version: 0.0.1
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- Olivier Lauzon
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2010-04-07 00:00:00 -04:00
|
18
|
+
default_executable:
|
19
|
+
dependencies: []
|
20
|
+
|
21
|
+
description: A simple Ruby natural language parser for distances.
|
22
|
+
email: olauzon@gmail.com
|
23
|
+
executables: []
|
24
|
+
|
25
|
+
extensions: []
|
26
|
+
|
27
|
+
extra_rdoc_files:
|
28
|
+
- README.rdoc
|
29
|
+
files:
|
30
|
+
- .autotest
|
31
|
+
- .gitignore
|
32
|
+
- MIT-LICENSE
|
33
|
+
- README.rdoc
|
34
|
+
- Rakefile
|
35
|
+
- VERSION.yml
|
36
|
+
- chronic_distance.gemspec
|
37
|
+
- lib/chronic_distance.rb
|
38
|
+
- lib/numerizer.rb
|
39
|
+
- spec/chronic_distance_spec.rb
|
40
|
+
- spec/spec_helper.rb
|
41
|
+
has_rdoc: true
|
42
|
+
homepage: http://github.com/olauzon/chronic_distance
|
43
|
+
licenses: []
|
44
|
+
|
45
|
+
post_install_message:
|
46
|
+
rdoc_options:
|
47
|
+
- --charset=UTF-8
|
48
|
+
require_paths:
|
49
|
+
- lib
|
50
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
segments:
|
55
|
+
- 0
|
56
|
+
version: "0"
|
57
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
segments:
|
62
|
+
- 0
|
63
|
+
version: "0"
|
64
|
+
requirements: []
|
65
|
+
|
66
|
+
rubyforge_project:
|
67
|
+
rubygems_version: 1.3.6
|
68
|
+
signing_key:
|
69
|
+
specification_version: 3
|
70
|
+
summary: A simple Ruby natural language parser for distances.
|
71
|
+
test_files:
|
72
|
+
- spec/chronic_distance_spec.rb
|
73
|
+
- spec/spec_helper.rb
|