olauzon-chronic_distance 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,78 @@
1
+ = Chronic Distance
2
+
3
+ A simple Ruby natural language parser for distances. Given a parseable distance string, it returns the distance in millimeters.
4
+
5
+ The reverse can also be accomplished with the output method. Given a distance in millimeters, it outputs a string like "4 kilometers" or "4km" depending on the desired format.
6
+
7
+
8
+ == Installation
9
+
10
+ $ sudo gem sources -a http://gems.github.com
11
+ $ sudo gem install olauzon-chronic_distance
12
+
13
+
14
+ == Usage
15
+
16
+ >> require 'chronic_distance'
17
+ => true
18
+
19
+ === Parsing examples
20
+
21
+ >> ChronicDistance.parse('5 kilometers')
22
+ => 5000000
23
+
24
+ >> ChronicDistance.parse('4 miles')
25
+ => 6437376
26
+
27
+ >> ChronicDistance.parse('four miles')
28
+ => 6437376
29
+
30
+ >> ChronicDistance.parse('1000m')
31
+ => 1000000
32
+
33
+ >> ChronicDistance.parse('1000 meters')
34
+ => 1000000
35
+
36
+ >> ChronicDistance.parse('ten yards')
37
+ => 9144.0
38
+
39
+ >> ChronicDistance.parse('500 feet')
40
+ => 152400.0
41
+
42
+ Nil is returned if the string can't be parsed:
43
+
44
+ >> ChronicDistance.parse('kilometers')
45
+ => nil
46
+
47
+ === Output examples
48
+
49
+ >> ChronicDistance.output(5000000)
50
+ => 5000000 mm
51
+
52
+ >> ChronicDistance.output(5000000, :format => :short, :unit => 'kilometers')
53
+ => 5km
54
+
55
+ >> ChronicDistance.output(5000000, :format => :long, :unit => 'kilometers')
56
+ => 5 kilometers
57
+
58
+ >> ChronicDistance.output(5000000, :unit => 'meters')
59
+ => 5000 m
60
+
61
+ >> ChronicDistance.output(5000000, :format => :long, :unit => 'miles')
62
+ => 3.10685596118667 miles
63
+
64
+
65
+ == Inspiration
66
+
67
+ ChronicDistance borrows heavily from:
68
+
69
+ * hpoydar's ChronicDuration http://github.com/hpoydar/chronic_duration
70
+ * mojombo's Chronic http://github.com/mojombo/chronic
71
+
72
+
73
+ == TODO
74
+
75
+ * Benchmark, optimize
76
+ * Other locale support
77
+
78
+ Copyright (c) 2009 Olivier Lauzon. See MIT-LICENSE for details.
@@ -0,0 +1,4 @@
1
+ ---
2
+ :minor: 0
3
+ :patch: 1
4
+ :major: 0
@@ -0,0 +1,215 @@
1
+ require 'numerizer'
2
+
3
# Natural language parsing and formatting of distances.
#
# Distances are exchanged as a number of millimeters: +parse+ converts text
# such as "4 miles" into millimeters, and +output+ renders a millimeter count
# as a string in the requested unit and format.
module ChronicDistance

  extend self

  # Given a string representation of distance,
  # return an integer (or float) representation
  # of the distance in millimeters. Accepts an options
  # hash with :round => true as an option.
  #
  # The input is converted with +to_s+ first, so +nil+ is handled like an
  # empty string, and unit names are matched regardless of letter case.
  #
  # Returns nil when the computed distance is zero, which is also what an
  # unparseable string produces.
  def parse(string, options = {})
    result = calculate_from_words(cleanup(string.to_s))
    result = result.round.to_i if options[:round]
    result == 0 ? nil : result
  end

  # Given an integer or float distance in millimeters,
  # and an optional format and unit,
  # return a formatted string representing distance.
  #
  # options[:unit]   - one of the names listed by +distance_units+
  #                    (defaults to 'millimeters').
  # options[:format] - :default, :short or :long (defaults to :default).
  #
  # The options hash is only read, never modified, so callers may pass a
  # frozen or shared hash.
  #
  # Returns nil when the distance is zero, or when the unit or format is
  # not one this module knows about.
  def output(millimeters, options = {})
    unit   = options[:unit]   || 'millimeters'
    format = options[:format] || :default

    formatting = unit_formatting_options[format]
    return nil unless formatting && distance_units.include?(unit)

    # The :default format reuses the short unit labels; it only differs
    # from :short by the spacer taken from unit_formatting_options.
    label_format = (format == :default) ? :short : format
    label        = unit_format(unit, label_format)

    result = humanize_distance(
      distance_in_unit(millimeters, unit),
      label,
      formatting[:pluralize],
      formatting[:spacer]
    )

    result.empty? ? nil : result
  end

  private

  # Convert millimeters into +unit+. Whole results are returned as Integers
  # so they print without a trailing ".0".
  def distance_in_unit(millimeters, unit)
    number = millimeters.to_f / millimeter_multiplier(unit)
    number == number.round ? number.round : number
  end

  # Canonical (plural) names of every supported unit.
  def distance_units
    [
      'millimeters',
      'centimeters',
      'meters',
      'kilometers',
      'inches',
      'feet',
      'yards',
      'miles'
    ]
  end

  # Number of millimeters in one +unit+, or 0 when the unit is unknown.
  # calculate_from_words relies on the 0 so that a number that is not
  # followed by a unit contributes nothing to the total.
  def millimeter_multiplier(unit = 'millimeters')
    case unit
    when 'millimeters' then 1
    when 'centimeters' then 10
    when 'meters'      then 1_000
    when 'kilometers'  then 1_000_000
    when 'inches'      then 25.4
    when 'feet'        then 304.8
    when 'yards'       then 914.4
    when 'miles'       then 1_609_344
    else 0
    end
  end

  # Sum every "<number> <unit>" pair found in a space separated string and
  # return the total in millimeters. Each number is multiplied by the
  # multiplier of the word that follows it.
  def calculate_from_words(string)
    words    = string.split(' ')
    distance = 0 # stays an Integer unless a float value or multiplier is involved
    words.each_with_index do |word, index|
      next unless word =~ float_matcher
      distance += convert_to_number(word) * millimeter_multiplier(words[index + 1])
    end
    distance
  end

  # Build the display string "<number><spacer><unit>". When +pluralize+ is
  # set and the number is not 1 the unit label is pluralized, with the
  # irregular forms 'inches' and 'feet' handled explicitly.
  # Returns an empty string for a zero distance.
  def humanize_distance(number, unit, pluralize, spacer = '')
    return '' if number == 0

    display_unit =
      if pluralize && number != 1
        case unit
        when 'inch' then 'inches'
        when 'foot' then 'feet'
        else "#{unit}s"
        end
      else
        unit
      end

    "#{number}#{spacer}#{display_unit}"
  end

  # Normalize free text into a "<number> <unit> <number> <unit> ..." string:
  # number words become digits, numbers are separated from adjacent text,
  # and anything that is not a number or a known unit is dropped.
  def cleanup(string)
    result = Numerizer.numerize(string)
    result = result.gsub(float_matcher) { |n| " #{n} " }.squeeze(' ').strip
    filter_through_white_list(result)
  end

  # Float when the text has a fractional part, Integer otherwise.
  def convert_to_number(string)
    string.to_f % 1 > 0 ? string.to_f : string.to_i
  end

  # Matches an integer or decimal number such as "4", "11.5" or ".5".
  def float_matcher
    /[0-9]*\.?[0-9]+/
  end

  # Get rid of unknown words and map found
  # words to defined distance units
  def filter_through_white_list(string)
    known_units = mappings # built once per call instead of twice per word
    kept = []
    string.split(' ').each do |word|
      if word =~ float_matcher
        kept << word
      else
        # Lookup is case-insensitive so "KM" and "Miles" are recognized.
        unit = known_units[word.downcase]
        kept << unit if unit
      end
    end
    kept.join(' ')
  end

  # Every recognized spelling, from all formats, mapped to its canonical
  # unit name.
  def mappings
    maps = {}
    mappings_by_format.values.each do |format_mappings|
      maps.merge!(format_mappings)
    end
    maps
  end

  # Label used to display +unit+ in the given format,
  # e.g. ('meters', :short) gives 'm'. Returns nil for an unknown unit.
  def unit_format(unit, format = :short)
    mappings_by_format[format].invert[unit]
  end

  # Spacer and pluralization rules for each output format.
  def unit_formatting_options
    {
      :default => {
        :spacer    => ' ',
        :pluralize => false
      },

      :short => {
        :spacer    => '',
        :pluralize => false
      },

      :long => {
        :spacer    => ' ',
        :pluralize => true
      }
    }
  end

  # Recognized spellings grouped by format. :short and :long double as the
  # display labels used by +output+; :other only lists extra spellings
  # accepted when parsing.
  def mappings_by_format
    {
      :short => {
        'mm'   => 'millimeters',
        'cm'   => 'centimeters',
        'm'    => 'meters',
        'km'   => 'kilometers',
        '"'    => 'inches',
        '\''   => 'feet',
        'yd'   => 'yards',
        'mile' => 'miles'
      },

      :long => {
        'millimeter' => 'millimeters',
        'centimeter' => 'centimeters',
        'meter'      => 'meters',
        'kilometer'  => 'kilometers',
        'inch'       => 'inches',
        'foot'       => 'feet',
        'yard'       => 'yards',
        'mile'       => 'miles'
      },

      :other => {
        'mms'         => 'millimeters',
        'millimeters' => 'millimeters',
        'cms'         => 'centimeters',
        'centimeters' => 'centimeters',
        'ms'          => 'meters',
        'meters'      => 'meters',
        'k'           => 'kilometers',
        'ks'          => 'kilometers',
        'kms'         => 'kilometers',
        'kilometers'  => 'kilometers',
        'inch'        => 'inches',
        'inches'      => 'inches',
        'ft'          => 'feet',
        'feet'        => 'feet',
        'y'           => 'yards',
        'yds'         => 'yards',
        'yards'       => 'yards',
        'miles'       => 'miles'
      }
    }
  end

end
@@ -0,0 +1,99 @@
1
+ require 'strscan'
2
+
3
# Replaces English number words in a string with digits, e.g.
# "forty yards" becomes "40 yards" and "three and a half" becomes "3.5".
#
# While working, every number that has been converted is tagged with a
# "<num>" prefix so later passes can combine neighbouring numbers; the tags
# are stripped from the final result.
class Numerizer

  # [regexp source, replacement] pairs for words that map to one number.
  DIRECT_NUMS = [
    ['eleven', '11'],
    ['twelve', '12'],
    ['thirteen', '13'],
    ['fourteen', '14'],
    ['fifteen', '15'],
    ['sixteen', '16'],
    ['seventeen', '17'],
    ['eighteen', '18'],
    ['nineteen', '19'],
    ['ninteen', '19'], # Common misspelling
    ['zero', '0'],
    ['one', '1'],
    ['two', '2'],
    ['three', '3'],
    ['four(\W|$)', '4\1'], # The weird regex is so that it matches four but not fourty
    ['five', '5'],
    ['six(\W|$)', '6\1'],
    ['seven(\W|$)', '7\1'],
    ['eight(\W|$)', '8\1'],
    ['nine(\W|$)', '9\1'],
    ['ten', '10'],
    # doesn't make sense for an 'a' at the end to be a 1
    # NOTE(review): inside a character class \b, ^ and $ are literal
    # characters (backspace, caret, dollar), so this pattern does not match
    # a plain "a " as apparently intended. Left unchanged to keep parsing
    # results stable; confirm the intent before changing it.
    ['\ba[\b^$]', '1']
  ].freeze

  # [word, value] pairs for the multiples of ten.
  TEN_PREFIXES = [
    ['twenty', 20],
    ['thirty', 30],
    ['forty', 40],
    ['fourty', 40], # Common misspelling
    ['fifty', 50],
    ['sixty', 60],
    ['seventy', 70],
    ['eighty', 80],
    ['ninety', 90]
  ].freeze

  # [word, multiplier] pairs for the large magnitudes.
  BIG_PREFIXES = [
    ['hundred', 100],
    ['thousand', 1000],
    ['million', 1_000_000],
    ['billion', 1_000_000_000],
    ['trillion', 1_000_000_000_000]
  ].freeze

  # Return a copy of +string+ with number words replaced by digits.
  # The argument itself is not modified.
  def self.numerize(string)
    string = string.dup

    # preprocess
    # Collapse runs of spaces and split hyphenated words
    # (will mutilate hyphenated-words, which does not matter for number extraction).
    string.gsub!(/ +|([^\d])-([^\d])/, '\1 \2')
    # take the 'a' out so it doesn't turn into a 1, save the half for the end
    string.gsub!(/a half/, 'haAlf')

    # easy/direct replacements
    DIRECT_NUMS.each do |dn|
      string.gsub!(/#{dn[0]}/i, '<num>' + dn[1])
    end

    # ten, twenty, etc.
    # First pass: a tens word followed by an already converted single digit
    # ("twenty <num>5") is folded into one number.
    TEN_PREFIXES.each do |tp|
      string.gsub!(/(?:#{tp[0]}) *<num>(\d(?=[^\d]|$))*/i) { '<num>' + (tp[1] + $1.to_i).to_s }
    end

    # Second pass: any remaining tens word stands on its own.
    TEN_PREFIXES.each do |tp|
      string.gsub!(/#{tp[0]}/i) { '<num>' + tp[1].to_s }
    end

    # hundreds, thousands, millions, etc.
    BIG_PREFIXES.each do |bp|
      string.gsub!(/(?:<num>)?(\d*) *#{bp[0]}/i) { '<num>' + (bp[1] * $1.to_i).to_s }
      andition(string)
    end

    # fractional addition
    # I'm not combining this with the previous block as using float addition complicates the strings
    # (with extraneous .0's and such )
    string.gsub!(/(\d+)(?: | and |-)*haAlf/i) { ($1.to_f + 0.5).to_s }

    string.gsub(/<num>/, '')
  end

  # Add together adjacent tagged numbers in place, e.g.
  # "<num>100 and <num>50" becomes "<num>150". Two numbers are merged when
  # they are joined by "and", or when the first has more digits than the
  # second. The scan restarts after each merge because the string changed
  # underneath the scanner.
  def self.andition(string)
    sc = StringScanner.new(string)
    while sc.scan_until(/<num>(\d+)( | and )<num>(\d+)(?=[^\w]|$)/i)
      if sc[2] =~ /and/ || sc[1].size > sc[3].size
        string[(sc.pos - sc.matched_size)..(sc.pos - 1)] = '<num>' + (sc[1].to_i + sc[3].to_i).to_s
        sc.reset
      end
    end
  end
  # A bare `private` does not affect methods defined with `def self.`,
  # so the helper has to be hidden explicitly.
  private_class_method :andition

end
@@ -0,0 +1,241 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+
3
# Behaviour of the public ChronicDistance API (.parse and .output) and of
# the private helpers it is built from.
describe ChronicDistance do

  describe '.parse' do

    # input string => expected distance in millimeters
    exemplars = {
      '1mm'                             => 1 * 1,
      '5 mms'                           => 5 * 1,
      'fifty millimeters'               => 50 * 1,
      '1cm'                             => 1 * 10,
      '2 cms'                           => 2 * 10,
      '1 centimeter'                    => 1 * 10,
      '1m'                              => 1 * 1_000,
      '2 ms'                            => 2 * 1_000,
      '1 meter'                         => 1 * 1_000,
      '1k'                              => 1 * 1_000_000,
      '5 ks'                            => 5 * 1_000_000,
      '1km'                             => 1 * 1_000_000,
      '2 kms'                           => 2 * 1_000_000,
      '1 kilometer'                     => 1 * 1_000_000,
      '4 kilometers'                    => 4 * 1_000_000,
      '1 inch'                          => 1 * 25.4,
      '4 inches'                        => 4 * 25.4,
      '1 ft'                            => 1 * 304.8,
      '1y'                              => 1 * 914.4,
      'one yd'                          => 1 * 914.4,
      'eight yds'                       => 8 * 914.4,
      '1 yard'                          => 1 * 914.4,
      'forty yards'                     => 40 * 914.4,
      '1 mile'                          => 1 * 1_609_344,
      '3.5 miles'                       => 3.5 * 1_609_344,
      '4 miles'                         => 4 * 1_609_344,
      '4 kms 2 miles'                   => 4 * 1_000_000 + 2 * 1_609_344,
      'those four kms and 2 miles also' => 4 * 1_000_000 + 2 * 1_609_344
    }

    it "should return nil if the string can't be parsed" do
      ChronicDistance.parse('gobblygoo').should be_nil
    end

    it "should return an integer if units are kilometers" do
      ChronicDistance.parse('4kms').is_a?(Integer).should be_true
    end

    it "should return an integer if units are miles" do
      ChronicDistance.parse('3 miles').is_a?(Integer).should be_true
    end

    it "should return a float if units are yards" do
      ChronicDistance.parse('4 yards').is_a?(Float).should be_true
    end

    it "should return an integer if units are yards and distance is rounded" do
      ChronicDistance.parse('four yards', :round => true).
        is_a?(Integer).should be_true
    end

    exemplars.each do |text, millimeters|

      it "should properly parse a distance like #{text}" do
        ChronicDistance.parse(text).should == millimeters
      end

    end

  end

  describe '.output' do

    it "should return nil when the distance is zero" do
      ChronicDistance.output(0).should be_nil
    end

    # millimeters => { unit => { format => expected string } }
    #
    # Each distance must appear only once as a key: a repeated key in a
    # hash literal silently replaces the earlier entry, which would drop
    # those exemplars from the run.
    exemplars = {

      (1) => {
        'millimeters' => {
          :short   => '1mm',
          :default => '1 mm',
          :long    => '1 millimeter'
        }
      },

      (1_609_344) => {
        'millimeters' => {
          :short   => '1609344mm',
          :default => '1609344 mm',
          :long    => '1609344 millimeters'
        },
        'centimeters' => {
          :short   => '160934.4cm',
          :default => '160934.4 cm',
          :long    => '160934.4 centimeters'
        },
        'miles' => {
          :short   => '1mile',
          :default => '1 mile',
          :long    => '1 mile'
        }
      },

      (4 * 1_609_344) => {
        'miles' => {
          :short   => '4mile',
          :default => '4 mile',
          :long    => '4 miles'
        }
      },

      (2.5 * 1_609_344) => {
        'miles' => {
          :short   => '2.5mile',
          :default => '2.5 mile',
          :long    => '2.5 miles'
        }
      },

      (4 * 25.4) => {
        'inches' => {
          :short   => '4"',
          :default => '4 "',
          :long    => '4 inches'
        }
      },

      (4 * 304.8) => {
        'feet' => {
          :short   => '4\'',
          :default => '4 \'',
          :long    => '4 feet'
        }
      }
    }

    exemplars.each do |distance, units|
      units.each do |unit, formats|
        formats.each do |format_option, formatted_distance|

          it "should output #{distance} millimeters as #{formatted_distance} " \
             "using the #{format_option} #{unit} format" do
            ChronicDistance.output(distance,
                                   :format => format_option,
                                   :unit   => unit).
              should == formatted_distance
          end

        end
      end
    end

    it "should use the default format when the format is not specified" do
      ChronicDistance.output(2000).should == '2000 mm'
    end

  end

  # Private helpers are reached through instance_eval.
  describe " private methods" do

    describe ".calculate_from_words" do

      it "should return distance in millimeters" do
        ChronicDistance.
          instance_eval("calculate_from_words('10 centimeters')").
          should == 100
      end

      it "should return distance in millimeters when mixing metric units" do
        ChronicDistance.
          instance_eval("calculate_from_words('2 kilometers and 10 centimeters')").
          should == 2_000_100
      end

      it "should return distance in millimeters when mixing imperial and metric units" do
        ChronicDistance.
          instance_eval("calculate_from_words('2 miles and 10 centimeters')").
          should == 3_218_788
      end

    end

    describe ".cleanup" do

      it "should clean up extraneous words" do
        ChronicDistance.
          instance_eval("cleanup('4 meters and 10 centimeters')").
          should == '4 meters 10 centimeters'
      end

      it "should cleanup extraneous spaces" do
        ChronicDistance.
          instance_eval("cleanup(' 4 meters and 11 centimeters')").
          should == '4 meters 11 centimeters'
      end

      it "should insert spaces where there aren't any" do
        ChronicDistance.
          instance_eval("cleanup('4m11.5cm')").
          should == '4 meters 11.5 centimeters'
      end

    end

    describe ".unit_format" do

      it "should select 'meter' for the long meters format" do
        ChronicDistance.
          instance_eval("unit_format('meters', :long)").
          should == 'meter'
      end

      it "should select 'm' for the short meters format" do
        ChronicDistance.
          instance_eval("unit_format('meters', :short)").
          should == 'm'
      end

      it "should select 'mm' for the short millimeters format" do
        ChronicDistance.
          instance_eval("unit_format('millimeters', :short)").
          should == 'mm'
      end

    end

  end

end
@@ -0,0 +1,10 @@
1
require 'rubygems'
require 'spec'

# Put the spec directory and the gem's own lib directory on the load path,
# so the require below loads the working copy of chronic_distance rather
# than depending on an installed gem or an externally supplied -I flag.
$LOAD_PATH.unshift(File.dirname(__FILE__))
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))

require 'chronic_distance'

# No custom runner configuration yet; kept as the place to add it.
Spec::Runner.configure do |config|

end
metadata ADDED
@@ -0,0 +1,59 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: olauzon-chronic_distance
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Olivier Lauzon
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-03-01 00:00:00 -08:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: TODO
17
+ email: olauzon@gmail.com
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files: []
23
+
24
+ files:
25
+ - README.rdoc
26
+ - VERSION.yml
27
+ - lib/chronic_distance.rb
28
+ - lib/numerizer.rb
29
+ - spec/chronic_distance_spec.rb
30
+ - spec/spec_helper.rb
31
+ has_rdoc: true
32
+ homepage: http://github.com/olauzon/chronic_distance
33
+ post_install_message:
34
+ rdoc_options:
35
+ - --inline-source
36
+ - --charset=UTF-8
37
+ require_paths:
38
+ - lib
39
+ required_ruby_version: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: "0"
44
+ version:
45
+ required_rubygems_version: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ version: "0"
50
+ version:
51
+ requirements: []
52
+
53
+ rubyforge_project:
54
+ rubygems_version: 1.2.0
55
+ signing_key:
56
+ specification_version: 2
57
+ summary: TODO
58
+ test_files: []
59
+