numerouno 0.1.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +5 -0
- data/Manifest.txt +22 -2
- data/README.rdoc +26 -9
- data/Rakefile +16 -17
- data/features/parse_billions.feature +20 -0
- data/features/parse_hundreds.feature +20 -0
- data/features/parse_literal_numerals.feature +8 -0
- data/features/parse_millions.feature +20 -0
- data/features/parse_powers_of_ten.feature +36 -0
- data/features/parse_thousands.feature +16 -0
- data/features/parse_trillions.feature +20 -0
- data/features/parse_zero_to_ten.feature +34 -0
- data/features/steps/numbers.rb +24 -0
- data/features/substitute_numbers_within_a_string.feature +42 -0
- data/features/support/env.rb +6 -0
- data/lib/numerouno-parsing.rb +10 -3
- data/lib/numerouno.rb +4 -0
- data/lib/numerouno/numbers.rb +52 -0
- data/lib/numerouno/search.rb +3 -47
- data/lib/numerouno/substitution.rb +55 -0
- data/spec/numerouno/combiner_spec.rb +64 -0
- data/spec/numerouno/parser_spec.rb +55 -0
- data/spec/numerouno/search_spec.rb +36 -0
- data/spec/numerouno/spec_helper.rb +10 -0
- data/spec/numerouno/substitution_spec.rb +33 -0
- data/spec/spec.opts +5 -0
- data/todo +16 -0
- metadata +36 -10
data/History.txt
CHANGED
data/Manifest.txt
CHANGED
@@ -2,12 +2,32 @@ History.txt
|
|
2
2
|
Manifest.txt
|
3
3
|
README.rdoc
|
4
4
|
Rakefile
|
5
|
-
|
5
|
+
features/parse_billions.feature
|
6
|
+
features/parse_hundreds.feature
|
7
|
+
features/parse_literal_numerals.feature
|
8
|
+
features/parse_millions.feature
|
9
|
+
features/parse_powers_of_ten.feature
|
10
|
+
features/parse_thousands.feature
|
11
|
+
features/parse_trillions.feature
|
12
|
+
features/parse_zero_to_ten.feature
|
13
|
+
features/steps/numbers.rb
|
14
|
+
features/substitute_numbers_within_a_string.feature
|
15
|
+
features/support/env.rb
|
6
16
|
lib/numerouno-parsing.rb
|
17
|
+
lib/numerouno.rb
|
7
18
|
lib/numerouno/combiner.rb
|
19
|
+
lib/numerouno/numbers.rb
|
8
20
|
lib/numerouno/parser.rb
|
9
21
|
lib/numerouno/search.rb
|
22
|
+
lib/numerouno/substitution.rb
|
10
23
|
script/console
|
11
24
|
script/destroy
|
12
25
|
script/generate
|
13
|
-
|
26
|
+
spec/numerouno/combiner_spec.rb
|
27
|
+
spec/numerouno/parser_spec.rb
|
28
|
+
spec/numerouno/search_spec.rb
|
29
|
+
spec/numerouno/spec_helper.rb
|
30
|
+
spec/numerouno/substitution_spec.rb
|
31
|
+
spec/spec.opts
|
32
|
+
tasks/rspec.rake
|
33
|
+
todo
|
data/README.rdoc
CHANGED
@@ -3,6 +3,8 @@
|
|
3
3
|
* http://github.com/brentsnook/numerouno
|
4
4
|
* http://groups.google.com/group/numerouno-number-parsing
|
5
5
|
|
6
|
+
== Description
|
7
|
+
|
6
8
|
English natural language parser for numbers.
|
7
9
|
|
8
10
|
* Parse 'five' to return 5
|
@@ -11,39 +13,54 @@ English natural language parser for numbers.
|
|
11
13
|
|
12
14
|
Recognises numbers in the trillions.
|
13
15
|
|
16
|
+
=== How does it all work?
|
17
|
+
|
18
|
+
The English language has explicit rules for expressing numbers. Numerouno attempts to recognise strings based on these rules.
|
19
|
+
|
20
|
+
Check out http://wiki.github.com/brentsnook/numerouno for details.
|
21
|
+
|
14
22
|
== Installation
|
15
23
|
|
16
24
|
sudo gem install numerouno
|
17
25
|
|
18
|
-
|
19
|
-
|
20
|
-
Grab the code from github:
|
26
|
+
Or grab the code from Github and build yourself:
|
21
27
|
|
22
28
|
git clone git://github.com/brentsnook/numerouno.git
|
23
29
|
cd numerouno
|
24
30
|
rake install_gem
|
25
31
|
|
26
|
-
==
|
32
|
+
== Synopsis
|
27
33
|
|
28
34
|
Just require numerouno to add magical number parsing powers to your strings:
|
29
35
|
|
30
36
|
require 'numerouno'
|
31
37
|
'sixty five'.as_number
|
38
|
+
=> 65
|
39
|
+
'65'.as_number
|
40
|
+
=> 65
|
41
|
+
'two bit varmint'.sub_numbers
|
42
|
+
=> '2 bit varmint'
|
32
43
|
|
33
44
|
Or if you're a sook who doesn't like the idea of String being opened up:
|
34
45
|
|
35
46
|
require 'numerouno-parsing'
|
47
|
+
|
36
48
|
Numerouno.parse 'sixty five'
|
49
|
+
Numerouno.replace 'two bit varmint'
|
37
50
|
|
38
|
-
|
51
|
+
One serving suggestion is to use it with your Cucumber[http://cukes.info/] steps.
|
39
52
|
|
40
|
-
|
53
|
+
meaning_steps.rb
|
41
54
|
|
42
|
-
|
55
|
+
require 'numerouno'
|
56
|
+
|
57
|
+
When /the meaning of life is (.+)/ do |number|
|
58
|
+
42.should == number.as_number
|
59
|
+
end
|
43
60
|
|
44
|
-
|
61
|
+
meaning_of_life.feature
|
45
62
|
|
46
|
-
|
63
|
+
Then the meaning of life is forty two
|
47
64
|
|
48
65
|
== License
|
49
66
|
|
data/Rakefile
CHANGED
@@ -1,26 +1,25 @@
|
|
1
|
-
%w[rubygems rake rake/clean fileutils newgem rubigen].each { |f| require f }
|
1
|
+
%w[rubygems rake rake/clean hoe fileutils newgem rubigen].each { |f| require f }
|
2
2
|
require File.dirname(__FILE__) + '/lib/numerouno'
|
3
3
|
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
p.extra_dev_deps = [
|
4
|
+
Hoe.spec 'numerouno' do
|
5
|
+
version = Numerouno::VERSION
|
6
|
+
developer 'Brent Snook', 'brent@fuglylogic.com'
|
7
|
+
self.readme_file = 'README.rdoc'
|
8
|
+
self.clean_globs |= %w[**/.DS_Store tmp *.log]
|
9
|
+
self.rsync_args = '-av --delete --ignore-errors' # is this needed?
|
10
|
+
self.extra_dev_deps = [
|
12
11
|
['newgem', ">= #{::Newgem::VERSION}"],
|
13
|
-
['rspec', '>= 1.2.
|
14
|
-
['cucumber', '>= 0.3.
|
12
|
+
['rspec', '>= 1.2.8'],
|
13
|
+
['cucumber', '>= 0.3.103']
|
15
14
|
]
|
16
|
-
|
17
|
-
p.clean_globs |= %w[**/.DS_Store tmp *.log]
|
18
|
-
path = (p.rubyforge_name == p.name) ? p.rubyforge_name : "\#{p.rubyforge_name}/\#{p.name}"
|
19
|
-
p.remote_rdoc_dir = File.join(path.gsub(/^#{p.rubyforge_name}\/?/,''), 'rdoc')
|
20
|
-
p.rsync_args = '-av --delete --ignore-errors'
|
21
15
|
end
|
22
16
|
|
23
|
-
require '
|
17
|
+
require 'cucumber/rake/task'
|
18
|
+
Cucumber::Rake::Task.new(:features) do |t|
|
19
|
+
t.cucumber_opts = "features --format pretty"
|
20
|
+
end
|
21
|
+
|
22
|
+
require 'newgem/tasks'
|
24
23
|
Dir['tasks/**/*.rake'].each { |t| load t }
|
25
24
|
|
26
25
|
task :default => [:spec, :features]
|
@@ -0,0 +1,20 @@
|
|
1
|
+
Feature: Parse billions
|
2
|
+
|
3
|
+
So that I can convert number strings into numerals
|
4
|
+
I want to parse strings containing billions
|
5
|
+
|
6
|
+
Scenario: Parse a billion with no multiplier
|
7
|
+
When 'a billion' is parsed
|
8
|
+
Then the number will be 1,000,000,000
|
9
|
+
|
10
|
+
Scenario: Parse several billion
|
11
|
+
When 'six hundred and thirty seven billion' is parsed
|
12
|
+
Then the number will be 637,000,000,000
|
13
|
+
|
14
|
+
Scenario: Parse several hundred billion straight
|
15
|
+
When 'seven hundred billion' is parsed
|
16
|
+
Then the number will be 700,000,000,000
|
17
|
+
|
18
|
+
Scenario: Parse numbers between billions
|
19
|
+
When 'two hundred and fifty billion, six hundred and thirty seven million, four hundred and thirty eight thousand, two hundred and ninety eight' is parsed
|
20
|
+
Then the number will be 250,637,438,298
|
@@ -0,0 +1,20 @@
|
|
1
|
+
Feature: Recognise hundreds
|
2
|
+
|
3
|
+
So that I can convert number strings into numerals
|
4
|
+
I want to parse strings containing hundreds
|
5
|
+
|
6
|
+
Scenario: Parse a hundred with no multiplier
|
7
|
+
When 'a hundred' is parsed
|
8
|
+
Then the number will be 100
|
9
|
+
|
10
|
+
Scenario: Parse several hundred
|
11
|
+
When 'five hundred' is parsed
|
12
|
+
Then the number will be 500
|
13
|
+
|
14
|
+
Scenario: Parse numbers between hundreds
|
15
|
+
When 'one hundred and thirty five' is parsed
|
16
|
+
Then the number will be 135
|
17
|
+
|
18
|
+
Scenario: Parse different ways of stating thousands
|
19
|
+
When 'twenty seven hundred' is parsed
|
20
|
+
Then the number will be 2700
|
@@ -0,0 +1,20 @@
|
|
1
|
+
Feature: Recognise millions
|
2
|
+
|
3
|
+
So that I can convert number strings into numerals
|
4
|
+
I want to parse strings containing millions
|
5
|
+
|
6
|
+
Scenario: Parse a million with no multiplier
|
7
|
+
When 'a million' is parsed
|
8
|
+
Then the number will be 1,000,000
|
9
|
+
|
10
|
+
Scenario: Parse several million
|
11
|
+
When 'six hundred and thirty seven million' is parsed
|
12
|
+
Then the number will be 637,000,000
|
13
|
+
|
14
|
+
Scenario: Parse several hundred million straight
|
15
|
+
When 'seven hundred million' is parsed
|
16
|
+
Then the number will be 700,000,000
|
17
|
+
|
18
|
+
Scenario: Parse numbers between millions
|
19
|
+
When 'six hundred and thirty seven million, four hundred and thirty eight thousand, two hundred and ninety eight' is parsed
|
20
|
+
Then the number will be 637,438,298
|
@@ -0,0 +1,36 @@
|
|
1
|
+
Feature: Recognise powers of ten
|
2
|
+
|
3
|
+
So that I can convert number strings into numerals
|
4
|
+
I want to parse strings containing powers of ten
|
5
|
+
|
6
|
+
Scenario Outline: Parse normal numbers
|
7
|
+
When '<string>' is parsed
|
8
|
+
Then the number will be <number>
|
9
|
+
|
10
|
+
Scenarios: Parse 11 to 20 and powers of 10
|
11
|
+
| string | number |
|
12
|
+
| eleven | 11 |
|
13
|
+
| twelve | 12 |
|
14
|
+
| thirteen | 13 |
|
15
|
+
| fourteen | 14 |
|
16
|
+
| fifteen | 15 |
|
17
|
+
| sixteen | 16 |
|
18
|
+
| seventeen | 17 |
|
19
|
+
| eighteen | 18 |
|
20
|
+
| nineteen | 19 |
|
21
|
+
| twenty | 20 |
|
22
|
+
| thirty | 30 |
|
23
|
+
| forty | 40 |
|
24
|
+
| fifty | 50 |
|
25
|
+
| sixty | 60 |
|
26
|
+
| seventy | 70 |
|
27
|
+
| eighty | 80 |
|
28
|
+
| ninety | 90 |
|
29
|
+
|
30
|
+
Scenario: Parse 21 as separate words
|
31
|
+
When 'twenty one' is parsed
|
32
|
+
Then the number will be 21
|
33
|
+
|
34
|
+
Scenario: Parse 21 as a single word
|
35
|
+
When 'twentyone' is parsed
|
36
|
+
Then the number will be 21
|
@@ -0,0 +1,16 @@
|
|
1
|
+
Feature: Recognise thousands
|
2
|
+
|
3
|
+
So that I can convert number strings into numerals
|
4
|
+
I want to parse strings containing thousands
|
5
|
+
|
6
|
+
Scenario: Parse a thousand with no multiplier
|
7
|
+
When 'a thousand' is parsed
|
8
|
+
Then the number will be 1,000
|
9
|
+
|
10
|
+
Scenario: Parse several thousand
|
11
|
+
When 'six hundred and thirty seven thousand' is parsed
|
12
|
+
Then the number will be 637,000
|
13
|
+
|
14
|
+
Scenario: Parse numbers between thousands
|
15
|
+
When 'six hundred and thirty seven thousand, four hundred and thirty eight' is parsed
|
16
|
+
Then the number will be 637,438
|
@@ -0,0 +1,20 @@
|
|
1
|
+
Feature: Recognise trillions
|
2
|
+
|
3
|
+
So that I can convert number strings into numerals
|
4
|
+
I want to parse strings containing trillions
|
5
|
+
|
6
|
+
Scenario: Parse a trillion with no multiplier
|
7
|
+
When 'a trillion' is parsed
|
8
|
+
Then the number will be 1,000,000,000,000
|
9
|
+
|
10
|
+
Scenario: Parse several trillion
|
11
|
+
When 'six hundred and thirty seven trillion' is parsed
|
12
|
+
Then the number will be 637,000,000,000,000
|
13
|
+
|
14
|
+
Scenario: Parse several hundred trillion straight
|
15
|
+
When 'seven hundred trillion' is parsed
|
16
|
+
Then the number will be 700,000,000,000,000
|
17
|
+
|
18
|
+
Scenario: Parse numbers between trillions
|
19
|
+
When 'nine hundred and forty two trillion, two hundred and fifty billion, six hundred and thirty seven million, four hundred and thirty eight thousand, two hundred and ninety eight' is parsed
|
20
|
+
Then the number will be 942,250,637,438,298
|
@@ -0,0 +1,34 @@
|
|
1
|
+
Feature: Recognise from zero to ten
|
2
|
+
|
3
|
+
So that I can convert number strings into numerals
|
4
|
+
I want to parse strings containing zero to ten
|
5
|
+
|
6
|
+
Scenario Outline: Parse normal numbers
|
7
|
+
When '<string>' is parsed
|
8
|
+
Then the number will be <number>
|
9
|
+
|
10
|
+
Scenarios: Parse 1 to 10
|
11
|
+
| string | number |
|
12
|
+
| zero | 0 |
|
13
|
+
| one | 1 |
|
14
|
+
| two | 2 |
|
15
|
+
| three | 3 |
|
16
|
+
| four | 4 |
|
17
|
+
| five | 5 |
|
18
|
+
| six | 6 |
|
19
|
+
| seven | 7 |
|
20
|
+
| eight | 8 |
|
21
|
+
| nine | 9 |
|
22
|
+
| ten | 10 |
|
23
|
+
|
24
|
+
Scenario: No number found
|
25
|
+
When 'gibberish' is parsed
|
26
|
+
Then an error will be raised stating that no number was found
|
27
|
+
|
28
|
+
Scenario: Number string contains noise
|
29
|
+
When 'a bratwurst costs two euros' is parsed
|
30
|
+
Then the number will be 2
|
31
|
+
|
32
|
+
Scenario: Number is upper case
|
33
|
+
When 'a bratwurst costs TWO euros' is parsed
|
34
|
+
Then the number will be 2
|
@@ -0,0 +1,24 @@
|
|
1
|
+
When /^'(.*)' is parsed$/ do |string|
|
2
|
+
@string = string
|
3
|
+
@stimulus = lambda { string.as_number }
|
4
|
+
end
|
5
|
+
|
6
|
+
When /^'(.*)' has numbers substituted$/ do |string|
|
7
|
+
@string = string
|
8
|
+
@stimulus = lambda { string.sub_numbers }
|
9
|
+
end
|
10
|
+
|
11
|
+
Then /^the number will be (.*)$/ do |parsed_number|
|
12
|
+
@stimulus.call.should == parsed_number.gsub(',', '').to_i
|
13
|
+
end
|
14
|
+
|
15
|
+
Then /^an error will be raised stating that no number was found$/ do
|
16
|
+
@stimulus.should raise_error(
|
17
|
+
Numerouno::NoNumberFoundError,
|
18
|
+
"No number found in string: #{@string}"
|
19
|
+
)
|
20
|
+
end
|
21
|
+
|
22
|
+
Then /^the string will be '(.*)'$/ do |replaced_string|
|
23
|
+
@stimulus.call.should == replaced_string
|
24
|
+
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
Feature: Substitute numbers within a string
|
2
|
+
|
3
|
+
In order to allow tools like Chronic to parse a string containing a number
|
4
|
+
I want to substitute number phrases within a string with numerals
|
5
|
+
|
6
|
+
Scenario Outline: Substitute normal numbers
|
7
|
+
When 'I have <string> Alf pogs' has numbers substituted
|
8
|
+
Then the string will be 'I have <number> Alf pogs'
|
9
|
+
|
10
|
+
Scenarios: Substitute numbers
|
11
|
+
| string | number |
|
12
|
+
| zero | 0 |
|
13
|
+
| one | 1 |
|
14
|
+
| two | 2 |
|
15
|
+
| three | 3 |
|
16
|
+
| four | 4 |
|
17
|
+
| five | 5 |
|
18
|
+
| six | 6 |
|
19
|
+
| seven | 7 |
|
20
|
+
| eight | 8 |
|
21
|
+
| nine | 9 |
|
22
|
+
| ten | 10 |
|
23
|
+
| eleven | 11 |
|
24
|
+
| twelve | 12 |
|
25
|
+
| thirteen | 13 |
|
26
|
+
| fourteen | 14 |
|
27
|
+
| fifteen | 15 |
|
28
|
+
| sixteen | 16 |
|
29
|
+
| seventeen | 17 |
|
30
|
+
| eighteen | 18 |
|
31
|
+
| nineteen | 19 |
|
32
|
+
| twenty | 20 |
|
33
|
+
| thirty | 30 |
|
34
|
+
| forty | 40 |
|
35
|
+
| fifty | 50 |
|
36
|
+
| sixty | 60 |
|
37
|
+
| seventy | 70 |
|
38
|
+
| eighty | 80 |
|
39
|
+
| ninety | 90 |
|
40
|
+
| seven million, nine hundred and seventy seven | 7000977 |
|
41
|
+
|
42
|
+
|
data/lib/numerouno-parsing.rb
CHANGED
@@ -3,14 +3,21 @@ $:.unshift(File.dirname(__FILE__)) unless
|
|
3
3
|
|
4
4
|
[
|
5
5
|
'combiner',
|
6
|
+
'numbers',
|
6
7
|
'search',
|
7
|
-
'parser'
|
8
|
+
'parser',
|
9
|
+
'substitution'
|
8
10
|
].each {|file| require "numerouno/#{file}"}
|
9
11
|
|
10
12
|
module Numerouno
|
11
|
-
VERSION = '0.
|
13
|
+
VERSION = '0.2.0'
|
12
14
|
|
13
15
|
def self.parse string
|
14
16
|
Parser.number_from string
|
15
|
-
end
|
17
|
+
end
|
18
|
+
|
19
|
+
def self.replace string
|
20
|
+
Substitution.new(string).sub
|
21
|
+
end
|
22
|
+
|
16
23
|
end
|
data/lib/numerouno.rb
CHANGED
@@ -0,0 +1,52 @@
|
|
1
|
+
module Numerouno
|
2
|
+
|
3
|
+
module Numbers
|
4
|
+
|
5
|
+
# the mapping between number strings and numbers needs to be ordered
|
6
|
+
# this is to avoid the wrong mapping being picked up
|
7
|
+
# i.e. /seven/ would match 'seventy' before /seventy/ if it were listed first
|
8
|
+
NUMBER_STRINGS = [
|
9
|
+
['sixty', 60],
|
10
|
+
['seventy', 70],
|
11
|
+
['eighty', 80],
|
12
|
+
['ninety', 90],
|
13
|
+
['fourteen', 14],
|
14
|
+
['sixteen', 16],
|
15
|
+
['seventeen', 17],
|
16
|
+
['eighteen', 18],
|
17
|
+
['nineteen', 19],
|
18
|
+
|
19
|
+
['zero', 0],
|
20
|
+
['one', 1],
|
21
|
+
['two', 2],
|
22
|
+
['three', 3],
|
23
|
+
['four', 4],
|
24
|
+
['five', 5],
|
25
|
+
['six', 6],
|
26
|
+
['seven', 7],
|
27
|
+
['eight', 8],
|
28
|
+
['nine', 9],
|
29
|
+
['ten', 10],
|
30
|
+
['eleven', 11],
|
31
|
+
['twelve', 12],
|
32
|
+
['thirteen', 13],
|
33
|
+
['fifteen', 15],
|
34
|
+
['twenty', 20],
|
35
|
+
['thirty', 30],
|
36
|
+
['forty', 40],
|
37
|
+
['fifty', 50],
|
38
|
+
|
39
|
+
['hundred', 100],
|
40
|
+
['thousand', 1000],
|
41
|
+
['million', 1000000],
|
42
|
+
['billion', 1000000000],
|
43
|
+
['trillion', 1000000000000]
|
44
|
+
]
|
45
|
+
|
46
|
+
NUMBER_LOOKUP = NUMBER_STRINGS.inject(Hash.new) do |hash, map|
|
47
|
+
hash[map[0]] = map[1]
|
48
|
+
hash
|
49
|
+
end
|
50
|
+
|
51
|
+
end
|
52
|
+
end
|
data/lib/numerouno/search.rb
CHANGED
@@ -9,52 +9,8 @@ module Numerouno
|
|
9
9
|
end
|
10
10
|
|
11
11
|
class Search
|
12
|
-
|
13
|
-
# the mapping between number strings and numbers needs to be ordered
|
14
|
-
# this is to avoid the wrong mapping being picked up
|
15
|
-
# i.e /seven/ would match 'seventy' before /seventy/
|
16
|
-
NUMBER_STRINGS = [
|
17
|
-
['sixty', 60],
|
18
|
-
['seventy', 70],
|
19
|
-
['eighty', 80],
|
20
|
-
['ninety', 90],
|
21
|
-
['fourteen', 14],
|
22
|
-
['sixteen', 16],
|
23
|
-
['seventeen', 17],
|
24
|
-
['eighteen', 18],
|
25
|
-
['nineteen', 19],
|
26
|
-
|
27
|
-
['zero', 0],
|
28
|
-
['one', 1],
|
29
|
-
['two', 2],
|
30
|
-
['three', 3],
|
31
|
-
['four', 4],
|
32
|
-
['five', 5],
|
33
|
-
['six', 6],
|
34
|
-
['seven', 7],
|
35
|
-
['eight', 8],
|
36
|
-
['nine', 9],
|
37
|
-
['ten', 10],
|
38
|
-
['eleven', 11],
|
39
|
-
['twelve', 12],
|
40
|
-
['thirteen', 13],
|
41
|
-
['fifteen', 15],
|
42
|
-
['twenty', 20],
|
43
|
-
['thirty', 30],
|
44
|
-
['forty', 40],
|
45
|
-
['fifty', 50],
|
46
|
-
|
47
|
-
['hundred', 100],
|
48
|
-
['thousand', 1000],
|
49
|
-
['million', 1000000],
|
50
|
-
['billion', 1000000000],
|
51
|
-
['trillion', 1000000000000]
|
52
|
-
]
|
53
12
|
|
54
|
-
|
55
|
-
hash[map[0]] = map[1]
|
56
|
-
hash
|
57
|
-
end
|
13
|
+
include Numbers
|
58
14
|
|
59
15
|
def initialize string
|
60
16
|
@string = string
|
@@ -71,7 +27,7 @@ module Numerouno
|
|
71
27
|
def search
|
72
28
|
if match = next_match
|
73
29
|
@string = string_without match
|
74
|
-
@numbers[match.pre_match.length] = NUMBER_LOOKUP[match.to_s]
|
30
|
+
@numbers[match.pre_match.length] = NUMBER_LOOKUP[match.to_s.downcase]
|
75
31
|
else
|
76
32
|
@string = ''
|
77
33
|
end
|
@@ -83,7 +39,7 @@ module Numerouno
|
|
83
39
|
|
84
40
|
def next_match
|
85
41
|
NUMBER_STRINGS.collect do |number_string, number|
|
86
|
-
@string.match
|
42
|
+
@string.match(/#{number_string}/i)
|
87
43
|
end.compact.first
|
88
44
|
end
|
89
45
|
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'strscan'
|
2
|
+
|
3
|
+
module Numerouno
|
4
|
+
|
5
|
+
class Substitution
|
6
|
+
|
7
|
+
include Numbers
|
8
|
+
|
9
|
+
def initialize source
|
10
|
+
@source = source
|
11
|
+
end
|
12
|
+
|
13
|
+
def sub
|
14
|
+
matches = {}
|
15
|
+
|
16
|
+
NUMBER_STRINGS.each do |number_string, number|
|
17
|
+
matches = matches_within(@source, number_string).merge(matches)
|
18
|
+
end
|
19
|
+
|
20
|
+
matches.empty? ? @source : replace(matches, @source)
|
21
|
+
end
|
22
|
+
|
23
|
+
private
|
24
|
+
|
25
|
+
def match_locations source, string
|
26
|
+
match_locations = []
|
27
|
+
scanner = StringScanner.new source
|
28
|
+
while(scanner.scan_until(/#{string}/i))
|
29
|
+
match_locations << scanner.pre_match.length
|
30
|
+
end
|
31
|
+
match_locations
|
32
|
+
end
|
33
|
+
|
34
|
+
def matches_within source, string
|
35
|
+
start_vs_length = {}
|
36
|
+
match_locations(source, string).each do |start|
|
37
|
+
start_vs_length[start] = string.length
|
38
|
+
end
|
39
|
+
start_vs_length
|
40
|
+
end
|
41
|
+
|
42
|
+
def replace match_locations, source
|
43
|
+
|
44
|
+
sorted_matches = match_locations.sort
|
45
|
+
|
46
|
+
first_match, last_match = sorted_matches.first, sorted_matches.last
|
47
|
+
range = (first_match.first..(last_match.first + last_match.last - 1))
|
48
|
+
|
49
|
+
phrase = source[range]
|
50
|
+
|
51
|
+
source.sub(/#{phrase.downcase}/i, Parser.number_from(phrase).to_s)
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/spec_helper.rb'
|
2
|
+
|
3
|
+
describe Numerouno::Combiner do
|
4
|
+
|
5
|
+
before { @power = 1000 }
|
6
|
+
|
7
|
+
it 'only combines numbers of the correct power' do
|
8
|
+
numbers = [2, 100, 3, 1000, 4, 1000000, 5]
|
9
|
+
combined(numbers).should eql([2, 100, 3004, 1000000, 5])
|
10
|
+
end
|
11
|
+
|
12
|
+
describe 'multiplication' do
|
13
|
+
|
14
|
+
it 'multiplies by the number on the left' do
|
15
|
+
combined([2, 1000]).should eql([2000])
|
16
|
+
end
|
17
|
+
|
18
|
+
it "doesn't multiply when the current number is the first" do
|
19
|
+
combined([1000, 5]).should_not eql([5000])
|
20
|
+
end
|
21
|
+
|
22
|
+
# there is no need to multiply powers of ten
|
23
|
+
# i.e. there is no 'two ten'; numbers like 'twenty' are already multiplied
|
24
|
+
it 'only multiplies when the current power is greater than ten' do
|
25
|
+
@power = 10
|
26
|
+
combined([5, 20]).should eql([5, 20])
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
describe 'addition' do
|
31
|
+
|
32
|
+
it 'adds the number on the right' do
|
33
|
+
combined([1000, 5]).should eql([1005])
|
34
|
+
end
|
35
|
+
|
36
|
+
it "doesn't add when the current number is the last" do
|
37
|
+
combined([5, 1000]).should_not eql([1005])
|
38
|
+
end
|
39
|
+
|
40
|
+
# we don't want to add a number that is greater
|
41
|
+
# because the current number is probably part of a multiplication
|
42
|
+
# i.e. "twenty thousand" should produce 20,000 not 1,020
|
43
|
+
it "doesn't add when the number is less than number to the right" do
|
44
|
+
combined([1000, 1000000]).should_not eql([1001000])
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
def combined numbers
|
49
|
+
Numerouno::Combiner.new(numbers).of_power(@power).apply!
|
50
|
+
numbers
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
describe Numerouno::CombinerMethods do
|
55
|
+
|
56
|
+
include Numerouno::CombinerMethods
|
57
|
+
|
58
|
+
it 'build a combiner' do
|
59
|
+
numbers = [2, 100, 5]
|
60
|
+
combine(numbers).of_power(100).apply!
|
61
|
+
|
62
|
+
numbers.should eql([205])
|
63
|
+
end
|
64
|
+
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/spec_helper.rb'
|
2
|
+
|
3
|
+
describe Numerouno::Parser do
|
4
|
+
|
5
|
+
it 'parses numbers for powers of ten up to a trillion' do
|
6
|
+
subject.stub!(:numbers_within).and_return([0])
|
7
|
+
combiners = [10, 100, 1000, 1000000, 1000000000, 1000000000000].collect do |power|
|
8
|
+
combiner = mock :combine
|
9
|
+
combiner.should_receive(:of_power).with(power).and_return combiner
|
10
|
+
combiner.should_receive(:apply!)
|
11
|
+
combiner
|
12
|
+
end
|
13
|
+
|
14
|
+
subject.stub!(:combine).exactly(6).times.and_return *combiners
|
15
|
+
|
16
|
+
subject.number_from ''
|
17
|
+
end
|
18
|
+
|
19
|
+
it 'sums all found numbers' do
|
20
|
+
subject.stub!(:numbers_within).and_return([7000, 600])
|
21
|
+
subject.stub!(:combine).and_return mock('combiner', :null_object => true)
|
22
|
+
|
23
|
+
subject.number_from('seven thousand, six hundred').should be(7600)
|
24
|
+
end
|
25
|
+
|
26
|
+
it 'fails when no numbers are found in string' do
|
27
|
+
lambda {
|
28
|
+
subject.number_from 'I contain no numbers'
|
29
|
+
}.should raise_error(Numerouno::NoNumberFoundError)
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
describe Numerouno::Parser, 'parsing' do
|
34
|
+
|
35
|
+
it 'parses the number 0' do
|
36
|
+
subject.number_from('0').should == 0
|
37
|
+
end
|
38
|
+
|
39
|
+
it 'parses numbers greater than 0' do
|
40
|
+
subject.number_from('5').should == 5
|
41
|
+
end
|
42
|
+
|
43
|
+
it 'parses numbers less than 0' do
|
44
|
+
subject.number_from('-1').should == -1
|
45
|
+
end
|
46
|
+
|
47
|
+
it 'parses numbers starting with a numeral as a literal number' do
|
48
|
+
subject.number_from('55 hundred').should == 55
|
49
|
+
end
|
50
|
+
|
51
|
+
it 'numbers starting with a alphabetic character as a phrase' do
|
52
|
+
subject.number_from('two 5').should == 2
|
53
|
+
end
|
54
|
+
|
55
|
+
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/spec_helper.rb'
|
2
|
+
|
3
|
+
describe Numerouno::Search do
|
4
|
+
|
5
|
+
# a lot of the search functionality is already covered in the features
|
6
|
+
|
7
|
+
it 'finds all numbers within a string' do
|
8
|
+
search_for('seventy five thousand and forty two').should eql([70, 5, 1000, 40, 2])
|
9
|
+
end
|
10
|
+
|
11
|
+
it "doesn't falsely identify partial numbers in powers of ten" do
|
12
|
+
search_for('sixty seventy eighty ninety').should eql([60, 70, 80, 90])
|
13
|
+
end
|
14
|
+
|
15
|
+
it "doesn't falsely identify partial numbers in teens" do
|
16
|
+
search_for('fourteen sixteen seventeen eighteen nineteen').should eql([14, 16, 17, 18, 19])
|
17
|
+
end
|
18
|
+
|
19
|
+
it 'finds uppercase numbers' do
|
20
|
+
search_for('NINE is uppercase').should eql([9])
|
21
|
+
end
|
22
|
+
|
23
|
+
def search_for string
|
24
|
+
Numerouno::Search.new(string).find_all
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
describe Numerouno::SearchMethods do
|
29
|
+
|
30
|
+
include Numerouno::SearchMethods
|
31
|
+
|
32
|
+
it 'builds a search' do
|
33
|
+
numbers_within('one two three').should eql([1, 2, 3])
|
34
|
+
end
|
35
|
+
|
36
|
+
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/spec_helper.rb'
|
2
|
+
|
3
|
+
describe Numerouno::Substitution do
|
4
|
+
|
5
|
+
it 'substitutes number phrases with a single word' do
|
6
|
+
substitution_for('my hamster has nine lives').should == 'my hamster has 9 lives'
|
7
|
+
end
|
8
|
+
|
9
|
+
it 'substitutes number phrases with several words' do
|
10
|
+
substitution_for('the answer is forty two, really').should == 'the answer is 42, really'
|
11
|
+
end
|
12
|
+
|
13
|
+
it 'returns the source string when there are no matches' do
|
14
|
+
substitution_for("I don't need no stinking numbers").should == "I don't need no stinking numbers"
|
15
|
+
end
|
16
|
+
|
17
|
+
it 'returns the source string for a string that consists only of a numeral' do
|
18
|
+
substitution_for('42').should == '42'
|
19
|
+
end
|
20
|
+
|
21
|
+
it "doesn't get confused by similar looking numbers" do
|
22
|
+
substitution_for('I have seven hundred and seventy seven apricots').should == 'I have 777 apricots'
|
23
|
+
end
|
24
|
+
|
25
|
+
it 'substitutes upper case number phrases' do
|
26
|
+
substitution_for('my hamster has NINE lives').should == 'my hamster has 9 lives'
|
27
|
+
end
|
28
|
+
|
29
|
+
def substitution_for string
|
30
|
+
Numerouno::Substitution.new(string).sub
|
31
|
+
end
|
32
|
+
|
33
|
+
end
|
data/spec/spec.opts
ADDED
data/todo
ADDED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: numerouno
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brent Snook
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-
|
12
|
+
date: 2009-10-13 00:00:00 +01:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -20,7 +20,7 @@ dependencies:
|
|
20
20
|
requirements:
|
21
21
|
- - ">="
|
22
22
|
- !ruby/object:Gem::Version
|
23
|
-
version: 1.2
|
23
|
+
version: 1.5.2
|
24
24
|
version:
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: rspec
|
@@ -30,7 +30,7 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: 1.2.
|
33
|
+
version: 1.2.8
|
34
34
|
version:
|
35
35
|
- !ruby/object:Gem::Dependency
|
36
36
|
name: cucumber
|
@@ -40,7 +40,7 @@ dependencies:
|
|
40
40
|
requirements:
|
41
41
|
- - ">="
|
42
42
|
- !ruby/object:Gem::Version
|
43
|
-
version: 0.3.
|
43
|
+
version: 0.3.103
|
44
44
|
version:
|
45
45
|
- !ruby/object:Gem::Dependency
|
46
46
|
name: hoe
|
@@ -50,9 +50,16 @@ dependencies:
|
|
50
50
|
requirements:
|
51
51
|
- - ">="
|
52
52
|
- !ruby/object:Gem::Version
|
53
|
-
version:
|
53
|
+
version: 2.3.3
|
54
54
|
version:
|
55
|
-
description:
|
55
|
+
description: |-
|
56
|
+
English natural language parser for numbers.
|
57
|
+
|
58
|
+
* Parse 'five' to return 5
|
59
|
+
* Parse 'seven hundred and fifty two billion, four hundred and twenty million, sixty thousand and forty two' to return 752,420,060,042
|
60
|
+
* Parse 'siebenundzwanzig' to have it fail because it doesn't recognise German.
|
61
|
+
|
62
|
+
Recognises numbers in the trillions.
|
56
63
|
email:
|
57
64
|
- brent@fuglylogic.com
|
58
65
|
executables: []
|
@@ -62,21 +69,40 @@ extensions: []
|
|
62
69
|
extra_rdoc_files:
|
63
70
|
- History.txt
|
64
71
|
- Manifest.txt
|
65
|
-
- README.rdoc
|
66
72
|
files:
|
67
73
|
- History.txt
|
68
74
|
- Manifest.txt
|
69
75
|
- README.rdoc
|
70
76
|
- Rakefile
|
71
|
-
-
|
77
|
+
- features/parse_billions.feature
|
78
|
+
- features/parse_hundreds.feature
|
79
|
+
- features/parse_literal_numerals.feature
|
80
|
+
- features/parse_millions.feature
|
81
|
+
- features/parse_powers_of_ten.feature
|
82
|
+
- features/parse_thousands.feature
|
83
|
+
- features/parse_trillions.feature
|
84
|
+
- features/parse_zero_to_ten.feature
|
85
|
+
- features/steps/numbers.rb
|
86
|
+
- features/substitute_numbers_within_a_string.feature
|
87
|
+
- features/support/env.rb
|
72
88
|
- lib/numerouno-parsing.rb
|
89
|
+
- lib/numerouno.rb
|
73
90
|
- lib/numerouno/combiner.rb
|
91
|
+
- lib/numerouno/numbers.rb
|
74
92
|
- lib/numerouno/parser.rb
|
75
93
|
- lib/numerouno/search.rb
|
94
|
+
- lib/numerouno/substitution.rb
|
76
95
|
- script/console
|
77
96
|
- script/destroy
|
78
97
|
- script/generate
|
98
|
+
- spec/numerouno/combiner_spec.rb
|
99
|
+
- spec/numerouno/parser_spec.rb
|
100
|
+
- spec/numerouno/search_spec.rb
|
101
|
+
- spec/numerouno/spec_helper.rb
|
102
|
+
- spec/numerouno/substitution_spec.rb
|
103
|
+
- spec/spec.opts
|
79
104
|
- tasks/rspec.rake
|
105
|
+
- todo
|
80
106
|
has_rdoc: true
|
81
107
|
homepage: http://github.com/brentsnook/numerouno
|
82
108
|
licenses: []
|
@@ -105,6 +131,6 @@ rubyforge_project: numerouno
|
|
105
131
|
rubygems_version: 1.3.5
|
106
132
|
signing_key:
|
107
133
|
specification_version: 3
|
108
|
-
summary: English natural language parser for numbers
|
134
|
+
summary: English natural language parser for numbers
|
109
135
|
test_files: []
|
110
136
|
|