numerizer 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
@@ -0,0 +1,5 @@
1
+ *.sw?
2
+ .DS_Store
3
+ coverage
4
+ rdoc
5
+ pkg
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Tom Preston-Werner
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,24 @@
1
+ = Numerizer
2
+
3
+ Numerizer is a gem that parses numbers written out in natural language (e.g. "forty two") from strings. It was extracted from the awesome Chronic gem: http://github.com/evaryont/chronic.
4
+
5
+ == Installation
6
+
7
+ $ sudo gem sources -a http://gemcutter.org
8
+ $ sudo gem install numerizer
9
+
10
+ == Usage
11
+
12
+ >> require 'numerizer'
13
+ => true
14
+ >> Numerizer.numerize('forty two')
15
+ => "42"
16
+ >> Numerizer.numerize('two and a half')
17
+ => "2.5"
18
+ >> Numerizer.numerize('three quarters')
19
+ => "3/4"
20
+ >> Numerizer.numerize('two and three eighths')
21
+ => "2.375"
22
+
23
+ == Contributors
24
+ Tom Preston-Werner, John Duff
@@ -0,0 +1,56 @@
1
require 'rubygems'
require 'rake'

# Gem packaging tasks. Jeweler is optional: when it cannot be loaded the
# remaining tasks (test, rcov, rdoc) are still defined.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "numerizer"
    gem.summary = "Numerizer is a gem to help with parsing numbers in natural language from strings (ex forty two)."
    gem.description = "Numerizer is a gem to help with parsing numbers in natural language from strings (ex forty two). It was extracted from the awesome Chronic gem http://github.com/evaryont/chronic."
    gem.email = "duff.john@gmail.com"
    gem.homepage = "http://github.com/jduff/numerizer"
    gem.authors = ["John Duff"]
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
end

# Unit tests: every test/**/test_*.rb file, with lib/ and test/ on the load path.
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
end

# Coverage task. When rcov is missing, `rake rcov` aborts with an
# installation hint instead of being undefined.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/test_*.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

# :check_dependencies is not defined anywhere in this Rakefile (presumably it
# comes from Jeweler). Only depend on it when it exists, so that `rake test`
# still works after the LoadError rescue above.
task :test => :check_dependencies if Rake::Task.task_defined?(:check_dependencies)

task :default => :test

# Newer Rake versions no longer ship rake/rdoctask; prefer RDoc's own task
# and fall back to the legacy one on old installations.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  # strip: keep a trailing newline in VERSION out of the generated title
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "numerizer #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
@@ -0,0 +1,123 @@
1
+ # LICENSE:
2
+ #
3
+ # (The MIT License)
4
+ #
5
+ # Copyright © 2008 Tom Preston-Werner
6
+ #
7
+ # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
8
+ #
9
+ # The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
10
+ #
11
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
12
+
13
+ require 'strscan'
14
+
15
# Converts numbers written out in English words inside a string into digits
# (for example "forty two" becomes "42").
#
# The conversion is a fixed pipeline of regex substitutions. Every number that
# has been converted is tagged with a temporary "<num>" marker so that later
# steps can combine neighbouring numbers; the markers are removed at the end.
#
# Known limitation: most word patterns carry no word-boundary anchors, so a
# number word embedded in a longer word is converted as well. This is what
# lets "twentyone" parse, but it also rewrites unrelated words that happen to
# contain e.g. "one" or "ten".
class Numerizer

  # [regex source, replacement] pairs for words that map straight to digits.
  # The '(\W|$)' suffix on some entries keeps e.g. "four" from matching inside
  # "fourty"; the captured separator is put back via '\1'.
  #
  # The article "a" is deliberately not listed here; it is only treated as 1
  # in front of a fraction or a big prefix (see .numerize).
  DIRECT_NUMS = [
    ['eleven', '11'],
    ['twelve', '12'],
    ['thirteen', '13'],
    ['fourteen', '14'],
    ['fifteen', '15'],
    ['sixteen', '16'],
    ['seventeen', '17'],
    ['eighteen', '18'],
    ['nineteen', '19'],
    ['ninteen', '19'], # Common mis-spelling
    ['zero', '0'],
    ['one', '1'],
    ['two', '2'],
    ['three', '3'],
    ['four(\W|$)', '4\1'], # The weird regex is so that it matches four but not fourty
    ['five', '5'],
    ['six(\W|$)', '6\1'],
    ['seven(\W|$)', '7\1'],
    ['eight(\W|$)', '8\1'],
    ['nine(\W|$)', '9\1'],
    ['ten', '10']
  ].freeze

  # [word, value] pairs for the multiples of ten.
  TEN_PREFIXES = [
    ['twenty', 20],
    ['thirty', 30],
    ['forty', 40],
    ['fourty', 40], # Common misspelling
    ['fifty', 50],
    ['sixty', 60],
    ['seventy', 70],
    ['eighty', 80],
    ['ninety', 90]
  ].freeze

  # [word, multiplier] pairs, applied from smallest to largest.
  BIG_PREFIXES = [
    ['hundred', 100],
    ['thousand', 1000],
    ['million', 1_000_000],
    ['billion', 1_000_000_000],
    ['trillion', 1_000_000_000_000]
  ].freeze

  # [regex source, denominator] pairs for fraction words.
  FRACTIONS = [
    ['half', 2],
    ['third(s)?', 3],
    ['fourth(s)?', 4],
    ['quarter(s)?', 4],
    ['fifth(s)?', 5],
    ['sixth(s)?', 6],
    ['seventh(s)?', 7],
    ['eighth(s)?', 8],
    ['nine?th(s)?', 9] # "ninth", plus the "nineth" spelling accepted historically
  ].freeze

  # Returns a copy of +string+ in which spelled-out numbers are replaced by
  # digits. The argument itself is never modified.
  #
  #   Numerizer.numerize('forty two')       # => "42"
  #   Numerizer.numerize('two and a half')  # => "2.5"
  #   Numerizer.numerize('three quarters')  # => "3/4"
  def self.numerize(string)
    string = string.dup

    # preprocess: collapse runs of spaces, turn hyphens between non-digits into spaces
    string.gsub!(/ +|([^\d])-([^\d])/, '\1 \2') # will mutilate hyphenated-words

    # easy/direct replacements
    DIRECT_NUMS.each do |pattern, digits|
      string.gsub!(/#{pattern}/i, '<num>' + digits)
    end

    # ten, twenty, etc. -- first merged with a following single digit ...
    TEN_PREFIXES.each do |word, value|
      string.gsub!(/(?:#{word}) *<num>(\d(?=[^\d]|$))*/i) { '<num>' + (value + $1.to_i).to_s }
    end
    # ... then on their own
    TEN_PREFIXES.each do |word, value|
      string.gsub!(/#{word}/i) { '<num>' + value.to_s }
    end

    # handle fractions
    FRACTIONS.each do |word, denominator|
      string.gsub!(/a #{word}/i) { '<num>1/' + denominator.to_s }
      string.gsub!(/\s#{word}/i) { '/' + denominator.to_s }
    end

    # evaluate fractions when preceded by another number
    string.gsub!(/(\d+)(?: | and |-)+(<num>|\s)*(\d+)\s*\/\s*(\d+)/i) { ($1.to_f + ($3.to_f/$4.to_f)).to_s }

    # hundreds, thousands, millions, etc.
    BIG_PREFIXES.each do |word, value|
      # The prefix is multiplied by the number in front of it. A leading
      # article ("a hundred") or no number at all ("hundred") counts as one;
      # a bare prefix keeps the whitespace in front of it.
      string.gsub!(/(?:\ba +|(?:<num>)?(\d+) *)?#{word}/i) do
        multiplier = $1 ? $1.to_i : 1
        '<num>' + (value * multiplier).to_s
      end
      andition(string)
    end

    andition(string)

    string.gsub(/<num>/, '')
  end

  # Adds up adjacent "<num>" tokens in +string+, modifying it in place.
  #
  # Two tokens are merged when they are joined by " and ", or when they are
  # separated by a single space and the first has more digits than the second
  # ("<num>200 <num>50" becomes "<num>250"). After every merge the search
  # restarts from the beginning so that the new token can merge again.
  #
  # Offsets come from MatchData and are therefore character based, which keeps
  # the in-place replacement correct for strings with multibyte characters.
  def self.andition(string)
    pair = /<num>(\d+)( | and )<num>(\d+)(?=[^\w]|$)/i
    offset = 0
    while (match = pair.match(string, offset))
      joined_by_and = match[2] != ' '
      if joined_by_and || match[1].size > match[3].size
        string[match.begin(0)...match.end(0)] = '<num>' + (match[1].to_i + match[3].to_i).to_s
        offset = 0
      else
        offset = match.end(0)
      end
    end
  end
  # A bare `private` does not apply to `def self.` methods.
  private_class_method :andition

end
@@ -0,0 +1,50 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run `rake gemspec`
4
+ # -*- encoding: utf-8 -*-
5
+
6
# Gem specification for numerizer 0.1.0: package metadata, the files that are
# shipped, and the test files.
Gem::Specification.new do |s|
  s.name = %q{numerizer}
  s.version = "0.1.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["John Duff"]
  s.date = %q{2009-12-25}
  s.description = %q{Numerizer is a gem to help with parsing numbers in natural language from strings (ex forty two). It was extracted from the awesome Chronic gem http://github.com/evaryont/chronic.}
  s.email = %q{duff.john@gmail.com}
  s.extra_rdoc_files = [
    "LICENSE",
    "README.rdoc"
  ]
  s.files = [
    ".document",
    ".gitignore",
    "LICENSE",
    "README.rdoc",
    "Rakefile",
    "VERSION",
    "lib/numerizer.rb",
    "numerizer.gemspec",
    "test/test_helper.rb",
    "test/test_numerizer.rb"
  ]
  s.homepage = %q{http://github.com/jduff/numerizer}
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["lib"]
  s.rubygems_version = %q{1.3.5}
  s.summary = %q{Numerizer is a gem to help with parsing numbers in natural language from strings (ex forty two).}
  s.test_files = [
    "test/test_helper.rb",
    "test/test_numerizer.rb"
  ]

  # The gem declares no dependencies, so the generated version-dependent
  # branches were empty. They are dropped here because merely evaluating their
  # condition referenced Gem::RubyGemsVersion, which newer RubyGems releases
  # no longer define.
  s.specification_version = 3 if s.respond_to? :specification_version
end
@@ -0,0 +1,9 @@
1
require 'rubygems'
require 'test/unit'

# Put lib/ and then the test directory itself at the front of the load path
# so the suite exercises the checked-out sources.
test_dir = File.dirname(__FILE__)
[File.join(test_dir, '..', 'lib'), test_dir].each do |dir|
  $LOAD_PATH.unshift(dir)
end
require 'numerizer'

# Reopened as a hook for helpers shared by all test cases (none yet).
class Test::Unit::TestCase
end
@@ -0,0 +1,83 @@
1
+ require 'test_helper'
2
+
3
# Unit tests for Numerizer.numerize.
class NumerizerTest < Test::Unit::TestCase
  # Whole numbers: each phrase must numerize to a string whose integer value
  # equals the expected number.
  def test_straight_parsing
    # An array of pairs rather than a Hash: several phrases share the same
    # expected value, and duplicate Hash keys silently drop all but the last
    # entry for that key.
    cases = [
      [1, 'one'],
      [5, 'five'],
      [10, 'ten'],
      [11, 'eleven'],
      [12, 'twelve'],
      [13, 'thirteen'],
      [14, 'fourteen'],
      [15, 'fifteen'],
      [16, 'sixteen'],
      [17, 'seventeen'],
      [18, 'eighteen'],
      [19, 'nineteen'],
      [20, 'twenty'],
      [27, 'twenty seven'],
      [31, 'thirty-one'],
      [41, 'forty one'],
      [42, 'fourty two'],
      [59, 'fifty nine'],
      # [100, 'a hundred'], # was shadowed by the duplicate 100 key and has never run; enable once confirmed to pass
      [100, 'one hundred'],
      [150, 'one hundred and fifty'],
      # [150, 'one fifty'],
      [200, 'two-hundred'],
      [500, '5 hundred'],
      [999, 'nine hundred and ninety nine'],
      [1_000, 'one thousand'],
      [1_200, 'twelve hundred'],
      [1_200, 'one thousand two hundred'],
      [17_000, 'seventeen thousand'],
      [21_473, 'twentyone-thousand-four-hundred-and-seventy-three'],
      [74_002, 'seventy four thousand and two'],
      [99_999, 'ninety nine thousand nine hundred ninety nine'],
      [100_000, '100 thousand'],
      [250_000, 'two hundred fifty thousand'],
      [1_000_000, 'one million'],
      [1_250_007, 'one million two hundred fifty thousand and seven'],
      [1_000_000_000, 'one billion'],
      [1_000_000_001, 'one billion and one']
    ]

    cases.each do |expected, phrase|
      assert_equal expected, Numerizer.numerize(phrase).to_i, "numerizing #{phrase.inspect}"
    end

    assert_equal "2.5", Numerizer.numerize("two and a half")
    assert_equal "1/2", Numerizer.numerize("one half")
  end

  # Fraction words become "numerator/denominator".
  def test_fractions_in_words
    assert_equal "1/4", Numerizer.numerize("1 quarter")
    assert_equal "1/4", Numerizer.numerize("one quarter")
    assert_equal "1/4", Numerizer.numerize("a quarter")
    assert_equal "1/8", Numerizer.numerize("one eighth")

    assert_equal "3/4", Numerizer.numerize("three quarters")
    assert_equal "2/4", Numerizer.numerize("two fourths")
    assert_equal "3/8", Numerizer.numerize("three eighths")
  end

  # A whole number followed by a fraction is evaluated to a decimal.
  def test_fractional_addition
    assert_equal "1.25", Numerizer.numerize("one and a quarter")
    assert_equal "2.375", Numerizer.numerize("two and three eighths")
    assert_equal "3.5 hours", Numerizer.numerize("three and a half hours")
  end

  # Text that already consists of digits must pass through unchanged.
  def test_edges
    assert_equal "27 Oct 2006 7:30am", Numerizer.numerize("27 Oct 2006 7:30am")
  end

  def test_multiple_slashes_should_not_be_evaluated
    assert_equal '11/02/2007', Numerizer.numerize('11/02/2007')
  end

  def test_compatability
    assert_equal '1/2', Numerizer.numerize('1/2')
    assert_equal '05/06', Numerizer.numerize('05/06')
    assert_equal "3.5 hours", Numerizer.numerize("three and a half hours")
  end

end
metadata ADDED
@@ -0,0 +1,66 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: numerizer
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - John Duff
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-12-25 00:00:00 -05:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: Numerizer is a gem to help with parsing numbers in natural language from strings (ex forty two). It was extracted from the awesome Chronic gem http://github.com/evaryont/chronic.
17
+ email: duff.john@gmail.com
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files:
23
+ - LICENSE
24
+ - README.rdoc
25
+ files:
26
+ - .document
27
+ - .gitignore
28
+ - LICENSE
29
+ - README.rdoc
30
+ - Rakefile
31
+ - VERSION
32
+ - lib/numerizer.rb
33
+ - numerizer.gemspec
34
+ - test/test_helper.rb
35
+ - test/test_numerizer.rb
36
+ has_rdoc: true
37
+ homepage: http://github.com/jduff/numerizer
38
+ licenses: []
39
+
40
+ post_install_message:
41
+ rdoc_options:
42
+ - --charset=UTF-8
43
+ require_paths:
44
+ - lib
45
+ required_ruby_version: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ version: "0"
50
+ version:
51
+ required_rubygems_version: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: "0"
56
+ version:
57
+ requirements: []
58
+
59
+ rubyforge_project:
60
+ rubygems_version: 1.3.5
61
+ signing_key:
62
+ specification_version: 3
63
+ summary: Numerizer is a gem to help with parsing numbers in natural language from strings (ex forty two).
64
+ test_files:
65
+ - test/test_helper.rb
66
+ - test/test_numerizer.rb