numerizer 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/lib/numerizer.rb +25 -15
- data/numerizer.gemspec +2 -2
- data/test/test_numerizer.rb +11 -2
- metadata +2 -2
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.0
|
1
|
+
0.1.1
|
data/lib/numerizer.rb
CHANGED
@@ -26,18 +26,22 @@ class Numerizer
|
|
26
26
|
['nineteen', '19'],
|
27
27
|
['ninteen', '19'], # Common mis-spelling
|
28
28
|
['zero', '0'],
|
29
|
-
['one', '1'],
|
30
|
-
['two', '2'],
|
31
|
-
['three', '3'],
|
32
|
-
['four(\W|$)', '4\1'], # The weird regex is so that it matches four but not fourty
|
33
|
-
['five', '5'],
|
34
|
-
['six(\W|$)', '6\1'],
|
35
|
-
['seven(\W|$)', '7\1'],
|
36
|
-
['eight(\W|$)', '8\1'],
|
37
|
-
['nine(\W|$)', '9\1'],
|
38
29
|
['ten', '10'],
|
39
30
|
['\ba[\b^$]', '1'] # doesn't make sense for an 'a' at the end to be a 1
|
40
31
|
]
|
32
|
+
|
33
|
+
SINGLE_NUMS = [
|
34
|
+
['one', 1],
|
35
|
+
['two', 2],
|
36
|
+
['three', 3],
|
37
|
+
#['four(\W|$)', '4\1'], # The weird regex is so that it matches four but not fourty
|
38
|
+
['four', 4],
|
39
|
+
['five', 5],
|
40
|
+
['six', 6],
|
41
|
+
['seven', 7],
|
42
|
+
['eight', 8],
|
43
|
+
['nine', 9]
|
44
|
+
]
|
41
45
|
|
42
46
|
TEN_PREFIXES = [ ['twenty', 20],
|
43
47
|
['thirty', 30],
|
@@ -76,16 +80,22 @@ class Numerizer
|
|
76
80
|
|
77
81
|
# easy/direct replacements
|
78
82
|
|
79
|
-
DIRECT_NUMS.each do |dn|
|
80
|
-
|
83
|
+
(DIRECT_NUMS + SINGLE_NUMS).each do |dn|
|
84
|
+
# string.gsub!(/#{dn[0]}/i, '<num>' + dn[1])
|
85
|
+
string.gsub!(/(^|\W+)#{dn[0]}($|\W+)/i) {"#{$1}<num>" + dn[1].to_s + $2}
|
81
86
|
end
|
82
87
|
|
83
88
|
# ten, twenty, etc.
|
89
|
+
# TEN_PREFIXES.each do |tp|
|
90
|
+
# string.gsub!(/(?:#{tp[0]}) *<num>(\d(?=[^\d]|$))*/i) {'<num>' + (tp[1] + $1.to_i).to_s}
|
91
|
+
# end
|
84
92
|
TEN_PREFIXES.each do |tp|
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
93
|
+
SINGLE_NUMS.each do |dn|
|
94
|
+
string.gsub!(/(^|\W+)#{tp[0]}#{dn[0]}($|\W+)/i) {
|
95
|
+
"#{$1}<num>" + (tp[1] + dn[1]).to_s + $2
|
96
|
+
}
|
97
|
+
end
|
98
|
+
string.gsub!(/(^|\W+)#{tp[0]}($|\W+)/i) { "#{$1}<num>" + tp[1].to_s + $2 }
|
89
99
|
end
|
90
100
|
|
91
101
|
# handle fractions
|
data/numerizer.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{numerizer}
|
8
|
-
s.version = "0.1.0"
|
8
|
+
s.version = "0.1.1"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["John Duff"]
|
12
|
-
s.date = %q{
|
12
|
+
s.date = %q{2010-01-01}
|
13
13
|
s.description = %q{Numerizer is a gem to help with parsing numbers in natural language from strings (ex forty two). It was extracted from the awesome Chronic gem http://github.com/evaryont/chronic.}
|
14
14
|
s.email = %q{duff.john@gmail.com}
|
15
15
|
s.extra_rdoc_files = [
|
data/test/test_numerizer.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
require 'test_helper'
|
1
|
+
require File.join(File.dirname(__FILE__), 'test_helper')
|
2
2
|
|
3
3
|
class NumerizerTest < Test::Unit::TestCase
|
4
4
|
def test_straight_parsing
|
@@ -31,7 +31,7 @@ class NumerizerTest < Test::Unit::TestCase
|
|
31
31
|
1_200 => 'twelve hundred',
|
32
32
|
1_200 => 'one thousand two hundred',
|
33
33
|
17_000 => 'seventeen thousand',
|
34
|
-
|
34
|
+
21_473 => 'twentyone-thousand-four-hundred-and-seventy-three',
|
35
35
|
74_002 => 'seventy four thousand and two',
|
36
36
|
99_999 => 'ninety nine thousand nine hundred ninety nine',
|
37
37
|
100_000 => '100 thousand',
|
@@ -48,6 +48,11 @@ class NumerizerTest < Test::Unit::TestCase
|
|
48
48
|
assert_equal "2.5", Numerizer.numerize("two and a half")
|
49
49
|
assert_equal "1/2", Numerizer.numerize("one half")
|
50
50
|
end
|
51
|
+
|
52
|
+
def test_combined_double_digets
|
53
|
+
assert_equal "21", Numerizer.numerize("twentyone")
|
54
|
+
assert_equal "37", Numerizer.numerize("thirtyseven")
|
55
|
+
end
|
51
56
|
|
52
57
|
def test_fractions_in_words
|
53
58
|
assert_equal "1/4", Numerizer.numerize("1 quarter")
|
@@ -65,6 +70,10 @@ class NumerizerTest < Test::Unit::TestCase
|
|
65
70
|
assert_equal "2.375", Numerizer.numerize("two and three eighths")
|
66
71
|
assert_equal "3.5 hours", Numerizer.numerize("three and a half hours")
|
67
72
|
end
|
73
|
+
|
74
|
+
def test_word_with_a_number
|
75
|
+
assert_equal "pennyweight", Numerizer.numerize("pennyweight")
|
76
|
+
end
|
68
77
|
|
69
78
|
def test_edges
|
70
79
|
assert_equal "27 Oct 2006 7:30am", Numerizer.numerize("27 Oct 2006 7:30am")
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: numerizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- John Duff
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date:
|
12
|
+
date: 2010-01-01 00:00:00 -05:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|